File Coverage

File:Dpkg/Control/HashCore.pm
Coverage:73.2%

line stmt bran cond sub pod time code
1# Copyright © 2007-2009 Raphaël Hertzog <hertzog@debian.org>
2# Copyright © 2009, 2012-2019, 2021 Guillem Jover <guillem@debian.org>
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17=encoding utf8
18
19 - 43
=head1 NAME

Dpkg::Control::HashCore - parse and manipulate a stanza of deb822 fields

=head1 DESCRIPTION

The L<Dpkg::Control::Hash> class is a hash-like representation of a set of
RFC822-like fields. The field names are case insensitive and are always
capitalized the same when output (see field_capitalize() function in
L<Dpkg::Control::Fields>).
The order in which fields have been set is remembered and is used
to be able to dump back the same content. The output order can also be
overridden if needed.

You can store arbitrary values in the hash, they will always be properly
escaped in the output to conform to the syntax of control files. This is
relevant mainly for multiline values: while the first line is always output
unchanged directly after the field name, supplementary lines are
modified. Empty lines and lines containing only dots are prefixed with
" ." (space + dot) while other lines are prefixed with a single space.

During parsing, trailing spaces are stripped on all lines while leading
spaces are stripped only on the first line of each field.

=cut
44
45package Dpkg::Control::HashCore 1.02;
46
47
138
138
138
330
98
1884
use strict;
48
138
138
138
222
110
2807
use warnings;
49
50
138
138
138
227
118
4266
use Dpkg::Gettext;
51
138
138
138
457
100
4571
use Dpkg::ErrorHandling;
52
138
138
138
18081
154
8084
use Dpkg::Control::FieldsCore;
53
138
138
138
25219
173
3030
use Dpkg::Control::HashCore::Tie;
54
55# This module cannot use Dpkg::Control::Fields, because that one makes use
56# of Dpkg::Vendor which at the same time uses this module, which would turn
57# into a compilation error. We can use Dpkg::Control::FieldsCore instead.
58
59
138
138
138
377
155
285
use parent qw(Dpkg::Interface::Storable);
60
61use overload
62
42186
42186
27690
69524
    '%{}' => sub { ${$_[0]}->{fields} },
63
138
138
138
12
7165
116
710
32
    'eq' => sub { "$_[0]" eq "$_[1]" };
64
65 - 114
=head1 METHODS

=over 4

=item $c = Dpkg::Control::Hash->new(%opts)

Creates a new object with the indicated options. Supported options
are:

=over 8

=item allow_pgp

Configures the parser to accept OpenPGP signatures around the control
information. Value can be 0 (default) or 1.

=item allow_duplicate

Configures the parser to allow duplicate fields in the control
information.
The last value overrides any previous values.
Value can be 0 (default) or 1.

=item keep_duplicate

Configure the parser to keep values for duplicate fields found in the control
information (when B<allow_duplicate> is enabled), as array references.
Value can be 0 (default) or 1.

=item drop_empty

Defines if empty fields are dropped during the output. Value can be 0
(default) or 1.

=item name

The user friendly name of the information stored in the object. It might
be used in some error messages or warnings. A default name might be set
depending on the type.

=item is_pgp_signed

Set by the parser (starting in dpkg 1.17.0) if it finds an OpenPGP
signature around the control information. Value can be 0 (default)
or 1, and undef when the option is not supported by the code (in
versions older than dpkg 1.17.0).

=back

=cut
115
# Constructor. Accepts a class name or an existing object as invocant and
# a list of option => value pairs; returns the new blessed object.
sub new {
    my ($this, %opts) = @_;

    # Default state. The object is a reference to the scalar holding this
    # hash reference (rather than the hash reference itself) to avoid
    # infinite recursion due to overloading hash-dereferencing.
    my $state = {
        in_order => [],
        out_order => [],
        is_pgp_signed => 0,
        allow_pgp => 0,
        allow_duplicate => 0,
        keep_duplicate => 0,
        drop_empty => 0,
    };
    my $self = bless \$state, ref($this) || $this;

    # The field storage is handed the object it belongs to.
    $state->{fields} = Dpkg::Control::HashCore::Tie->new($self);

    # Options set by the user override default values.
    for my $opt (keys %opts) {
        $state->{$opt} = $opts{$opt};
    }

    return $self;
}
140
141# There is naturally a circular reference between the tied hash and its
142# containing object. Happily, the extra layer of scalar reference can
143# be used to detect the destruction of the object and break the loop so
144# that everything gets garbage-collected.
145
# Destructor: drops the "fields" entry from the inner hash, which breaks
# the reference loop between the object and its field storage.
sub DESTROY {
    my ($self) = @_;
    delete ${$self}->{fields};
}
150
151 - 155
=item $c->set_options(%opts)

Changes the value of one or more options.

=cut
156
# Store every option => value pair given by the caller in the object's
# inner hash, replacing any value previously stored under the same name.
sub set_options {
    my ($self, %opts) = @_;
    for my $opt (keys %opts) {
        ${$self}->{$opt} = $opts{$opt};
    }
}
161
162 - 166
=item $value = $c->get_option($option)

Returns the value of the corresponding option.

=cut
167
# Return the value stored in the object's inner hash under option name $k
# (undef when nothing is stored there).
sub get_option {
    my ($self, $k) = @_;
    my $state = ${$self};
    return $state->{$k};
}
172
173 - 177
=item $c->parse_error($file, $fmt, ...)

Prints an error message and dies on syntax parse errors.

=cut
178
# Report a syntax error for $file through error(), including the current
# input line number ($.). $msg is used as a sprintf format only when extra
# arguments are supplied; otherwise it is reported verbatim.
sub parse_error {
    my ($self, $file, $msg, @args) = @_;

    if (@args) {
        $msg = sprintf($msg, @args);
    }

    error(g_('syntax error in %s at line %d: %s'), $file, $., $msg);
}
185
186 - 195
=item $c->parse($fh, $description)

Parse a control file from the given filehandle. Exits in case of errors.
$description is used to describe the filehandle, ideally it's a filename
or a description of where the data comes from. It's used in error
messages. When called multiple times, the parsed fields are accumulated.

Returns true if some fields have been parsed.

=cut
196
# Parse a single stanza of "Name: value" fields from the filehandle $fh
# into the object; $desc describes the input in error messages.
#
# Leading empty lines and lines starting with '#' are skipped. Parsing stops
# at the empty line ending the stanza, at the end of an OpenPGP signature,
# or at end of input. Every syntax problem is reported through
# $self->parse_error().
#
# Returns true if at least one field has been parsed.
sub parse {
    my ($self, $fh, $desc) = @_;

    my $paraborder = 1;     # True until the first non-empty line is seen.
    my $parabody = 0;       # True once a field has been parsed.
    my $cf; # Current field
    my $expect_pgp_sig = 0; # True after a "BEGIN PGP SIGNED MESSAGE" line.
    local $_;

    while (<$fh>) {
        # In the common case there will be just a trailing \n character,
        # so using chomp here which is very fast will avoid the latter
        # s/// doing anything, which gives us a significant speed up.
        chomp;
        # Keep the unstripped line: the armor patterns below allow only
        # a restricted set of trailing characters.
        my $armor = $_;
        s/\s+$//;

        next if length == 0 and $paraborder;

        my $lead = substr $_, 0, 1;
        next if $lead eq '#';
        $paraborder = 0;

        my ($name, $value) = split /\s*:\s*/, $_, 2;
        if (defined $name and $name =~ m/^\S+?$/) {
            $parabody = 1;
            if ($lead eq '-') {
                $self->parse_error($desc, g_('field cannot start with a hyphen'));
            }
            if (exists $self->{$name}) {
                unless ($$self->{allow_duplicate}) {
                    $self->parse_error($desc, g_('duplicate field %s found'), $name);
                }
                if ($$self->{keep_duplicate}) {
                    if (ref $self->{$name} ne 'ARRAY') {
                        # Switch value into an array.
                        $self->{$name} = [ $self->{$name}, $value ];
                    } else {
                        # Append the value.
                        push @{$self->{$name}}, $value;
                    }
                } else {
                    # Overwrite with last value.
                    $self->{$name} = $value;
                }
            } else {
                $self->{$name} = $value;
            }
            $cf = $name;
        } elsif (m/^\s(\s*\S.*)$/) {
            my $line = $1;
            unless (defined($cf)) {
                $self->parse_error($desc, g_('continued value line not in field'));
            }
            # A line made only of dots is an escaped line: drop one dot.
            if ($line =~ /^\.+$/) {
                $line = substr $line, 1;
            }
            my $current = $self->{$cf};
            if (ref $current eq 'ARRAY') {
                # Duplicates are being kept: the continuation belongs to
                # the most recently added value. Appending to the field
                # itself would stringify the array reference.
                $current->[-1] .= "\n$line";
            } else {
                $self->{$cf} .= "\n$line";
            }
        } elsif (length == 0 ||
                 ($expect_pgp_sig && $armor =~ m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/)) {
            if ($expect_pgp_sig) {
                # Skip empty lines
                $_ = <$fh> while defined && m/^\s*$/;
                unless (length) {
                    $self->parse_error($desc, g_('expected OpenPGP signature, ' .
                                                 'found end of file after blank line'));
                }
                chomp;
                unless (m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/) {
                    $self->parse_error($desc, g_('expected OpenPGP signature, ' .
                                                 "found something else '%s'"), $_);
                }
                # Skip OpenPGP signature
                while (<$fh>) {
                    chomp;
                    last if m/^-----END PGP SIGNATURE-----[\r\t ]*$/;
                }
                # $_ is undefined here when the input ended before the
                # END marker was found.
                unless (defined) {
                    $self->parse_error($desc, g_('unfinished OpenPGP signature'));
                }
                # This does not mean the signature is correct, that needs to
                # be verified by an OpenPGP backend.
                $$self->{is_pgp_signed} = 1;
            }
            # Finished parsing one stanza.
            last;
        } elsif ($armor =~ m/^-----BEGIN PGP SIGNED MESSAGE-----[\r\t ]*$/) {
            $expect_pgp_sig = 1;
            if ($$self->{allow_pgp} and not $parabody) {
                # Skip OpenPGP headers
                while (<$fh>) {
                    last if m/^\s*$/;
                }
            } else {
                $self->parse_error($desc, g_('OpenPGP signature not allowed here'));
            }
        } else {
            $self->parse_error($desc,
                               g_('line with unknown format (not field-colon-value)'));
        }
    }

    if ($expect_pgp_sig and not $$self->{is_pgp_signed}) {
        $self->parse_error($desc, g_('unfinished OpenPGP signature'));
    }

    return defined($cf);
}
305
306 - 317
=item $c->load($file)

Parse the content of $file. Exits in case of errors. Returns true if some
fields have been parsed.

=item $c->find_custom_field($name)

Scan the fields and look for a user specific field whose name matches the
following regex: /^X[SBC]*-$name$/i. Return the name of the field found or
undef if nothing has been found.

=cut
318
# Look through the field names for one made of "X", any number of the
# letters S, B and C, a hyphen and then $name (taken literally, compared
# case-insensitively). Returns the first such field name found, or nothing.
sub find_custom_field {
    my ($self, $name) = @_;

    for my $field (keys %{$self}) {
        next if $field !~ /^X[SBC]*-\Q$name\E$/i;
        return $field;
    }

    return;
}
326
327 - 331
=item $c->get_custom_field($name)

Identify a user field and retrieve its value.

=cut
332
# Return the value of the user field located by find_custom_field($name),
# or nothing when no such field exists.
sub get_custom_field {
    my ($self, $name) = @_;

    my $key = $self->find_custom_field($name);
    return unless defined $key;

    return $self->{$key};
}
339
340 - 353
=item $str = $c->output()

=item "$c"

Get a string representation of the control information. The fields
are sorted in the order in which they have been read or set except
if the order has been overridden with set_output_order().

=item $c->output($fh)

Print the string representation of the control information to a
filehandle.

=cut
354
# Serialize the fields in control file syntax.
#
# When an output order is set, fields listed in it come first, in that
# order, followed by the remaining fields sorted by name; otherwise the
# recorded input order is used. Each formatted field is printed to $fh when
# one is given, and accumulated into the returned string unless the method
# is called in void context.
sub output {
    my ($self, $fh) = @_;

    my @keys;
    if (@{$$self->{out_order}}) {
        # Map each field of the requested order to its 1-based position.
        my %rank;
        my $pos = 0;
        for my $field (@{$$self->{out_order}}) {
            $rank{$field} = ++$pos;
        }
        @keys = sort {
            my ($ra, $rb) = ($rank{$a}, $rank{$b});
            (defined($ra) && defined($rb)) ? $ra <=> $rb
                : defined($ra) ? -1
                : defined($rb) ? 1
                : $a cmp $b;
        } keys %$self;
    } else {
        @keys = @{$$self->{in_order}};
    }

    my $str = '';
    for my $key (@keys) {
        next unless exists $self->{$key};

        my $value = $self->{$key};
        # Skip whitespace-only fields
        next if $$self->{drop_empty} and $value !~ m/\S/;

        # Escape data to follow control file syntax: the first line goes
        # right after the field name, the others become continuation lines.
        my ($head, @rest) = split /\n/, $value;

        my $kv = "$key:";
        $kv .= ' ' . $head if length $head;
        $kv .= "\n";
        for my $line (@rest) {
            $line =~ s/\s+$//;
            # Empty lines and lines made only of dots get an extra dot.
            my $prefix = (length($line) == 0 || $line =~ /^\.+$/) ? ' .' : ' ';
            $kv .= "$prefix$line\n";
        }

        # Print it out
        if ($fh) {
            print { $fh } $kv
                or syserr(g_('write error on control data'));
        }
        $str .= $kv if defined wantarray;
    }

    return $str;
}
407
408 - 416
=item $c->save($filename)

Write the string representation of the control information to a file.

=item $c->set_output_order(@fields)

Define the order in which fields will be displayed in the output() method.

=cut
417
# Record the order in which output() emits fields. A fresh copy of the
# given list is stored, so later changes to the caller's array have no
# effect on the object.
sub set_output_order {
    my ($self, @fields) = @_;

    my @order = @fields;
    ${$self}->{out_order} = \@order;
}
423
424 - 429
=item $c->apply_substvars($substvars)

Update all fields by replacing the variables references with
the corresponding value stored in the L<Dpkg::Substvars> object.

=cut
430
# Replace variable references in every field with the values provided by
# $substvars. %opts is passed through unchanged to $substvars->substvars().
#
# When a substitution changed the value of a comma- or line-separated
# field, the result is cleaned up so that the substitution does not leave
# empty lines or empty list items behind.
sub apply_substvars {
    my ($self, $substvars, %opts) = @_;

    # Add substvars to refer to other fields
    $substvars->set_field_substvars($self, 'F');

    foreach my $f (keys %$self) {
        my $v = $substvars->substvars($self->{$f}, %opts);
        if ($v ne $self->{$f}) {
            my $sep = field_get_sep_type($f);

            # If we replaced stuff, ensure we're not breaking
            # a dependency field by introducing empty lines, or multiple
            # commas

            if ($sep & (FIELD_SEP_COMMA | FIELD_SEP_LINE)) {
                # Drop empty/whitespace-only lines. The lookahead leaves the
                # following newline in place so that it can start the next
                # match; together with /g this removes every such line,
                # including consecutive ones, instead of only the first.
                $v =~ s/\n[ \t]*(?=\n|$)//g;
            }

            if ($sep & FIELD_SEP_COMMA) {
                # Collapse runs of commas, then trim a leading and a
                # trailing comma.
                $v =~ s/,[\s,]*,/,/g;
                $v =~ s/^\s*,\s*//;
                $v =~ s/\s*,\s*$//;
            }
        }
        # Replace ${} with $, which is otherwise an invalid substitution, but
        # this then makes it possible to use ${} as an escape sequence such
        # as ${}{VARIABLE}.
        $v =~ s/\$\{\}/\$/g;

        $self->{$f} = $v;
    }
}
467
468=back
469
470 - 484
=head1 CHANGES

=head2 Version 1.02 (dpkg 1.21.0)

New option: "keep_duplicate" in new().

=head2 Version 1.01 (dpkg 1.17.2)

New method: $c->parse_error().

=head2 Version 1.00 (dpkg 1.17.0)

Mark the module as public.

=cut
485
4861;