File Coverage

File:Dpkg/Control/HashCore.pm
Coverage:77.0%

linestmtbrancondsubpodtimecode
1# Copyright © 2007-2009 Raphaël Hertzog <hertzog@debian.org>
2# Copyright © 2009, 2012-2019, 2021 Guillem Jover <guillem@debian.org>
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17package Dpkg::Control::HashCore;
18
19
44
44
44
84
33
451
use strict;
20
44
44
44
66
21
887
use warnings;
21
22our $VERSION = '1.02';
23
24
44
44
44
62
91
946
use Dpkg::Gettext;
25
44
44
44
76
29
1105
use Dpkg::ErrorHandling;
26
44
44
44
4723
36
2083
use Dpkg::Control::FieldsCore;
27
28# This module cannot use Dpkg::Control::Fields, because that one makes use
29# of Dpkg::Vendor which at the same time uses this module, which would turn
30# into a compilation error. We can use Dpkg::Control::FieldsCore instead.
31
32
44
44
44
99
35
86
use parent qw(Dpkg::Interface::Storable);
33
34use overload
35
13924
13924
7661
15332
    '%{}' => sub { ${$_[0]}->{fields} },
36
44
44
44
4
1881
32
118
5
    'eq' => sub { "$_[0]" eq "$_[1]" };
37
38=encoding utf8
39
40 - 113
=head1 NAME

Dpkg::Control::HashCore - parse and manipulate a block of RFC822-like fields

=head1 DESCRIPTION

The Dpkg::Control::Hash class is a hash-like representation of a set of
RFC822-like fields. The field names are case-insensitive and are always
capitalized the same when output (see field_capitalize function in
Dpkg::Control::Fields).
The order in which fields have been set is remembered and is used
to be able to dump back the same content. The output order can also be
overridden if needed.

You can store arbitrary values in the hash, they will always be properly
escaped in the output to conform to the syntax of control files. This is
relevant mainly for multiline values: while the first line is always output
unchanged directly after the field name, supplementary lines are
modified. Empty lines and lines containing only dots are prefixed with
" ." (space + dot) while other lines are prefixed with a single space.

During parsing, trailing spaces are stripped on all lines while leading
spaces are stripped only on the first line of each field.

=head1 METHODS

=over 4

=item $c = Dpkg::Control::Hash->new(%opts)

Creates a new object with the indicated options. Supported options
are:

=over 8

=item allow_pgp

Configures the parser to accept OpenPGP signatures around the control
information. Value can be 0 (default) or 1.

=item allow_duplicate

Configures the parser to allow duplicate fields in the control
information.
The last value overrides any previous values.
Value can be 0 (default) or 1.

=item keep_duplicate

Configure the parser to keep values for duplicate fields found in the control
information (when B<allow_duplicate> is enabled), as array references.
Value can be 0 (default) or 1.

=item drop_empty

Defines if empty fields are dropped during the output. Value can be 0
(default) or 1.

=item name

The user friendly name of the information stored in the object. It might
be used in some error messages or warnings. A default name might be set
depending on the type.

=item is_pgp_signed

Set by the parser (starting in dpkg 1.17.0) if it finds an OpenPGP
signature around the control information. Value can be 0 (default)
or 1, and undef when the option is not supported by the code (in
versions older than dpkg 1.17.0).

=back

=cut
114
sub new {
    my ($this, %opts) = @_;

    # Accept both a class name and an existing instance as invocant.
    my $class = ref($this) || $this;

    # The object is a reference to a scalar that holds the state hash,
    # rather than the hash reference itself: hash-dereferencing is
    # overloaded on this class and a hash-based object would recurse
    # forever when accessing its own state.
    my $state = {
        in_order => [],
        out_order => [],
        is_pgp_signed => 0,
        allow_pgp => 0,
        allow_duplicate => 0,
        keep_duplicate => 0,
        drop_empty => 0,
    };
    my $self = bless \$state, $class;

    # The tied hash needs the blessed object, so it is created afterwards.
    $state->{fields} = Dpkg::Control::HashCore::Tie->new($self);

    # Options set by the user override default values.
    @{$state}{keys %opts} = values %opts;

    return $self;
}
139
140# There is naturally a circular reference between the tied hash and its
141# containing object. Happily, the extra layer of scalar reference can
142# be used to detect the destruction of the object and break the loop so
143# that everything gets garbage-collected.
144
sub DESTROY {
    my ($self) = @_;

    # Dropping the tied hash breaks the circular reference between the
    # object state and the tie, so both can be garbage-collected.
    delete ${$self}->{fields};
}
149
150 - 154
=item $c->set_options(%opts)

Changes the value of one or more options.

=cut
155
sub set_options {
    my ($self, %opts) = @_;

    # Copy every given option into the object state, overwriting any
    # previous value stored under the same name.
    for my $option (keys %opts) {
        ${$self}->{$option} = $opts{$option};
    }
}
160
161 - 165
=item $value = $c->get_option($option)

Returns the value of the corresponding option.

=cut
166
sub get_option {
    my ($self, $option) = @_;

    # Options live in the state hash behind the scalar reference.
    my $state = ${$self};

    return $state->{$option};
}
171
172 - 176
=item $c->parse_error($file, $fmt, ...)

Prints an error message and dies on syntax parse errors.

=cut
177
sub parse_error {
    my ($self, $file, $msg, @args) = @_;

    # The message is only treated as a format string when arguments for
    # it have been supplied.
    if (@args) {
        $msg = sprintf($msg, @args);
    }

    # $. is the current line number of the last read filehandle.
    error(g_('syntax error in %s at line %d: %s'), $file, $., $msg);
}
184
185 - 194
=item $c->parse($fh, $description)

Parse a control file from the given filehandle. Exits in case of errors.
$description is used to describe the filehandle, ideally it's a filename
or a description of where the data comes from. It's used in error
messages. When called multiple times, the parsed fields are accumulated.

Returns true if some fields have been parsed.

=cut
195
sub parse {
    my ($self, $fh, $desc) = @_;

    my $paraborder = 1;      # No significant line has been seen yet.
    my $parabody = 0;        # At least one field has been parsed.
    my $cf;                  # Current field
    my $expect_pgp_sig = 0;  # An OpenPGP signed message header was seen.
    local $_;

    while (<$fh>) {
        # In the common case there will be just a trailing \n character,
        # so using chomp here which is very fast will avoid the latter
        # s/// doing anything, which gives us a significant speed up.
        chomp;
        # Keep the line before whitespace stripping to match armor headers.
        my $armor = $_;
        s/\s+$//;

        # Skip blank lines before the paragraph starts.
        next if length($_) == 0 and $paraborder;

        my $lead = substr $_, 0, 1;
        next if $lead eq '#';
        $paraborder = 0;

        my ($name, $value) = split /\s*:\s*/, $_, 2;
        # A line without any colon yields no value at all; such a line is
        # not a field and must not be accepted as one with an undefined
        # value.
        if (defined $name and defined $value and $name =~ m/^\S+?$/) {
            $parabody = 1;
            if ($lead eq '-') {
                $self->parse_error($desc, g_('field cannot start with a hyphen'));
            }
            if (exists $self->{$name}) {
                unless ($$self->{allow_duplicate}) {
                    $self->parse_error($desc, g_('duplicate field %s found'), $name);
                }
                if ($$self->{keep_duplicate}) {
                    if (ref $self->{$name} ne 'ARRAY') {
                        # Switch value into an array.
                        $self->{$name} = [ $self->{$name}, $value ];
                    } else {
                        # Append the value.
                        push @{$self->{$name}}, $value;
                    }
                } else {
                    # Overwrite with last value.
                    $self->{$name} = $value;
                }
            } else {
                $self->{$name} = $value;
            }
            $cf = $name;
        } elsif (m/^\s(\s*\S.*)$/) {
            my $line = $1;
            unless (defined($cf)) {
                $self->parse_error($desc, g_('continued value line not in field'));
            }
            # Lines made only of dots are escaped with one extra dot.
            if ($line =~ /^\.+$/) {
                $line = substr $line, 1;
            }
            my $current = $self->{$cf};
            if (ref $current eq 'ARRAY') {
                # Kept duplicates: the continuation belongs to the value
                # that was parsed last, not to the array reference itself.
                $current->[-1] .= "\n$line";
            } else {
                $self->{$cf} .= "\n$line";
            }
        } elsif (length($_) == 0 ||
                 ($expect_pgp_sig && $armor =~ m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/)) {
            if ($expect_pgp_sig) {
                # Skip empty lines
                $_ = <$fh> while defined && m/^\s*$/;
                unless (length $_) {
                    $self->parse_error($desc, g_('expected OpenPGP signature, ' .
                                                 'found end of file after blank line'));
                }
                chomp;
                unless (m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/) {
                    $self->parse_error($desc, g_('expected OpenPGP signature, ' .
                                                 "found something else '%s'"), $_);
                }
                # Skip OpenPGP signature
                while (<$fh>) {
                    chomp;
                    last if m/^-----END PGP SIGNATURE-----[\r\t ]*$/;
                }
                # $_ is undefined here only when the end of file was
                # reached before the end marker.
                unless (defined $_) {
                    $self->parse_error($desc, g_('unfinished OpenPGP signature'));
                }
                # This does not mean the signature is correct, that needs to
                # be verified by an OpenPGP backend.
                $$self->{is_pgp_signed} = 1;
            }
            last; # Finished parsing one block
        } elsif ($armor =~ m/^-----BEGIN PGP SIGNED MESSAGE-----[\r\t ]*$/) {
            $expect_pgp_sig = 1;
            if ($$self->{allow_pgp} and not $parabody) {
                # Skip OpenPGP headers
                while (<$fh>) {
                    last if m/^\s*$/;
                }
            } else {
                $self->parse_error($desc, g_('OpenPGP signature not allowed here'));
            }
        } else {
            $self->parse_error($desc,
                               g_('line with unknown format (not field-colon-value)'));
        }
    }

    if ($expect_pgp_sig and not $$self->{is_pgp_signed}) {
        $self->parse_error($desc, g_('unfinished OpenPGP signature'));
    }

    # True if at least one field was parsed during this call.
    return defined($cf);
}
303
304 - 315
=item $c->load($file)

Parse the content of $file. Exits in case of errors. Returns true if some
fields have been parsed.

=item $c->find_custom_field($name)

Scan the fields and look for a user specific field whose name matches the
following regex: /^X[SBC]*-$name$/i. Return the name of the field found or
undef if nothing has been found.

=cut
316
sub find_custom_field {
    my ($self, $name) = @_;

    # The requested name is matched literally (and case-insensitively)
    # behind an X-, XS-, XB-, XC-, ... prefix.
    my $pattern = qr/^X[SBC]*-\Q$name\E$/i;
    my @matches = grep { $_ =~ $pattern } keys %{$self};

    # First matching field name, or nothing at all when none matches.
    return @matches ? $matches[0] : ();
}
324
325 - 329
=item $c->get_custom_field($name)

Identify a user field and retrieve its value.

=cut
330
sub get_custom_field {
    my ($self, $name) = @_;

    # Resolve the actual (prefixed) field name first.
    my $key = $self->find_custom_field($name);
    return unless defined $key;

    return $self->{$key};
}
337
338 - 351
=item $str = $c->output()

=item "$c"

Get a string representation of the control information. The fields
are sorted in the order in which they have been read or set except
if the order has been overridden with set_output_order().

=item $c->output($fh)

Print the string representation of the control information to a
filehandle.

=cut
352
sub output {
    my ($self, $fh) = @_;

    my $state = ${$self};
    # The string is only accumulated when the caller uses the return value.
    my $build_string = defined wantarray;
    my $str = '';

    my @keys;
    if (@{$state->{out_order}}) {
        # Rank of each field in the requested output order, starting at 1.
        my %rank;
        my @wanted = @{$state->{out_order}};
        @rank{@wanted} = (1 .. scalar @wanted);

        # Ranked fields come first, in rank order; the remaining ones
        # follow, sorted by name.
        @keys = sort {
            my ($ra, $rb) = ($rank{$a}, $rank{$b});
            (defined $ra && defined $rb) ? $ra <=> $rb
                : defined $ra            ? -1
                : defined $rb            ? 1
                :                          $a cmp $b;
        } keys %{$self};
    } else {
        @keys = @{$state->{in_order}};
    }

    foreach my $key (@keys) {
        next unless exists $self->{$key};

        my $value = $self->{$key};
        # Skip whitespace-only fields
        next if $state->{drop_empty} and $value !~ m/\S/;

        # Escape data to follow control file syntax
        my ($first_line, @lines) = split /\n/, $value;

        my $kv = length($first_line) ? "$key: $first_line\n" : "$key:\n";
        foreach my $line (@lines) {
            $line =~ s/\s+$//;
            # Empty lines and lines made only of dots get an extra dot.
            my $prefix = ($line eq '' || $line =~ /^\.+$/) ? ' .' : ' ';
            $kv .= "$prefix$line\n";
        }

        # Print it out
        if ($fh) {
            print { $fh } $kv
                or syserr(g_('write error on control data'));
        }
        $str .= $kv if $build_string;
    }

    return $str;
}
405
406 - 414
=item $c->save($filename)

Write the string representation of the control information to a file.

=item $c->set_output_order(@fields)

Define the order in which fields will be displayed in the output() method.

=cut
415
sub set_output_order {
    my ($self, @fields) = @_;

    # Store a private copy of the list as the new output order.
    my @order = @fields;
    ${$self}->{out_order} = \@order;
}
421
422 - 427
=item $c->apply_substvars($substvars)

Update all fields by replacing the variables references with
the corresponding value stored in the Dpkg::Substvars object.

=cut
428
sub apply_substvars {
    my ($self, $substvars, %opts) = @_;

    # Add substvars to refer to other fields
    $substvars->set_field_substvars($self, 'F');

    foreach my $f (keys %$self) {
        my $v = $substvars->substvars($self->{$f}, %opts);
        if ($v ne $self->{$f}) {
            my $sep = field_get_sep_type($f);

            # If we replaced stuff, ensure we're not breaking
            # a dependency field by introducing empty lines, or multiple
            # commas

            if ($sep & (FIELD_SEP_COMMA | FIELD_SEP_LINE)) {
                # Drop every empty/whitespace-only line. The look-ahead
                # leaves the following newline in place so that several
                # consecutive empty lines are all removed.
                $v =~ s/\n[ \t]*(?=\n|$)//g;
            }

            if ($sep & FIELD_SEP_COMMA) {
                # Collapse runs of commas, and strip leading and trailing
                # ones.
                $v =~ s/,[\s,]*,/,/g;
                $v =~ s/^\s*,\s*//;
                $v =~ s/\s*,\s*$//;
            }
        }
        # Replace ${} with $, which is otherwise an invalid substitution, but
        # this then makes it possible to use ${} as an escape sequence such
        # as ${}{VARIABLE}.
        $v =~ s/\$\{\}/\$/g;

        $self->{$f} = $v;
    }
}
465
466package Dpkg::Control::HashCore::Tie;
467
468# This class is used to tie a hash. It implements hash-like functions by
469# normalizing the name of fields received in keys (using
470# Dpkg::Control::Fields::field_capitalize). It also stores the order in
471# which fields have been added in order to be able to dump them in the
472# same order. But the order information is stored in a parent object of
473# type Dpkg::Control.
474
475
44
44
44
39065
29
408
use strict;
476
44
44
44
83
35
728
use warnings;
477
478
44
44
44
77
52
1693
use Dpkg::Control::FieldsCore;
479
480
44
44
44
101
15
924
use Carp;
481
44
44
44
88
32
911
use Tie::Hash;
482
44
44
44
148
39
86
use parent -norequire, qw(Tie::ExtraHash);
483
484# $self->[0] is the real hash
485# $self->[1] is a reference to the hash contained by the parent object.
486# This reference bypasses the top-level scalar reference of a
487# Dpkg::Control::Hash, hence ensuring that reference gets DESTROYed
488# properly.
489
# Dpkg::Control::HashCore::Tie->new($parent)
#
# Return a reference to a tied hash implementing storage of simple
# "field: value" mapping as used in many Debian-specific files. All
# remaining arguments are handed over to TIEHASH.

sub new {
    my ($class, @args) = @_;

    my %fields;
    tie %fields, $class, @args; ## no critic (Miscellanea::ProhibitTies)

    return \%fields;
}
501
sub TIEHASH {
    my ($class, $parent) = @_;

    my $is_control = $parent->isa('Dpkg::Control::HashCore')
                  || $parent->isa('Dpkg::Control::Hash');
    croak 'parent object must be Dpkg::Control::Hash' unless $is_control;

    # Slot 0 is the real storage hash, slot 1 is the hash held inside the
    # parent object (reached through its top-level scalar reference).
    my @slots = ({}, ${$parent});

    return bless \@slots, $class;
}
509
sub FETCH {
    my ($self, $key) = @_;

    # Values are stored under the lowercased field name.
    my $store = $self->[0];
    my $norm = lc $key;

    return unless exists $store->{$norm};
    return $store->{$norm};
}
516
sub STORE {
    my ($self, $key, $value) = @_;

    my ($store, $parent) = @{$self};
    my $norm = lc $key;

    # A field seen for the first time is recorded, in its capitalized
    # form, at the end of the parent's insertion order.
    unless (exists $store->{$norm}) {
        push @{$parent->{in_order}}, field_capitalize($norm);
    }

    $store->{$norm} = $value;
}
525
sub EXISTS {
    my ($self, $key) = @_;

    # Field names are looked up in lowercase.
    my $store = $self->[0];

    return exists $store->{lc $key};
}
531
sub DELETE {
    my ($self, $key) = @_;

    my ($store, $parent) = @{$self};
    my $norm = lc $key;

    # Nothing to do for unknown fields.
    return 0 unless exists $store->{$norm};

    delete $store->{$norm};

    # Forget the field in the parent's insertion order as well; the list
    # is updated in place.
    my $order = $parent->{in_order};
    @{$order} = grep { lc($_) ne $norm } @{$order};

    return 1;
}
545
sub FIRSTKEY {
    my $self = shift;
    my $parent = $self->[1];

    # The first key is the first field of the parent's insertion order
    # that is still present in the storage hash.
    foreach my $key (@{$parent->{in_order}}) {
        return $key if exists $self->[0]->{lc $key};
    }

    # No field at all: say so explicitly, as the value of a sub ending on
    # a loop is unspecified and must not be mistaken for a key.
    return;
}
553
sub NEXTKEY {
    my ($self, $last) = @_;

    my $store = $self->[0];
    my @order = @{$self->[1]->{in_order}};

    # Locate the first occurrence of the previously returned key.
    my $pos = 0;
    $pos++ while $pos < @order and $order[$pos] ne $last;

    # The next key is the first field after it that is still stored.
    foreach my $i ($pos + 1 .. $#order) {
        my $key = $order[$i];
        return $key if exists $store->{lc $key};
    }

    return;
}
567
5681;
569
570=back
571
572 - 586
=head1 CHANGES

=head2 Version 1.02 (dpkg 1.21.0)

New option: "keep_duplicate" in new().

=head2 Version 1.01 (dpkg 1.17.2)

New method: $c->parse_error().

=head2 Version 1.00 (dpkg 1.17.0)

Mark the module as public.

=cut
587
5881;