PICA-Data

 view release on metacpan or  search on metacpan

t/20-parser.t  view on Meta::CPAN


my $first = pica_parser(plain => 't/files/pica.plain')->next;
is $first->{_id}, '12345', 'record _id';
is pica_id($first), '12345', 'pica_id';
ok $first->{record}->[0][0] eq '002@', 'tag from first field';
is_deeply $first->{record}->[1], ['003@', '', 0 => '12345'], 'second field';
is_deeply $first->{record}->[4], ['012X', '', 0 => '0', x => '', y => ''],
    'empty subfields';


is $first->{record}->[6]->[7], '柳经纬主编;', 'Unicode';
is_deeply $first->{record}->[11],
    ['145Z', '40', 'a', '$', 'b', 'test$', 'c', '...'], 'sub field with $';

my $all = pica_parser(plain => 't/files/pica.plain')->all;
is_deeply $all->[0], $first, 'read all';
is @$all, 2, 'read all';

foreach my $type (qw(Plain Plus JSON Binary XML PPXML PIXML Import)) {
    my $module = "PICA::Parser::$type";
    my $file   = 't/files/pica.' . lc($type);

    note $module;
    my $parser = pica_parser($type => $file);
    is ref($parser), "PICA::Parser::$type", "parser from file";

    my $record = $parser->next;
    is_deeply $record, $first, 'first record';
    is $parser->count, 1, 'count';

    #note explain $record if $type eq 'Import';

    ok $parser->next()->{_id} eq '67890', 'next record';
    ok !$parser->next, 'parsed all records';
    is $parser->count, 2, 'count';

    foreach my $mode ('<', '<:utf8') {
        next
            if ($mode eq '<' and $type ne 'XML')
            or ($mode eq '<:utf8' and $type eq 'XML');
        open(my $fh, $mode, $file);
        my $record = pica_parser($type => $fh)->next;
        is_deeply pica_value($record, '021A$h'), '柳经纬主编;',
            'read from handle';
    }

    # read file as Unicode text string
    my $data = do {
        open my $fh, "<:encoding(UTF-8)", $file;
        join '', <$fh>;
    };

    # read from string reference
    $record = pica_parser($type, \$data)->next;
    is $record->{record}[6][7], '柳经纬主编;',
        'Unicode from string reference';

}

# TODO: dump.dat, bgb.example, sru_picaxml.xml
# test XML with BOM

my $xml
    = q{<record xmlns="info:srw/schema/5/picaXML-v1.0"><datafield tag="003@"><subfield code="0">1234€</subfield></datafield></record>};
my $record = pica_parser(xml => $xml)->next;
is_deeply $record->{record}, [['003@', '', '0', '1234€']],
    'xml from string';

note 'XML with namespace';
$xml = <<XML;
<p:record xmlns:p="info:srw/schema/5/picaXML-v1.0">
 <p:datafield p:tag="003@">
   <p:subfield p:code="0">1234€</p:subfield>
 </p:datafield>
</p:record>
XML

$record = pica_parser(xml => $xml)->next;
is_deeply $record->{record}, [['003@', '', '0', '1234€']],
    'xml with namespace';

$record = pica_parser(plain => \"003@ Æ’0123\n123A/01 Æ’x1Æ’y\$2")->next;
is_deeply $record->{record},
    [['003@', '', '0', '123'], [qw(123A 01 x 1 y $2)]],
    'plain parser supports U+0192 as subfield indicator';
is $record->{_id}, '123', 'include PPN (#80)';

note 'error handling';
{
    my $plus
        = "X01A \x{1F}01\x{1E}001A/0 \x{1F}01\x{1E}001A/AB \x{1F}01\x{1E}";
    warnings_exist {PICA::Parser::Plus->new(\$plus)->next}[
        qr{invalid PICA field structure},
        qr{invalid PICA field structure},
        qr{invalid PICA field structure}
    ],
        'skip faulty fields with warnings';
    dies_ok {PICA::Parser::Plus->new(\$plus, strict => 1)->next}
    'die on faulty fields with option strict';
    my $plain = "X01@ \$01\n\n001@/0 \$01\n\n001@/AB \$01";
    warnings_exist {PICA::Parser::Plain->new(\$plain)->next}[
        qr{invalid PICA field structure},
        qr{invalid PICA field structure},
        qr{invalid PICA field structure}
    ],
        'skip faulty fields with warnings';
    dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next}
    'die on faulty fields with option strict';

    dies_ok {pica_parser('doesnotexist')} 'unknown parser';
    dies_ok {pica_parser(xml => '')} 'invalid handle';
    dies_ok {pica_parser(plus => [])} 'invalid handle';
    dies_ok {pica_parser(plain => bless({}, 'MyFooBar'))} 'invalid handle';

    # https://github.com/gbv/PICA-Data/issues/136
    $plain = '021A $$';
    dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next} 'issue 136';
}

is pica_parser(plain => \'012A/00 $xy')->next->string, 
  "012A \$xy\n\n", 'occurrence zero';



( run in 1.503 second using v1.01-cache-2.11-cpan-39bf76dae61 )