PICA-Data
view release on metacpan or search on metacpan
t/20-parser.t view on Meta::CPAN
my $first = pica_parser(plain => 't/files/pica.plain')->next;
is $first->{_id}, '12345', 'record _id';
is pica_id($first), '12345', 'pica_id';
ok $first->{record}->[0][0] eq '002@', 'tag from first field';
is_deeply $first->{record}->[1], ['003@', '', 0 => '12345'], 'second field';
is_deeply $first->{record}->[4], ['012X', '', 0 => '0', x => '', y => ''],
'empty subfields';
is $first->{record}->[6]->[7], 'æ³ç»çº¬ä¸»ç¼;', 'Unicode';
is_deeply $first->{record}->[11],
['145Z', '40', 'a', '$', 'b', 'test$', 'c', '...'], 'sub field with $';
my $all = pica_parser(plain => 't/files/pica.plain')->all;
is_deeply $all->[0], $first, 'read all';
is @$all, 2, 'read all';
foreach my $type (qw(Plain Plus JSON Binary XML PPXML PIXML Import)) {
my $module = "PICA::Parser::$type";
my $file = 't/files/pica.' . lc($type);
note $module;
my $parser = pica_parser($type => $file);
is ref($parser), "PICA::Parser::$type", "parser from file";
my $record = $parser->next;
is_deeply $record, $first, 'first record';
is $parser->count, 1, 'count';
#note explain $record if $type eq 'Import';
ok $parser->next()->{_id} eq '67890', 'next record';
ok !$parser->next, 'parsed all records';
is $parser->count, 2, 'count';
foreach my $mode ('<', '<:utf8') {
next
if ($mode eq '<' and $type ne 'XML')
or ($mode eq '<:utf8' and $type eq 'XML');
open(my $fh, $mode, $file);
my $record = pica_parser($type => $fh)->next;
is_deeply pica_value($record, '021A$h'), 'æ³ç»çº¬ä¸»ç¼;',
'read from handle';
}
# read file as Unicode text string
my $data = do {
open my $fh, "<:encoding(UTF-8)", $file;
join '', <$fh>;
};
# read from string reference
$record = pica_parser($type, \$data)->next;
is $record->{record}[6][7], 'æ³ç»çº¬ä¸»ç¼;',
'Unicode from string reference';
}
# TODO: dump.dat, bgb.example, sru_picaxml.xml
# test XML with BOM
my $xml
= q{<record xmlns="info:srw/schema/5/picaXML-v1.0"><datafield tag="003@"><subfield code="0">1234â¬</subfield></datafield></record>};
my $record = pica_parser(xml => $xml)->next;
is_deeply $record->{record}, [['003@', '', '0', '1234â¬']],
'xml from string';
note 'XML with namespace';
$xml = <<XML;
<p:record xmlns:p="info:srw/schema/5/picaXML-v1.0">
<p:datafield p:tag="003@">
<p:subfield p:code="0">1234â¬</p:subfield>
</p:datafield>
</p:record>
XML
$record = pica_parser(xml => $xml)->next;
is_deeply $record->{record}, [['003@', '', '0', '1234â¬']],
'xml with namespace';
$record = pica_parser(plain => \"003@ Æ0123\n123A/01 Æx1Æy\$2")->next;
is_deeply $record->{record},
[['003@', '', '0', '123'], [qw(123A 01 x 1 y $2)]],
'plain parser supports U+0192 as subfield indicator';
is $record->{_id}, '123', 'include PPN (#80)';
note 'error handling';
{
my $plus
= "X01A \x{1F}01\x{1E}001A/0 \x{1F}01\x{1E}001A/AB \x{1F}01\x{1E}";
warnings_exist {PICA::Parser::Plus->new(\$plus)->next}[
qr{invalid PICA field structure},
qr{invalid PICA field structure},
qr{invalid PICA field structure}
],
'skip faulty fields with warnings';
dies_ok {PICA::Parser::Plus->new(\$plus, strict => 1)->next}
'die on faulty fields with option strict';
my $plain = "X01@ \$01\n\n001@/0 \$01\n\n001@/AB \$01";
warnings_exist {PICA::Parser::Plain->new(\$plain)->next}[
qr{invalid PICA field structure},
qr{invalid PICA field structure},
qr{invalid PICA field structure}
],
'skip faulty fields with warnings';
dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next}
'die on faulty fields with option strict';
dies_ok {pica_parser('doesnotexist')} 'unknown parser';
dies_ok {pica_parser(xml => '')} 'invalid handle';
dies_ok {pica_parser(plus => [])} 'invalid handle';
dies_ok {pica_parser(plain => bless({}, 'MyFooBar'))} 'invalid handle';
# https://github.com/gbv/PICA-Data/issues/136
$plain = '021A $$';
dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next} 'issue 136';
}
is pica_parser(plain => \'012A/00 $xy')->next->string,
"012A \$xy\n\n", 'occurrence zero';
( run in 1.503 second using v1.01-cache-2.11-cpan-39bf76dae61 )