Data-TableReader
view release on metacpan or search on metacpan
t/10-decoder-csv.t view on Meta::CPAN
}
# Run the CSV decoder over each encoding fixture (the subs named in the qw()
# list below, called as class methods on main) and check the decoded rows.
# Every fixture is exercised twice: through a seekable in-memory file handle,
# and through a pipe, which cannot seek.
sub test_utf_bom {
    for my $input_fn (qw( utf8_bom utf16_le_bom utf16_be_bom utf8_nobom deceptive_utf8_nobom )) {
        subtest "seekable $input_fn" => sub {
            my $input= main->$input_fn;
            # Report $! like the other I/O calls in this sub do, rather than a bare "Died".
            open my $input_fh, '<', \$input or die "open(in-memory): $!";
            my $d= new_ok( 'Data::TableReader::Decoder::CSV',
                [ file_name => '', file_handle => $input_fh, _log => $log_fn ],
                "CSV decoder for $input_fn" );
            ok( my $iter= $d->iterator, 'got iterator' );
            # The first cell may or may not carry a leading U+FFFD, depending on the fixture.
            like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
            is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
            is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
            is_deeply( $iter->(), undef, 'no fourth row' );
            ok( $iter->seek(0), 'rewind' );
            # workaround for a perl bug! the input string gets corrupted
            # (restore the first 8 bytes of the buffer from a fresh copy of the fixture)
            substr($input,0,8)= substr(main->$input_fn,0,8);
            # Distinct description so a failure here is not confused with the pre-rewind read.
            like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row after rewind' );
            # Passing an arrayref of column indexes returns only those columns.
            is_deeply( $iter->([0,3]), [ "\x{8A66}\x{3057}", 3 ], 'slice from second row' );
            ok( !$iter->next_dataset, 'no next dataset' );
        };
        subtest "nonseekable $input_fn" => sub {
            my $input= main->$input_fn;
            # Feed the fixture through a pipe; the write end is closed before reading
            # so the reader sees EOF after the fixture bytes.
            pipe(my ($input_fh, $out_fh)) or die "pipe: $!";
            print $out_fh $input or die "print(pipe_out): $!";
            close $out_fh or die "close: $!";
            my $d= new_ok( 'Data::TableReader::Decoder::CSV',
                [ file_name => '', file_handle => $input_fh, _log => $log_fn ],
                "CSV decoder for $input_fn" );
            if ($input_fn =~ /deceptive/) {
                # Some inputs on non-seekable file handles will result in this exception.
                # This is expected.
                like( (try { $d->iterator } catch {$_}), qr/seek/, 'can\'t seek exception' );
            } else {
                ok( my $iter= $d->iterator, 'got iterator' );
                like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
                is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
                is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
                is_deeply( $iter->(), undef, 'no fourth row' );
                ok( !$iter->next_dataset, 'no next dataset' );
            }
        };
    }
}
# Run each test group as its own named subtest, in this fixed order,
# then close out the test plan.
for my $group (
    [ basic      => \&test_basic ],
    [ multi_iter => \&test_multi_iterator ],
    [ utf_bom    => \&test_utf_bom ],
) {
    my ($label, $code)= @$group;
    subtest $label => $code;
}
done_testing;
# Fixture: a plain ASCII CSV document with a header line and one data line,
# each terminated by a newline.
sub ascii {
    my @lines= ( 'a,b,c,d', '1,2,3,4' );
    return join '', map { "$_\n" } @lines;
}
# Fixture: raw bytes of a UTF-8 document that starts with the byte sequence
# EF BB BF (the UTF-8 byte order mark).  Decoded, the rows after the mark are:
#   "test\n"
#   "\x{8A66}\x{3057},1,2,3\n"
#   "\x{27000}\n"
sub utf8_bom {
    my $bom= "\xEF\xBB\xBF";
    my @rows= (
        "test\n",
        "\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n",
        "\xF0\xA7\x80\x80\n",
    );
    return join '', $bom, @rows;
}
# Fixture: raw bytes of a little-endian UTF-16 document that starts with the
# byte sequence FF FE (the UTF-16LE byte order mark).  Each 16-bit unit is
# written low byte first; the last row is a surrogate pair (D85C DC00).
sub utf16_le_bom {
    my $bom= "\xFF\xFE";
    my @rows= (
        "t\0e\0s\0t\0\n\0",
        "\x66\x8A\x57\x30,\x001\x00,\x002\x00,\x003\x00\n\x00",
        "\x5C\xD8\x00\xDC\n\0",
    );
    return join '', $bom, @rows;
}
# Fixture: raw bytes of a big-endian UTF-16 document that starts with the
# byte sequence FE FF (the UTF-16BE byte order mark).  Each 16-bit unit is
# written high byte first; the last row is a surrogate pair (D85C DC00).
sub utf16_be_bom {
    my $bom= "\xFE\xFF";
    my @rows= (
        "\x00t\x00e\x00s\x00t\x00\n",
        "\x8A\x66\x30\x57\x00,\x001\x00,\x002\x00,\x003\x00\n",
        "\xD8\x5C\xDC\x00\0\n",
    );
    return join '', $bom, @rows;
}
# Fixture: raw bytes of a UTF-8 document with no byte order mark.
# The three rows hold the same UTF-8 byte sequences as the rows in utf8_bom.
sub utf8_nobom {
    my @rows= (
        "test\n",
        "\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n",
        "\xF0\xA7\x80\x80\n",
    );
    return join '', @rows;
}
# Fixture: raw bytes of a UTF-8 document with no byte order mark whose first
# three bytes are EF BF BD, the UTF-8 encoding of U+FFFD.
# NOTE(review): presumably "deceptive" because EF BF BD shares its first byte
# with the UTF-8 BOM (EF BB BF) -- confirm against the decoder's detection code.
sub deceptive_utf8_nobom {
    my $lead= "\xEF\xBF\xBD";
    my @rows= (
        "test\n",
        "\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n",
        "\xF0\xA7\x80\x80\n",
    );
    return join '', $lead, @rows;
}
( run in 0.577 second using v1.01-cache-2.11-cpan-e1769b4cff6 )