Text-AutoCSV
view release on metacpan or search on metacpan
use File::Temp qw(tmpnam);
# When $DEVTIME is true, emit a loud reminder (through Test::More's note())
# that the flag must be reset to 0 before the code is released.
if ($DEVTIME) {
    my @banner_lines = (
        "",
        "***",
        "***",
        "*** !! WARNING !!",
        "***",
        "*** SET \$DEVTIME TO 0 BEFORE RELEASING THIS CODE TO PRODUCTION",
        "*** RIGHT NOW, \$DEVTIME IS EQUAL TO $DEVTIME",
        "***",
        "***",
        "",
    );
    foreach my $banner_line (@banner_lines) {
        note($banner_line);
    }
}
# Sanity check: the class must provide a constructor.
can_ok( 'Text::AutoCSV', 'new' );
# * *********** *
# * UTF-8 files *
# * *********** *
{
    # Checks on UTF-8 encoded inputs: plain reads (UT01-UT07) followed by
    # write/re-read round trips through a temporary file (UT08-UT10).
    note("");
    note("[UT]F-8 tests");

    # R/O
    my $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e1.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    my $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "A\x{bf}\x{ed}" } ],
        "UT01 - t/e1.csv: read CSV UTF8 chars that are latin1"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT02 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],

        # The label names e3.csv, which is the file actually read here.
        "UT03 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT04 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2, explicit encoding"
    );

    # No header line is assumed here: the single column is named 'Z' and
    # the first line of the file is returned as a data record.
    $csv = Text::AutoCSV->new(
        in_file             => "t/${ww}e3.csv",
        croak_if_error      => 0,
        sep_char            => ",",
        encoding            => 'UTF-8',
        via                 => '',
        has_headers         => 0,
        fields_column_names => ['Z']
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,

        # BOM appears here as explicit via discards the use of
        # :via(File::BOM)
        [
            { 'Z' => "\x{feff}u" },
            { 'Z' => "\x{e9}" },
            { 'Z' => "N\x{11b}\x{10d}\x{ed}" }
        ],
        "UT05 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding"
    );

    # NOTE(review): the constructor arguments are the same as for UT04;
    # confirm whether a different option was meant to be exercised.
    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT06 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2, explicit encoding option"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8',
        via            => ':via(File::BOM)'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,

        # This time :via(File::BOM) is requested explicitly through the
        # 'via' parameter => no BOM left in the data
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT07 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding with opts"
    );

    # R/W
    my $tmpf = get_non_existent_temp_file_name();

    # Copy e3.csv to the temporary file; the returned object is not needed.
    Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf
    )->write();

    # We switch column name to 'Y' to 100% guarantee no confusion with previous tests
    $csv = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        fields_hr      => { 'Y' => 'U' }
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT08 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2"
    );

    # Same copy, but the output is written as latin2; the writer object is
    # kept so the detected input encoding can be checked.
    my $csvtmp = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        out_encoding   => 'latin2'
    )->write();
    is( $csvtmp->get_in_encoding(),
        'UTF-8', "UT09 - t/e3.csv: verify encoding detection" );

    # We switch column name to 'Y' to 100% guarantee no confusion with previous tests
    $csv = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin2',
        fields_hr      => { 'Y' => 'U' }
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT10 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2, output latin2"
    );

    # Best-effort cleanup of the temporary file.
    unlink $tmpf;
}
# * ************ *
# * latin* files *
# * ************ *
{
note("");
note("[LA]tin* tests");
# R/O
my $csv = Text::AutoCSV->new(
in_file => "t/${ww}e4.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'latin1'
);
my $all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{bf}\x{bf}\x{ed}" } ],
"LA01 - read CSV latin1, explicit encoding"
);
$csv = Text::AutoCSV->new(
in_file => "t/${ww}e5.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'latin2'
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"LA02 - read CSV latin2, explicit encoding"
);
# R/W
my $tmpf = &get_non_existent_temp_file_name();
my $csvtmp = Text::AutoCSV->new(
in_file => "t/${ww}e5.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'latin2',
out_file => $tmpf
)->write();
$csv = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => 'latin2',
fields_hr => { 'Y' => 'U' }
);
$all = [ $csv->get_hr_all() ];
is_deeply(
croak_if_error => 0,
sep_char => ","
);
is( $csv2->get_in_encoding(),
'latin1', "EN12 - t/e4.csv: detect UTF-8 by default after rewrite" );
my $c1 = Text::AutoCSV->new(
in_file => "t/${ww}e3.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
encoding => "UTF-8, latin1"
);
is( $c1->get_in_encoding(),
'UTF-8', "EN13 - t/e3.csv: detect UTF-8 with opts" );
$all = [ $c1->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"EN14 - t/e3.csv: detect UTF-8 with opts (2)"
);
my $c2 = Text::AutoCSV->new(
in_file => "t/${ww}e4.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
encoding => "UTF-8, latin1"
);
is( $c2->get_in_encoding(),
'latin1', "EN15 - t/e4.csv: detect latin1 with opts" );
$all = [ $c2->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N¿¿í" } ],
"EN16 - t/e4.csv: detect latin1 with opts (2)"
);
my $c3 = Text::AutoCSV->new(
in_file => "t/${ww}e6.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
encoding => "UTF-16LE, UTF-8, latin1"
);
is( $c3->get_in_encoding(),
'UTF-16LE', "EN17 - t/e6.csv: detect UTF-16LE with opts" );
$all = [ $c3->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"EN18 - t/e6.csv: detect UTF-16LE with opts (2)"
);
$c3 = Text::AutoCSV->new(
in_file => "t/${ww}e7.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
encoding => "UTF-16LE, UTF-8, latin1"
);
is( $c3->get_in_encoding(),
'UTF-16LE', "EN19 - t/e7.csv: detect UTF-16LE with opts (BOM)" );
$all = [ $c3->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"EN20 - t/e6.csv: detect UTF-16LE with opts (2) (BOM)"
);
$c1->write();
my $c1r = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => "UTF-8, latin1"
);
is( $c1r->get_in_encoding(),
'UTF-8', "EN21 - t/e3.csv: detect UTF-8 with opts, rewritten" );
$all = [ $c1r->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"EN22 - t/e3.csv: detect UTF-8 with opts, rewritten (2)"
);
$c2->write();
my $c2r = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => "UTF-8, latin1"
);
is( $c2r->get_in_encoding(),
'latin1', "EN23 - t/e4.csv: detect latin1 with opts, rewritten" );
$all = [ $c2r->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N¿¿í" } ],
"EN24 - t/e4.csv: detect latin1 with opts, rewritten (2)"
);
$c3->write();
my $c3r = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => "UTF-16LE, UTF-8, latin1"
);
#
# FIXME FIXME FIXME
#
SKIP: {
    # Both checks below are skipped when $OS_IS_PLAIN_WINDOWS is set; the
    # count passed to skip() matches the two tests in this block.
    skip( "OS is plain Windows: skipping tests EN25 and EN26", 2 )
      if $OS_IS_PLAIN_WINDOWS;

    is( $c3r->get_in_encoding(),
        'UTF-16LE',
        "EN25 - t/e7.csv: detect UTF-16LE with opts, rewritten" );

    $all = [ $c3r->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],

        # Label names e7.csv, consistent with EN25: $c3r re-reads the file
        # written from the e7.csv object.
        "EN26 - t/e7.csv: detect UTF-16LE with opts, rewritten (2)"
    );
}
( run in 0.598 second using v1.01-cache-2.11-cpan-39bf76dae61 )