BOM results from the CPAN

Text-AutoCSV

view release on metacpan or search on metacpan

use File::Temp qw(tmpnam);

# Development-mode reminder: while $DEVTIME is true, emit a loud banner in the
# test notes so that the flag is not left enabled in a release.
if ($DEVTIME) {
    my @banner = (
        "",
        "***",
        "***",
        "***  !! WARNING !!",
        "***",
        "***  SET \$DEVTIME TO 0 BEFORE RELEASING THIS CODE TO PRODUCTION",
        "***  RIGHT NOW, \$DEVTIME IS EQUAL TO $DEVTIME",
        "***",
        "***",
        "",
    );

    # One note() call per line, in order, exactly as a hand-written sequence.
    note($_) for @banner;
}

# Sanity check: Text::AutoCSV provides the new() constructor that the tests
# below call.
can_ok( 'Text::AutoCSV', 'new' );

# * *********** *
# * UTF-8 files *
# * *********** *

# UTF-8 tests (UT01..UT10).
#
# Read-only part: the fixtures e1/e2/e3 are read with no encoding given, with
# an explicit 'UTF-8' encoding, and with or without an explicit 'via' layer.
# Read/write part: e3 is written to a temporary file (first as-is, then with
# out_encoding => 'latin2') and read back.
{
    note("");
    note("[UT]F-8 tests");

    # R/O

    my $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e1.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    my $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "A\x{bf}\x{ed}" } ],
        "UT01 - t/e1.csv: read CSV UTF8 chars that are latin1"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT02 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2"
    );

    # Same expected content as UT02, but the fixture read here is e3.csv
    # (the description used to name e2.csv by mistake).
    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ","
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT03 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT04 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2, explicit encoding"
    );

    # Headers are disabled and the single column is named 'Z', so the first
    # line of the file (BOM + header text) shows up as a data row.
    $csv = Text::AutoCSV->new(
        in_file             => "t/${ww}e3.csv",
        croak_if_error      => 0,
        sep_char            => ",",
        encoding            => 'UTF-8',
        via                 => '',
        has_headers         => 0,
        fields_column_names => ['Z']
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,

        # BOM appears here as explicit via discards the use of
        #   :via(File::BOM)
        [
            { 'Z' => "\x{feff}u" },
            { 'Z' => "\x{e9}" },
            { 'Z' => "N\x{11b}\x{10d}\x{ed}" }
        ],
"UT05 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e2.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT06 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2, explicit encoding option"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'UTF-8',
        via            => ':via(File::BOM)'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,

        # This time :via(File::BOM) is requested explicitly through the 'via'
        # parameter => no BOM in the result, no mess
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT07 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding with opts"
    );

    # R/W

    # Plain function call: the '&' sigil form is not needed here and would
    # bypass any prototype on the helper.
    my $tmpf   = get_non_existent_temp_file_name();
    my $csvtmp = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf
    )->write();

# We switch column name to 'Y' to 100% guarantee no confusion with previous tests
    $csv = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        fields_hr      => { 'Y' => 'U' }
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
        "UT08 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2"
    );

    # Second round trip: same input, but the temporary file is written with
    # out_encoding => 'latin2' and read back with encoding => 'latin2'.
    $csvtmp = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        out_encoding   => 'latin2'
    )->write();
    is( $csvtmp->get_in_encoding(),
        'UTF-8', "UT09 - t/e3.csv: verify encoding detection" );

# We switch column name to 'Y' to 100% guarantee no confusion with previous tests
    $csv = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin2',
        fields_hr      => { 'Y' => 'U' }
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT10 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2, output latin2"
    );

    # Remove the temporary output file created by the R/W tests.
    unlink $tmpf;
}

# * ************ *
# * latin* files *
# * ************ *

{
    # latin* tests: LA01/LA02 read the e4 and e5 fixtures with an explicit
    # encoding; the R/W part then copies e5 to a temporary file and re-opens
    # that file, renaming column 'U' to 'Y' through fields_hr.
    note("");
    note("[LA]tin* tests");

    # R/O

    my $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e4.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin1'
    );
    my $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{bf}\x{bf}\x{ed}" } ],
        "LA01 - read CSV latin1, explicit encoding"
    );

    $csv = Text::AutoCSV->new(
        in_file        => "t/${ww}e5.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin2'
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "LA02 - read CSV latin2, explicit encoding"
    );

    # R/W

    # Plain function call: the '&' sigil form is not needed here and would
    # bypass any prototype on the helper.
    my $tmpf   = get_non_existent_temp_file_name();
    my $csvtmp = Text::AutoCSV->new(
        in_file        => "t/${ww}e5.csv",
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin2',
        out_file       => $tmpf
    )->write();

    # Re-open the file just written, still as latin2.
    $csv = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => 'latin2',
        fields_hr      => { 'Y' => 'U' }
    );
    $all = [ $csv->get_hr_all() ];
    is_deeply(

t/03-enc.t view on Meta::CPAN

        croak_if_error => 0,
        sep_char       => ","
    );
    is( $csv2->get_in_encoding(),
        'latin1', "EN12 - t/e4.csv: detect UTF-8 by default after rewrite" );

    # EN13..EN20: encoding detection when 'encoding' is a comma-separated
    # list of candidates. Each object also gets out_file => $tmpf; the
    # objects $c1, $c2 and $c3 are written out later, one at a time.

    # e3.csv: expected to be detected as UTF-8.
    my $c1 = Text::AutoCSV->new(
        in_file        => "t/${ww}e3.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        encoding       => "UTF-8, latin1"
    );
    is( $c1->get_in_encoding(),
        'UTF-8', "EN13 - t/e3.csv: detect UTF-8 with opts" );
    $all = [ $c1->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "EN14 - t/e3.csv: detect UTF-8 with opts (2)"
    );

    # e4.csv: expected to be detected as latin1.
    my $c2 = Text::AutoCSV->new(
        in_file        => "t/${ww}e4.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        encoding       => "UTF-8, latin1"
    );
    is( $c2->get_in_encoding(),
        'latin1', "EN15 - t/e4.csv: detect latin1 with opts" );
    $all = [ $c2->get_hr_all() ];
    is_deeply(
        $all,

        # Code points are spelled with \x{..} escapes (as in LA01) so the
        # expected value does not depend on the encoding of this source file
        # or on 'use utf8' being in effect.
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{bf}\x{bf}\x{ed}" } ],
        "EN16 - t/e4.csv: detect latin1 with opts (2)"
    );

    # e6.csv: expected to be detected as UTF-16LE.
    my $c3 = Text::AutoCSV->new(
        in_file        => "t/${ww}e6.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        encoding       => "UTF-16LE, UTF-8, latin1"
    );
    is( $c3->get_in_encoding(),
        'UTF-16LE', "EN17 - t/e6.csv: detect UTF-16LE with opts" );
    $all = [ $c3->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "EN18 - t/e6.csv: detect UTF-16LE with opts (2)"
    );

    # e7.csv: same checks as e6.csv; the test names mark this one as the
    # BOM variant. $c3 is reused and now refers to the e7.csv object.
    $c3 = Text::AutoCSV->new(
        in_file        => "t/${ww}e7.csv",
        croak_if_error => 0,
        sep_char       => ",",
        out_file       => $tmpf,
        encoding       => "UTF-16LE, UTF-8, latin1"
    );
    is( $c3->get_in_encoding(),
        'UTF-16LE', "EN19 - t/e7.csv: detect UTF-16LE with opts (BOM)" );
    $all = [ $c3->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "EN20 - t/e7.csv: detect UTF-16LE with opts (2) (BOM)"
    );

    # EN21..EN26: write each previously built object to $tmpf, then re-open
    # $tmpf and check that detection and content survive the rewrite. All
    # three objects share the same out_file, so each re-read happens before
    # the next write().

    # $c1 was built from e3.csv.
    $c1->write();
    my $c1r = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => "UTF-8, latin1"
    );
    is( $c1r->get_in_encoding(),
        'UTF-8', "EN21 - t/e3.csv: detect UTF-8 with opts, rewritten" );
    $all = [ $c1r->get_hr_all() ];
    is_deeply(
        $all,
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
        "EN22 - t/e3.csv: detect UTF-8 with opts, rewritten (2)"
    );

    # $c2 was built from e4.csv.
    $c2->write();
    my $c2r = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => "UTF-8, latin1"
    );
    is( $c2r->get_in_encoding(),
        'latin1', "EN23 - t/e4.csv: detect latin1 with opts, rewritten" );
    $all = [ $c2r->get_hr_all() ];
    is_deeply(
        $all,

        # Code points are spelled with \x{..} escapes so the expected value
        # does not depend on the encoding of this source file or on
        # 'use utf8' being in effect.
        [ { 'U' => "\x{e9}" }, { 'U' => "N\x{bf}\x{bf}\x{ed}" } ],
        "EN24 - t/e4.csv: detect latin1 with opts, rewritten (2)"
    );

    # $c3 was last built from e7.csv.
    $c3->write();
    my $c3r = Text::AutoCSV->new(
        in_file        => $tmpf,
        croak_if_error => 0,
        sep_char       => ",",
        encoding       => "UTF-16LE, UTF-8, latin1"
    );

    #
    # FIXME: EN25 and EN26 (re-reading the rewritten UTF-16LE file) are not
    # run on plain Windows; they are skipped there instead of being fixed.
    # The underlying cause is not documented here.
    #
  SKIP: {

        # The count (2) must match the number of tests in this block.
        skip( "OS is plain Windows: skipping tests EN25 and EN26", 2 )
          if $OS_IS_PLAIN_WINDOWS;

        is( $c3r->get_in_encoding(),
            'UTF-16LE',
            "EN25 - t/e7.csv: detect UTF-16LE with opts, rewritten" );
        $all = [ $c3r->get_hr_all() ];
        is_deeply(
            $all,
            [ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
            "EN26 - t/e7.csv: detect UTF-16LE with opts, rewritten (2)"
        );
    }

( run in 0.598 second using v1.01-cache-2.11-cpan-39bf76dae61 )