File-BOM
view release on metacpan or search on metacpan
t/01..bom.t view on Meta::CPAN
#!/usr/bin/perl
use strict;
use warnings;
use lib qw( t/lib );
use Test::More;
use Test::Framework;
use Encode qw( encode decode :fallback_all );
use Fcntl qw( :seek );
our @encodings;
BEGIN {
# encodings to use in unseekable test
@encodings = qw( UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE );
plan tests => 11 + (@test_files * 14) + (@encodings * 4);
use_ok("File::BOM", ':all');
}
# Ignore known harmless warning
local $SIG{__WARN__} = sub {
my $warning = "@_";
if ($warning !~ /^UTF-(?:16|32)LE:Partial character/) {
warn $warning;
}
};
for my $file (@test_files) {
my $file_enc = $file2enc{$file};
is(open_bom(FH, $file2path{$file}), $file2enc{$file}, "$file: open_bom returned encoding");
my $expect = $filecontent{$file};
my $line = <FH>;
chomp $line;
is($line, $expect, "$file: test content returned OK");
close FH;
{
# test defuse
open BOMB, '<', $file2path{$file}
or die "Couldn't read '$file2path{$file}': $!";
my $enc = defuse BOMB;
is($enc, $file_enc, "$file: defuse returns correct encoding ($enc)");
$line = <BOMB>;
chomp $line;
is($line, $expect, "$file: defused version content OK");
close BOMB;
}
open FH, '<', $file2path{$file};
my $first_line;
{
local $/ = $fileeol{$file};
$first_line = <FH>;
chomp $first_line;
}
seek(FH, 0, SEEK_SET);
is(get_encoding_from_filehandle(FH), $file_enc, "$file: get_encoding_from_filehandle returned correct encoding");
my($enc, $offset) = get_encoding_from_bom($first_line);
is($enc, $file_enc, "$file: get_encoding_from_bom also worked");
{
my $decoded = $enc ? decode($enc, substr($first_line, $offset))
: $first_line;
is($decoded, $expect, "$file: .. and offset worked with substr()");
}
#
# decode_from_bom()
#
my $result = decode_from_bom($first_line, 'UTF-8', FB_CROAK);
is($result, $expect, "$file: decode_from_bom() scalar context");
{
# with default
my $default = 'UTF-8';
my $expect_enc = $file_enc || $default;
my($decoded, $got_enc) = decode_from_bom($first_line, $default, FB_CROAK);
is($decoded, $expect, "$file: decode_from_bom() list context");
is($got_enc, $expect_enc, "$file: decode_from_bom() list context encoding");
}
{
# without default
my $expect_enc = $file_enc;
my($decoded, $got_enc) = decode_from_bom($first_line, undef, FB_CROAK);
is($decoded, $expect, "$file: decode_from_bom() list context, no default");
is($got_enc, $expect_enc, "$file: decode_from_bom() list context encoding, no default");
}
seek(FH, 0, SEEK_SET);
($enc, my $spill) = get_encoding_from_stream(FH);
$line = <FH>; chomp $line;
is($enc, $file_enc, "$file: get_encoding_from_stream()");
$line = $spill . $line;
$line = decode($enc, $line) if $enc;
is($line, $expect, "$file: read OK after get_encoding_from_stream");
close FH;
}
# Test unseekable
SKIP: {
my $tests = 4 * @encodings;
skip "mkfifo not supported on this platform", $tests
unless $fifo_supported;
skip "mkfifo tests skipped on cygwin, set TEST_FIFO to enable them", $tests
if $^O eq 'cygwin' && !$ENV{'TEST_FIFO'};
for my $encoding (@encodings) {
my($pid, $fifo, $enc, $spill, $result);
# We need two copies of this as the encode below is destructive!
my $expected = my $test = "Testing \x{2170}, \x{2171}, \x{2172}\n";
my $bytes = $enc2bom{$encoding}
. encode($encoding, $test, FB_CROAK);
($pid, $fifo) = write_fifo($bytes);
($enc, $spill) = open_bom(my $fh, $fifo);
$result = $spill . <$fh>;
close $fh;
waitpid($pid, 0);
unlink $fifo;
is($enc, $encoding, "Read BOM correctly in unseekable $encoding file");
is($result, $expected, "Read $encoding data from unseekable source");
# Now test defuse too
($pid, $fifo) = write_fifo($bytes);
open($fh, '<:utf8', $fifo) or die "Couldn't read '$fifo': $!";
($enc, $spill) = defuse $fh;
$result = $spill . <$fh>;
close $fh;
waitpid($pid, 0);
unlink $fifo;
is($enc, $encoding, "defused fifo OK ($encoding)");
is($result, $expected, "read defused fifo OK ($encoding)")
or diag(
"Hex dump:\n".
"Got: ". hexdump($result) ."\n".
"Expected: ". hexdump($expected) ."\n".
"Spillage: ". hexdump($spill)
);
}
}
# Test broken BOM
{
my $broken_content = "\xff\xffThis file has a broken BOM";
my $broken_file = 't/data/broken_bom.txt';
my($enc, $spill) = open_bom(my $fh, $broken_file);
is($enc, '', "open_bom on file with broken BOM has no encoding");
{
my $line = <$fh>;
chomp $line;
is($line, $broken_content, "handle with broken BOM returns as expected");
}
SKIP: {
skip "mkfifo not supported on this platform", 3
unless $fifo_supported;
skip "mkfifo tests skipped on cygwin, set TEST_FIFO to enable them", 3
if $^O eq 'cygwin' && !$ENV{'TEST_FIFO'};
my($pid, $fifo) = write_fifo($broken_content);
open my $fh, '<', $fifo or die "Cannot read fifo '$fifo': $!";
my($enc, $spill) = get_encoding_from_filehandle($fh);
is($enc, '', "get_encoding_from_filehandle() on unseekable file broken bom");
ok($spill, ".. spillage was produced");
is($spill . <$fh>, $broken_content, "spillage + content as expected");
close $fh;
waitpid($pid, 0);
unlink $fifo;
}
}
# Test internals
is(File::BOM::_get_char_length('UTF-8', 0xe5), 3, '_get_char_length() on UTF-8 start byte (3)');
is(File::BOM::_get_char_length('UTF-8', 0xd5), 2, '_get_char_length() on UTF-8 start byte (2)');
is(File::BOM::_get_char_length('UTF-8', 0x7f), 1, '_get_char_langth() on UTF-8 single byte char');
is(File::BOM::_get_char_length('', ''), undef, '_get_char_length() on undef');
is(File::BOM::_get_char_length('UTF-32BE', ''), 4, '_get_char_length() on UTF-32');
__END__
vim: ft=perl
( run in 0.618 second using v1.01-cache-2.11-cpan-39bf76dae61 )