App-MHFS
view release on metacpan or search on metacpan
t/01-util.t view on Meta::CPAN
}
foreach my $overlong (@overlong) {
push @tests, [$overlong, length($overlong), 1, ord(decode('utf8', $overlong, Encode::LEAVE_SRC))];
}
foreach my $toolong (@toolong) {
push @tests, [$toolong, 1, 1];
}
foreach my $truncated (@truncated) {
my @test = (@$truncated, 1);
push @tests, \@test;
}
foreach my $test (@tests) {
my ($bytes, $bytelength, $is_replacement, $codepoint) = @$test;
my $display = uc(unpack("H*", $bytes));
my $peeked = MHFS::Util::_peek_utf8_codepoint($bytes);
my $message = "_peek_utf8_codepoint $display ". ($is_replacement ? 'is' : 'isnt'). ' U+FFFD';
$is_replacement ? is($peeked->{codepoint}, 0xFFFD, $message) : isnt($peeked->{codepoint}, 0xFFFD, $message);
is($peeked->{bytelength}, $bytelength, "_peek_utf8_codepoint $display bytelength is as expected");
if (defined $codepoint) {
is($peeked->{codepoint}, $codepoint, "_peek_utf8_codepoint $display codepoint matches decode(utf8)");
}
}
}
{
my $result = MHFS::Util::surrogatepairtochar("\x{D83C}", "\x{DF84}");
is(ord($result), 0x1F384, "Converting surrogate pair for $result (U+1F384)");
}
{
my $result = MHFS::Util::surrogatepairtochar("\x{D800}", "\x{DC00}");
is(ord($result), 0x10000, 'First possible surrogate pair combination');
}
{
my $result = MHFS::Util::surrogatepairtochar("\x{DBFF}", "\x{DFFF}");
is(ord($result), 0x10FFFF, 'Last possible surrogate pair combination');
}
{
my $result = MHFS::Util::surrogatecodepointpairtochar(0xD83C, 0xDF84);
is(ord($result), 0x1F384, "Converting surrogate pair for $result (U+1F384)");
}
{
my $result = MHFS::Util::surrogatecodepointpairtochar(0xD800, 0xDC00);
is(ord($result), 0x10000, 'First possible surrogate pair combination');
}
{
my $result = MHFS::Util::surrogatecodepointpairtochar(0xDBFF, 0xDFFF);
is(ord($result), 0x10FFFF, 'Last possible surrogate pair combination');
}
{
my $result = get_printable_utf8('A'.chr(0xFF).'B');
is($result, 'A'.chr(0xFFFD).'B', 'Valid invalid valid');
}
{
my $result = get_printable_utf8("A\xED\xA0\xBC\xED\xBE\x84B");
is($result, 'A'.chr(0x1F384).'B', 'Valid low surrogate high surrogate valid');
}
{
# verify truncated sequences are replaced as expected
my @tests = (
["\xC2", chr(0xFFFD)],
["\xC2A", chr(0xFFFD).'A'],
["\xE0\xA0", chr(0xFFFD)],
["\xE0\xA0A", chr(0xFFFD).'A'],
["\xF0\x90\x80", chr(0xFFFD)],
["\xF0\x90\x80A", chr(0xFFFD).'A'],
);
foreach my $test (@tests) {
my $display = uc(unpack("H*", $test->[0]));
is(get_printable_utf8($test->[0]), $test->[1], "get_printable_utf8 truncated sequences decode as expected");
}
}
{
my $fname = 'test_read_text_file.txt';
if(open(my $fh, '>:raw', $fname)) {
print $fh 'A'.chr(0xFF).'B';
close($fh);
my $text = do {
local $SIG{__WARN__} = sub {};
read_text_file_lossy($fname)
};
is($text, 'A'.chr(0xFFFD).'B', 'read_text_file_lossy Valid invalid valid');
my $message = 'read_text_file throws on invalid file';
try {
read_text_file($fname);
fail($message);
} catch ($e) {
pass($message);
}
unlink($fname);
}
}
{
my $fname = 'test_write_text_file.txt';
my $message = 'write_text_file throws on invalid text';
my $input = "A\x{D800}B";
try {
{
local $SIG{__WARN__} = sub {};
write_text_file($fname, $input);
}
fail($message);
} catch ($e) {
pass($message);
}
unlink($fname);
try {
{
local $SIG{__WARN__} = sub {};
write_text_file_lossy($fname, $input);
}
my $text = read_text_file($fname);
is($text, 'A'.chr(0xFFFD).'B', 'write_text_file_lossy Valid invalid valid');
} catch ($e) {
fail("write_text_file_lossy does not crash");
}
unlink($fname);
( run in 0.815 second using v1.01-cache-2.11-cpan-e1769b4cff6 )