App-MHFS

 view release on metacpan or  search on metacpan

t/01-util.t  view on Meta::CPAN

    }
    foreach my $overlong (@overlong) {
        push @tests, [$overlong, length($overlong), 1, ord(decode('utf8', $overlong, Encode::LEAVE_SRC))];
    }
    foreach my $toolong (@toolong) {
        push @tests, [$toolong, 1, 1];
    }
    foreach my $truncated (@truncated) {
        my @test = (@$truncated, 1);
        push @tests, \@test;
    }

    foreach my $test (@tests) {
        my ($bytes, $bytelength, $is_replacement, $codepoint) = @$test;
        my $display = uc(unpack("H*", $bytes));
        my $peeked = MHFS::Util::_peek_utf8_codepoint($bytes);
        my $message = "_peek_utf8_codepoint $display ". ($is_replacement ? 'is' : 'isnt'). ' U+FFFD';
        $is_replacement ? is($peeked->{codepoint}, 0xFFFD, $message) : isnt($peeked->{codepoint}, 0xFFFD, $message);
        is($peeked->{bytelength}, $bytelength, "_peek_utf8_codepoint $display bytelength is as expected");
        if (defined $codepoint) {
            is($peeked->{codepoint}, $codepoint, "_peek_utf8_codepoint $display codepoint matches decode(utf8)");
        }
    }
}

{
    my $result = MHFS::Util::surrogatepairtochar("\x{D83C}", "\x{DF84}");
    is(ord($result), 0x1F384, "Converting surrogate pair for $result (U+1F384)");
}
{
    my $result = MHFS::Util::surrogatepairtochar("\x{D800}", "\x{DC00}");
    is(ord($result), 0x10000, 'First possible surrogate pair combination');
}
{
    my $result = MHFS::Util::surrogatepairtochar("\x{DBFF}", "\x{DFFF}");
    is(ord($result), 0x10FFFF, 'Last possible surrogate pair combination');
}

{
    my $result = MHFS::Util::surrogatecodepointpairtochar(0xD83C, 0xDF84);
    is(ord($result), 0x1F384, "Converting surrogate pair for $result (U+1F384)");
}
{
    my $result = MHFS::Util::surrogatecodepointpairtochar(0xD800, 0xDC00);
    is(ord($result), 0x10000, 'First possible surrogate pair combination');
}
{
    my $result = MHFS::Util::surrogatecodepointpairtochar(0xDBFF, 0xDFFF);
    is(ord($result), 0x10FFFF, 'Last possible surrogate pair combination');
}

{
    my $result = get_printable_utf8('A'.chr(0xFF).'B');
    is($result, 'A'.chr(0xFFFD).'B', 'Valid invalid valid');
}
{
    my $result = get_printable_utf8("A\xED\xA0\xBC\xED\xBE\x84B");
    is($result, 'A'.chr(0x1F384).'B', 'Valid low surrogate high surrogate valid');
}
{
    # verify truncated sequences are replaced as expected
    my @tests = (
        ["\xC2", chr(0xFFFD)],
        ["\xC2A", chr(0xFFFD).'A'],
        ["\xE0\xA0", chr(0xFFFD)],
        ["\xE0\xA0A", chr(0xFFFD).'A'],
        ["\xF0\x90\x80", chr(0xFFFD)],
        ["\xF0\x90\x80A", chr(0xFFFD).'A'],
    );
    foreach my $test (@tests) {
        my $display = uc(unpack("H*", $test->[0]));
        is(get_printable_utf8($test->[0]), $test->[1], "get_printable_utf8 truncated sequences decode as expected");
    }
}

{
    my $fname = 'test_read_text_file.txt';
    if(open(my $fh, '>:raw', $fname)) {
        print $fh 'A'.chr(0xFF).'B';
        close($fh);
        my $text = do {
            local $SIG{__WARN__} = sub {};
            read_text_file_lossy($fname)
        };
        is($text,  'A'.chr(0xFFFD).'B', 'read_text_file_lossy Valid invalid valid');
        my $message = 'read_text_file throws on invalid file';
        try {
            read_text_file($fname);
            fail($message);
        } catch ($e) {
            pass($message);
        }
        unlink($fname);
    }
}

{
    my $fname = 'test_write_text_file.txt';
    my $message = 'write_text_file throws on invalid text';
    my $input = "A\x{D800}B";
    try {
        {
            local $SIG{__WARN__} = sub {};
            write_text_file($fname, $input);
        }
        fail($message);
    } catch ($e) {
        pass($message);
    }
    unlink($fname);
    try {
        {
            local $SIG{__WARN__} = sub {};
            write_text_file_lossy($fname, $input);
        }
        my $text = read_text_file($fname);
        is($text,  'A'.chr(0xFFFD).'B', 'write_text_file_lossy Valid invalid valid');
    } catch ($e) {
        fail("write_text_file_lossy does not crash");
    }
    unlink($fname);



( run in 0.815 second using v1.01-cache-2.11-cpan-e1769b4cff6 )