XML-Parser

 view release on metacpan or  search on metacpan

t/utf8_handling.t  view on Meta::CPAN

    ok( utf8::is_utf8($got),
        'CJK text: has UTF-8 flag' );
    is( length($got), 2,
        'CJK text: length is 2 characters' );
    is( $got, "\x{4e16}\x{754c}",
        'CJK text: matches expected Unicode string' );
}

# ===== Default handler: UTF-8 flag preserved =====
# Every chunk handed to the Default handler is appended to one buffer. The
# buffer must end up flagged as UTF-8 and contain the accented word as the
# characters U+00E9 / U+00E8.
# NOTE(review): $xml is defined outside this block; the pattern below
# presumes it contains that accented word -- confirm against its definition.
{
    my $collected  = '';
    my $on_default = sub {
        my ( undef, $chunk ) = @_;    # first argument (unused here) is skipped
        $collected .= $chunk;
    };

    my $parser = XML::Parser->new( Handlers => { Default => $on_default } );
    $parser->parse($xml);

    ok( utf8::is_utf8($collected), 'Default handler: text has UTF-8 flag' );
    like(
        $collected,
        qr/\x{e9}l\x{e8}ve/,
        'Default handler: contains expected UTF-8 text'
    );
}

# ===== Comment handler: UTF-8 flag preserved =====
# The document is supplied as a byte string: the comment body carries the
# two bytes 0xC3 0xA9, and the test expects the handler to receive them as
# the single character U+00E9 in a UTF-8-flagged scalar.
{
    my $prolog = qq(<?xml version="1.0" encoding="UTF-8"?>\n);
    my $source = $prolog . qq(<doc><!-- caf\xc3\xa9 --></doc>);

    # Make sure the input itself is a plain byte string before parsing.
    utf8::downgrade($source);

    my $captured;
    my $on_comment = sub {
        my ( undef, $text ) = @_;
        $captured = $text;
    };

    my $parser = XML::Parser->new( Handlers => { Comment => $on_comment } );
    $parser->parse($source);

    ok( utf8::is_utf8($captured), 'Comment handler: text has UTF-8 flag' );

    # Pattern match rather than equality: the comment body also has the
    # surrounding spaces from the markup.
    like(
        $captured,
        qr/caf\x{e9}/,
        'Comment handler: contains expected UTF-8 text'
    );
}

# ===== Processing instruction handler: UTF-8 flag preserved =====
# A processing instruction whose data contains the bytes 0xC3 0xA9 is
# parsed from a byte string; the data passed to the Proc handler must be
# UTF-8 flagged and equal to "caf" followed by U+00E9.
{
    my $prolog = qq(<?xml version="1.0" encoding="UTF-8"?>\n);
    my $source = $prolog . qq(<doc><?mytarget caf\xc3\xa9?></doc>);

    # Make sure the input itself is a plain byte string before parsing.
    utf8::downgrade($source);

    my $captured;
    my $on_proc = sub {
        # Only the third argument (the PI data) is of interest here.
        my ( undef, undef, $data ) = @_;
        $captured = $data;
    };

    my $parser = XML::Parser->new( Handlers => { Proc => $on_proc } );
    $parser->parse($source);

    ok( utf8::is_utf8($captured), 'Proc handler: PI data has UTF-8 flag' );
    is( $captured, "caf\x{e9}", 'Proc handler: PI data matches expected' );
}

# ===== CDATA section: UTF-8 preserved =====
# Character data inside a CDATA section is gathered through the Char
# handler. The text is appended piece by piece, so the check does not
# depend on how many times the handler is invoked.
{
    my $prolog = qq(<?xml version="1.0" encoding="UTF-8"?>\n);
    my $source = $prolog . qq(<doc><![CDATA[caf\xc3\xa9]]></doc>);

    # Make sure the input itself is a plain byte string before parsing.
    utf8::downgrade($source);

    my $collected = '';
    my $on_char   = sub {
        my ( undef, $chunk ) = @_;
        $collected .= $chunk;
    };

    my $parser = XML::Parser->new( Handlers => { Char => $on_char } );
    $parser->parse($source);

    ok( utf8::is_utf8($collected), 'CDATA: text has UTF-8 flag' );
    is( $collected, "caf\x{e9}", 'CDATA: text matches expected' );
}

# Tell the test harness that every test in this script has been run.
done_testing();



( run in 0.575 second using v1.01-cache-2.11-cpan-e1769b4cff6 )