XML-Parser

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

      Releases on tag push, enabling downstream notification via
      GitHub's release watch
    - PR #209 Update AUTHORS POD in Parser.pm and Expat.pm to reflect
      full maintainer history
    - PR #210 Add CI badge to POD via =for markdown directive so it
      survives README.md regeneration
    - Rename README to README.md and regenerate from POD

  Maintenance:
    - PR #208 Modernize 10 legacy test files from print-ok style to
      Test::More (cdata, finish, deep_nesting, xml_escape, partial,
      char_end_doc, current_length, combine_chars, utf8_stream, defaulted)

2.53 2026-03-25 (by Todd Rinaldo)

  Fixes:
    - PR #202 GH #201 Fix detection of bare glob filehandles (*FH) in
      Expat::parse; previously only glob references were recognized

  Maintenance:
    - PR #198 Modernize encoding.t from print-ok style to Test::More

Changes  view on Meta::CPAN

	- Ken Beesley <ken.beesley@xrce.xerox.com> discovered that
	  declarations in the external subset are not sent to registered
	  handlers when there is no internal subset.
	- Fixed parse_dtd to work when entity values or attribute defaults
	  are so large that they might be broken across multiple calls to
	  the default handler.
	- For lwp_ext_ent_handler, use URI::URL instead of URI so that old
	  5.004 installations will work with it.
2.25 Fri Jul 23 06:23:43 EDT 1999
	- Now using Version 19990709 of expat. No local patches.
	- Numerous people reported a SEGV problem when running t/cdata
	  on various platforms and versions of perl. The problem was
	  introduced with the setHandlers change. In some cases an
	  un-initialized value was being returned.
	- Added an additional external entity handler, lwp_ext_ent_handler,
	  that deals with general URIs. It is installed instead of the
	  "file only" handler if the LWP package is installed.
2.24  Thu Jul  8 23:05:50 EDT 1999
	- KangChan Lee <dolphin@comeng.chungnam.ac.kr> supplied the
	  EUC-KR encoding map.
	- Enno Derksen <enno@att.com> forwarded reports by Jon Eisenzopf

MANIFEST  view on Meta::CPAN

share/windows-1252.enc		cp1252-WinLatin1 binary encoding map
share/windows-1255.enc		hebrew binary encoding map
share/x-euc-jp-jisx0221.enc	X-euc-jp-jisx0221 encoding map
share/x-euc-jp-unicode.enc	X-euc-jp-unicode encoding map
share/x-sjis-cp932.enc		x-sjis-cp932 encoding map
share/x-sjis-jdk117.enc		x-sjis-jdk117 encoding map
share/x-sjis-jisx0221.enc	x-sjis-jisx0221 encoding map
share/x-sjis-unicode.enc	x-sjis-unicode encoding map
t/astress.t			Test script
t/bare_glob_filehandle.t
t/cdata.t			Test script
t/char_end_doc.t
t/checklib_findcc.t
t/checklib_tmpdir.t
t/combine_chars.t		Test script
t/context_tracking.t
t/coverage_gaps.t
t/current_byte.t
t/current_length.t		Test script
t/debug_multibyte.t
t/decl.t			Test script

samples/REC-xml-19980210.xml  view on Meta::CPAN

The target names "<code>XML</code>", "<code>xml</code>", and so on are
reserved for standardization in this or future versions of this
specification.
The 
XML <termref def='dt-notation'>Notation</termref> mechanism
may be used for
formal declaration of PI targets.
</p>
</div2>
 
<div2 id='sec-cdata-sect'>
<head>CDATA Sections</head>
 
<p><termdef id="dt-cdsection" term="CDATA Section"><term>CDATA sections</term>
may occur 
anywhere character data may occur; they are
used to escape blocks of text containing characters which would
otherwise be recognized as markup.  CDATA sections begin with the
string "<code>&lt;![CDATA[</code>" and end with the string
"<code>]]&gt;</code>":
<scrap lang="ebnf">

samples/xmlfilter  view on Meta::CPAN


my %keep_el;
my @keep_elpat;

my %drop_att;
my %keep_att;

my $always_true = sub { 1; };
my $root_element = '';

my $in_cdata = 0;

# Process options

while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
    my $opt = shift;

    if ( $opt eq '-root' ) {
        $pass = 0;
    }
    elsif ( $opt eq '+root' ) {

samples/xmlfilter  view on Meta::CPAN

    ErrorContext => 2,
    Handlers     => {
        Start => \&start_handler,
        End   => \&end_handler
    }
);

# The character-data and CDATA handlers are only installed when the
# filter starts out in pass mode.
$p->setHandlers(
    Char       => \&char_handler,
    CdataStart => \&cdata_start,
    CdataEnd   => \&cdata_end
) if $pass;

$p->parsefile($doc);

# When the parse finishes with $pass false, emit a closing tag for
# $root_element.
unless ($pass) {
    print "</$root_element>\n";
}

################
## End of main

samples/xmlfilter  view on Meta::CPAN

        $sub    = $keep_sub;
    }

    if (   defined( $elref->{$el} )
        or &$sub($el)
        or check_atts( $attref, @_ ) ) {
        $pass = !$pass;
        if ($pass) {
            $xp->setHandlers(
                Char       => \&char_handler,
                CdataStart => \&cdata_start,
                CdataEnd   => \&cdata_end
            );
        }
        else {
            $xp->setHandlers(
                Char       => 0,
                CdataStart => 0,
                CdataEnd   => 0
            );
        }
        push( @togglestack, $xp->depth );

samples/xmlfilter  view on Meta::CPAN


    if ($pass) {
        print "</$el>";
    }

    if ( @togglestack and $togglestack[-1] == $xp->depth ) {
        $pass = !$pass;
        if ($pass) {
            $xp->setHandlers(
                Char       => \&char_handler,
                CdataStart => \&cdata_start,
                CdataEnd   => \&cdata_end
            );
        }
        else {
            $xp->setHandlers(
                Char       => 0,
                CdataStart => 0,
                CdataEnd   => 0
            );
        }

samples/xmlfilter  view on Meta::CPAN

    }

}    # End end_handler

# Char handler: print the character data it is given.
#   $expat - parser object; only its xml_escape method is used here
#   $chunk - the text delivered for this event
sub char_handler {
    my ( $expat, $chunk ) = @_;

    # Empty strings produce no output at all.
    if ( length($chunk) ) {

        # Inside a CDATA section the text is written out verbatim;
        # everywhere else it is passed through xml_escape first.
        unless ($in_cdata) {
            $chunk = $expat->xml_escape( $chunk, '>' );
        }

        print $chunk;
    }
}    # End char_handler

# CdataStart handler: print the CDATA opening marker and record that we
# are now inside a CDATA section (char_handler reads $in_cdata).
sub cdata_start {
    print '<![CDATA[';
    $in_cdata = 1;
}

# CdataEnd handler: print the CDATA closing marker and record that the
# CDATA section is over (char_handler reads $in_cdata).
sub cdata_end {
    print ']]>';
    $in_cdata = 0;
}

sub check_atts {
    return $attcheck unless $attcheck;

    my $ref = shift;

    while (@_) {
        my $id  = shift;
        my $val = shift;

t/cdata.t  view on Meta::CPAN

use strict;
use warnings;
use Test::More tests => 2;
use XML::Parser;

# Test 1: module loads (reaching this line means 'use XML::Parser' worked)
ok( 1, 'XML::Parser loaded' );

# Test 2: CDATA section content is correctly captured.
# A Char handler is installed only between CdataStart and CdataEnd, so
# the " hello " and " there" text outside the section is never appended.
my $cdata_part = "<<< & > '' << &&&>&&&&;<";
my $doc        = "<foo> hello <![CDATA[$cdata_part]]> there</foo>";
my $acc        = '';

my $append_text = sub {
    my ( $expat, $string ) = @_;
    $acc .= $string;
};

my %cdata_handlers = (
    CdataStart => sub {
        my ($expat) = @_;
        $expat->setHandlers( Char => $append_text );
    },
    CdataEnd => sub {
        my ($expat) = @_;
        $expat->setHandlers( Char => 0 );
    },
);

my $parser = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => \%cdata_handlers,
);

$parser->parse($doc);

is( $acc, $cdata_part, 'CDATA section content captured correctly' );

t/expat_xs_coverage.t  view on Meta::CPAN

    $p->parse($xml);
    # 'text1' and 'text2' should be suppressed by skip_until,
    # 'text3' should appear after resume
    my $text = join('', @chars);
    like($text, qr/text3/, 'skip_until + Char: text after resume point is delivered');
    unlike($text, qr/text2/, 'skip_until + Char: text during skip is suppressed');
}

# ===== skip_until with CdataSection handlers (suspend L1253 / resume L1291) =====
{
    my $cdata_start_count = 0;
    my $xml = '<r><a/><b><![CDATA[skipped]]></b><c><![CDATA[seen]]></c></r>';

    # On <a>, ask the parser to skip ahead to element index 4.
    # NOTE(review): index 4 should be <c> (r=1, a=2, b=3) -- confirm
    # against the element_index documentation.
    my $on_start = sub {
        my ( $xp, $el ) = @_;
        $xp->skip_until(4) if $el eq 'a';    # Skip past 'b'
    };

    my $p = XML::Parser->new(
        Handlers => {
            Start      => $on_start,
            CdataStart => sub { $cdata_start_count++ },
            Char       => sub { },    # suppress output
        },
    );
    $p->parse($xml);

    # The CDATA in <b> should be skipped, the one in <c> should fire
    is( $cdata_start_count, 1, 'skip_until + CdataStart: only post-skip CDATA fires' );
}

# ===== skip_until with Unparsed and Notation handlers (suspend L1259,1264 / resume L1295,1299) =====
# DTD events fire before document body, so skip_until from Init skips them.
{
    my @notation_names;
    my @unparsed_names;
    my $xml = <<'XML';
<!DOCTYPE doc [
<!NOTATION gif SYSTEM "image/gif">

t/g_void.t  view on Meta::CPAN

    $called{Proc_target} = $target;
    $called{Proc_data} = $data;
}

# Comment handler: bump the Comment call counter in %called and remember
# the text of the most recent comment.
sub h_comment {
    my ( undef, $comment_text ) = @_;
    ++$called{Comment};
    $called{Comment_data} = $comment_text;
}

# CdataStart handler: count how many times a CDATA section has opened.
# The parser argument is not needed, so @_ is left untouched.
sub h_cdata_start {
    return $called{CdataStart}++;
}

# CdataEnd handler: count how many times a CDATA section has closed.
# The parser argument is not needed, so @_ is left untouched.
sub h_cdata_end {
    return $called{CdataEnd}++;
}

# Default handler: only counts invocations; the string passed in is
# ignored.
sub h_default {
    return $called{Default}++;
}

# --- Test 1: Basic handlers (Char, Start, End, Proc, Comment, CdataStart, CdataEnd) ---
# No Default handler is registered for this parser; the Default handler
# is exercised separately.

# Fixture with one of each construct the handlers below react to: two
# elements (root, child), an attribute, a processing instruction, a
# comment, plain text and a CDATA section.
my $doc1 = <<'XML';
<?xml version="1.0"?>
<root id="test1">
  <?mytarget mydata?>
  <!-- a comment -->
  <child>Hello world</child>
  <![CDATA[cdata content]]>
</root>
XML

# Reset the shared call log so the checks below see only this parse.
%called = ();
my $p1 = XML::Parser->new(
    Handlers => {
        Start      => \&h_start,
        End        => \&h_end,
        Char       => \&h_char,
        Proc       => \&h_proc,
        Comment    => \&h_comment,
        CdataStart => \&h_cdata_start,
        CdataEnd   => \&h_cdata_end,
    }
);
$p1->parse($doc1);

# Element events: the document has two elements, hence ">= 2".
# NOTE(review): the End_el check presumably relies on h_end keeping the
# last element name seen -- h_end is not visible here, confirm.
ok($called{Start} && $called{Start} >= 2, 'Start handler called for elements');
is($called{Start_att}, 'test1', 'Start handler receives attributes');
ok($called{End} && $called{End} >= 2, 'End handler called');
is($called{End_el}, 'root', 'End handler receives element name');

# Character data: both the plain text and the CDATA text must show up.
# NOTE(review): presumably h_char accumulates text across calls -- confirm.
ok($called{Char}, 'Char handler called');
like($called{Char_data}, qr/Hello world/, 'Char handler receives text content');
like($called{Char_data}, qr/cdata content/, 'Char handler receives CDATA text');

# Exactly one processing instruction and one comment are in the fixture.
is($called{Proc}, 1, 'Proc handler called once');
is($called{Proc_target}, 'mytarget', 'Proc handler receives target');
like($called{Proc_data}, qr/mydata/, 'Proc handler receives data');
is($called{Comment}, 1, 'Comment handler called once');
like($called{Comment_data}, qr/a comment/, 'Comment handler receives comment text');

# Exactly one CDATA section, so one start and one end event.
is($called{CdataStart}, 1, 'CdataStart handler called');
is($called{CdataEnd}, 1, 'CdataEnd handler called');

# --- Test 2: Default handler ---

t/utf8_handling.t  view on Meta::CPAN

    $p->parse($xml_pi);

    ok( utf8::is_utf8($pi_data),
        'Proc handler: PI data has UTF-8 flag' );
    is( $pi_data, "caf\x{e9}",
        'Proc handler: PI data matches expected' );
}

# ===== CDATA section: UTF-8 preserved =====
{
    # The document carries the two UTF-8 octets for e-acute (\xc3\xa9)
    # inside a CDATA section. utf8::downgrade makes sure the parser is
    # handed a byte string.
    my $xml_cdata = join '',
        qq(<?xml version="1.0" encoding="UTF-8"?>\n),
        qq(<doc><![CDATA[caf\xc3\xa9]]></doc>);
    utf8::downgrade($xml_cdata);

    # Collect everything the Char handler is given.
    my $cdata_text = '';
    my $on_char    = sub {
        my ( $expat, $string ) = @_;
        $cdata_text .= $string;
    };

    my $p = XML::Parser->new( Handlers => { Char => $on_char } );
    $p->parse($xml_cdata);

    # The handler should see a character string: UTF-8 flag on and a
    # single U+00E9 code point rather than two raw octets.
    ok( utf8::is_utf8($cdata_text), 'CDATA: text has UTF-8 flag' );
    is( $cdata_text, "caf\x{e9}", 'CDATA: text matches expected' );
}

done_testing();



( run in 2.687 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )