XML-Parser
view release on metacpan or search on metacpan
Releases on tag push, enabling downstream notification via
GitHub's release watch
- PR #209 Update AUTHORS POD in Parser.pm and Expat.pm to reflect
full maintainer history
- PR #210 Add CI badge to POD via =for markdown directive so it
survives README.md regeneration
- Rename README to README.md and regenerate from POD
Maintenance:
- PR #208 Modernize 10 legacy test files from print-ok style to
Test::More (cdata, finish, deep_nesting, xml_escape, partial,
char_end_doc, current_length, combine_chars, utf8_stream, defaulted)
2.53 2026-03-25 (by Todd Rinaldo)
Fixes:
- PR #202 GH #201 Fix detection of bare glob filehandles (*FH) in
Expat::parse; previously only glob references were recognized
Maintenance:
- PR #198 Modernize encoding.t from print-ok style to Test::More
- Ken Beesley <ken.beesley@xrce.xerox.com> discovered that
declarations in the external subset are not sent to registered
handlers when there is no internal subset.
- Fixed parse_dtd to work when entity values or attribute defaults
are so large that they might be broken across multiple calls to
the default handler.
- For lwp_ext_ent_handler, use URI::URL instead of URI so that old
5.004 installations will work with it.
2.25 Fri Jul 23 06:23:43 EDT 1999
- Now using Version 19990709 of expat. No local patches.
- Numerous people reported a SEGV problem when running t/cdata
on various platforms and versions of perl. The problem was
introduced with the setHandlers change. In some cases an
un-initialized value was being returned.
- Added an additional external entity handler, lwp_ext_ent_handler,
that deals with general URIs. It is installed instead of the
"file only" handler if the LWP package is installed.
2.24 Thu Jul 8 23:05:50 EDT 1999
- KangChan Lee <dolphin@comeng.chungnam.ac.kr> supplied the
EUC-KR encoding map.
- Enno Derksen <enno@att.com> forwarded reports by Jon Eisenzopf
share/windows-1252.enc cp1252-WinLatin1 binary encoding map
share/windows-1255.enc hebrew binary encoding map
share/x-euc-jp-jisx0221.enc X-euc-jp-jisx0221 encoding map
share/x-euc-jp-unicode.enc X-euc-jp-unicode encoding map
share/x-sjis-cp932.enc x-sjis-cp932 encoding map
share/x-sjis-jdk117.enc x-sjis-jdk117 encoding map
share/x-sjis-jisx0221.enc x-sjis-jisx0221 encoding map
share/x-sjis-unicode.enc x-sjis-unicode encoding map
t/astress.t Test script
t/bare_glob_filehandle.t
t/cdata.t Test script
t/char_end_doc.t
t/checklib_findcc.t
t/checklib_tmpdir.t
t/combine_chars.t Test script
t/context_tracking.t
t/coverage_gaps.t
t/current_byte.t
t/current_length.t Test script
t/debug_multibyte.t
t/decl.t Test script
samples/REC-xml-19980210.xml view on Meta::CPAN
The target names "<code>XML</code>", "<code>xml</code>", and so on are
reserved for standardization in this or future versions of this
specification.
The
XML <termref def='dt-notation'>Notation</termref> mechanism
may be used for
formal declaration of PI targets.
</p>
</div2>
<div2 id='sec-cdata-sect'>
<head>CDATA Sections</head>
<p><termdef id="dt-cdsection" term="CDATA Section"><term>CDATA sections</term>
may occur
anywhere character data may occur; they are
used to escape blocks of text containing characters which would
otherwise be recognized as markup. CDATA sections begin with the
string "<code><![CDATA[</code>" and end with the string
"<code>]]></code>":
<scrap lang="ebnf">
samples/xmlfilter view on Meta::CPAN
my %keep_el;
my @keep_elpat;
my %drop_att;
my %keep_att;
my $always_true = sub { 1; };
my $root_element = '';
my $in_cdata = 0;
# Process options
while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
my $opt = shift;
if ( $opt eq '-root' ) {
$pass = 0;
}
elsif ( $opt eq '+root' ) {
samples/xmlfilter view on Meta::CPAN
ErrorContext => 2,
Handlers => {
Start => \&start_handler,
End => \&end_handler
}
);
if ($pass) {
$p->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
$p->parsefile($doc);
print "</$root_element>\n"
unless $pass;
################
## End of main
samples/xmlfilter view on Meta::CPAN
$sub = $keep_sub;
}
if ( defined( $elref->{$el} )
or &$sub($el)
or check_atts( $attref, @_ ) ) {
$pass = !$pass;
if ($pass) {
$xp->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
else {
$xp->setHandlers(
Char => 0,
CdataStart => 0,
CdataEnd => 0
);
}
push( @togglestack, $xp->depth );
samples/xmlfilter view on Meta::CPAN
if ($pass) {
print "</$el>";
}
if ( @togglestack and $togglestack[-1] == $xp->depth ) {
$pass = !$pass;
if ($pass) {
$xp->setHandlers(
Char => \&char_handler,
CdataStart => \&cdata_start,
CdataEnd => \&cdata_end
);
}
else {
$xp->setHandlers(
Char => 0,
CdataStart => 0,
CdataEnd => 0
);
}
samples/xmlfilter view on Meta::CPAN
}
} # End end_handler
# Char handler: echo character data to the currently selected output handle.
# While $in_cdata is false the text is first run through xml_escape (with '>'
# passed as an extra character to escape); while it is true the text is
# printed verbatim. Empty text produces no output.
sub char_handler {
    my $xp   = shift;
    my $text = shift;

    if ( length $text ) {
        $text = $xp->xml_escape( $text, '>' ) if !$in_cdata;
        print $text;
    }
}    # End char_handler
# CdataStart handler: write the CDATA opening delimiter and raise the
# $in_cdata flag, which char_handler consults before escaping text.
sub cdata_start {
    my ($xp) = @_;

    print '<![CDATA[';
    $in_cdata = 1;
}
# CdataEnd handler: write the CDATA closing delimiter and clear the
# $in_cdata flag so that char_handler resumes escaping text.
sub cdata_end {
    my ($xp) = @_;

    print ']]>';
    $in_cdata = 0;
}
sub check_atts {
return $attcheck unless $attcheck;
my $ref = shift;
while (@_) {
my $id = shift;
my $val = shift;
use strict;
use warnings;
use Test::More tests => 2;
use XML::Parser;
# Test 1: module loads (reaching this statement means the 'use' succeeded)
ok( 1, 'XML::Parser loaded' );

# Test 2: CDATA section content is correctly captured.
# The payload is full of markup-significant characters that must come
# through the CDATA section untouched.
my $cdata_part = "<<< & > '' << &&&>&&&&;<";
my $doc        = "<foo> hello <![CDATA[$cdata_part]]> there</foo>";

# Accumulates only the character data seen between CdataStart and CdataEnd.
my $acc          = '';
my $collect_char = sub { $acc .= $_[1] };

my $parser = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => {

        # Install the Char handler only while inside the CDATA section ...
        CdataStart => sub {
            my ($xp) = @_;
            $xp->setHandlers( Char => $collect_char );
        },

        # ... and remove it again once the section ends.
        CdataEnd => sub {
            my ($xp) = @_;
            $xp->setHandlers( Char => 0 );
        },
    }
);

$parser->parse($doc);

is( $acc, $cdata_part, 'CDATA section content captured correctly' );
t/expat_xs_coverage.t view on Meta::CPAN
$p->parse($xml);
# 'text1' and 'text2' should be suppressed by skip_until,
# 'text3' should appear after resume
my $text = join('', @chars);
like($text, qr/text3/, 'skip_until + Char: text after resume point is delivered');
unlike($text, qr/text2/, 'skip_until + Char: text during skip is suppressed');
}
# ===== skip_until with CdataSection handlers (suspend L1253 / resume L1291) =====
{
    # Two CDATA sections: the one in <b> lies inside the skipped range,
    # the one in <c> comes after the parser resumes.
    my $xml = '<r><a/><b><![CDATA[skipped]]></b><c><![CDATA[seen]]></c></r>';
    my @cdata_starts;

    my %handlers = (
        Start => sub {
            my ( $xp, $el ) = @_;
            $xp->skip_until(4) if $el eq 'a';    # Skip past 'b'
        },
        CdataStart => sub { push @cdata_starts, 1 },
        Char       => sub { },                   # suppress output
    );

    my $p = XML::Parser->new( Handlers => \%handlers );
    $p->parse($xml);

    # The CDATA in <b> should be skipped, the one in <c> should fire
    is( scalar(@cdata_starts), 1, 'skip_until + CdataStart: only post-skip CDATA fires' );
}
# ===== skip_until with Unparsed and Notation handlers (suspend L1259,1264 / resume L1295,1299) =====
# DTD events fire before document body, so skip_until from Init skips them.
{
my @notation_names;
my @unparsed_names;
my $xml = <<'XML';
<!DOCTYPE doc [
<!NOTATION gif SYSTEM "image/gif">
$called{Proc_target} = $target;
$called{Proc_data} = $data;
}
# Comment handler: bump the Comment call counter in %called and remember
# the text of the most recent comment.
sub h_comment {
    my $p   = shift;
    my $str = shift;

    $called{Comment}++;
    $called{Comment_data} = $str;
}
# CdataStart handler: record in %called that a CDATA section was opened.
sub h_cdata_start {
    my $p = shift;

    $called{CdataStart}++;
}
# CdataEnd handler: record in %called that a CDATA section was closed.
sub h_cdata_end {
    my $p = shift;

    $called{CdataEnd}++;
}
# Default handler: count each invocation in %called; the string it is
# handed is accepted but not stored.
sub h_default {
    my $p   = shift;
    my $str = shift;

    $called{Default}++;
}
# --- Test 1: Basic handlers (Char, Start, End, Proc, Comment, CdataStart, CdataEnd) ---
# Fixture: a <root> element carrying an id attribute and containing one
# processing instruction, one comment, one <child> element with text, and one
# CDATA section.
my $doc1 = <<'XML';
<?xml version="1.0"?>
<root id="test1">
<?mytarget mydata?>
<!-- a comment -->
<child>Hello world</child>
<![CDATA[cdata content]]>
</root>
XML
# Reset the shared bookkeeping hash so the checks below see this parse only.
%called = ();
# No Default handler is registered for this parser.
my $p1 = XML::Parser->new(
Handlers => {
Start => \&h_start,
End => \&h_end,
Char => \&h_char,
Proc => \&h_proc,
Comment => \&h_comment,
CdataStart => \&h_cdata_start,
CdataEnd => \&h_cdata_end,
}
);
$p1->parse($doc1);
# The fixture has two elements (<root> and <child>), hence the ">= 2" checks.
ok($called{Start} && $called{Start} >= 2, 'Start handler called for elements');
is($called{Start_att}, 'test1', 'Start handler receives attributes');
ok($called{End} && $called{End} >= 2, 'End handler called');
is($called{End_el}, 'root', 'End handler receives element name');
ok($called{Char}, 'Char handler called');
# NOTE(review): both patterns are matched against the same Char_data value, so
# h_char (defined outside this view) presumably accumulates text - confirm.
like($called{Char_data}, qr/Hello world/, 'Char handler receives text content');
like($called{Char_data}, qr/cdata content/, 'Char handler receives CDATA text');
# Exactly one PI, one comment and one CDATA section appear in the fixture.
is($called{Proc}, 1, 'Proc handler called once');
is($called{Proc_target}, 'mytarget', 'Proc handler receives target');
like($called{Proc_data}, qr/mydata/, 'Proc handler receives data');
is($called{Comment}, 1, 'Comment handler called once');
like($called{Comment_data}, qr/a comment/, 'Comment handler receives comment text');
is($called{CdataStart}, 1, 'CdataStart handler called');
is($called{CdataEnd}, 1, 'CdataEnd handler called');
# --- Test 2: Default handler ---
t/utf8_handling.t view on Meta::CPAN
$p->parse($xml_pi);
ok( utf8::is_utf8($pi_data),
'Proc handler: PI data has UTF-8 flag' );
is( $pi_data, "caf\x{e9}",
'Proc handler: PI data matches expected' );
}
# ===== CDATA section: UTF-8 preserved =====
{
    # Document whose CDATA section holds the UTF-8 byte sequence for
    # U+00E9, written as raw \x escapes.
    my $xml_cdata = join '',
        qq(<?xml version="1.0" encoding="UTF-8"?>\n),
        qq(<doc><![CDATA[caf\xc3\xa9]]></doc>);
    utf8::downgrade($xml_cdata);

    # Gather every chunk of character data the parser reports.
    my $cdata_text = '';
    my $collect    = sub { $cdata_text .= $_[1] };

    my $p = XML::Parser->new( Handlers => { Char => $collect } );
    $p->parse($xml_cdata);

    ok( utf8::is_utf8($cdata_text), 'CDATA: text has UTF-8 flag' );
    is( $cdata_text, "caf\x{e9}", 'CDATA: text matches expected' );
}
done_testing();
( run in 2.687 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )