HTML-PullParser-Nested
view release on metacpan or search on metacpan
lib/HTML/PullParser/Nested.pm view on Meta::CPAN
=head1 SYNOPSIS
use HTML::PullParser::Nested;
my $p = HTML::PullParser::Nested->new(
doc => \ "<html>...<ul><li>abcd<li>efgh<li>wvyz</ul>...<ul><li>1<li>2<li>9</ul></html>",
start => "'S',tagname,attr,attrseq,text",
end => "'E',tagname,text",
text => "'T',text,is_cdata",
);
while (my $token = $p->get_token()) {
if ($token->[0] eq "S" && $token->[1] eq "ul") {
$p->push_nest($token);
print "List:\n";
while (my $token = $p->get_token()) {
if ($token->[0] eq "S" && $token->[1] eq "li") {
print $p->get_token()->[1], "\n";
}
$p->unget_token($token, $token);
eval {$p->unget_token($token); }; die unless ($@ =~ m/nesting tag underflow/);
},
# Argspecs that include the text field, with the literal marker ('S', 'E',
# 'T') in slot 0 and the tag name (or text) in slot 1 of every token.
sub {
    my $parser = HTML::PullParser::Nested->new(
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "'S',tagname,attr,attrseq,text",
        'end'   => "'E',tagname,text",
        'text'  => "'T',text,is_cdata",
    );

    # Expected [marker, tagname-or-text] pairs, in the order the parser
    # should hand them back.
    my @expected = (
        ["S", "a"],
        ["S", "b"],
        ["E", "a"],
        ["T", "TEXT"],
        ["S", "c"],
    );

    foreach my $want (@expected) {
        my $token = $parser->get_token();
        die unless ($token->[0] eq $want->[0] && $token->[1] eq $want->[1]);
    }

    # Every token matched: finish with a true value.
    return 1;
},
# Same document, but the argspecs are reordered: the tag name (or text) now
# sits in slot 0 and the literal marker ('S', 'E', 'T') in slot 1.
sub {
    my $parser = HTML::PullParser::Nested->new(
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "tagname,'S',attr,attrseq,text",
        'end'   => "tagname,'E',text",
        'text'  => "text,'T',is_cdata",
    );

    # Expected [tagname-or-text, marker] pairs, in the order the parser
    # should hand them back.
    my @expected = (
        ["a",    "S"],
        ["b",    "S"],
        ["a",    "E"],
        ["TEXT", "T"],
        ["c",    "S"],
    );

    foreach my $want (@expected) {
        my $token = $parser->get_token();
        # The marker (slot 1) is checked before the name (slot 0).
        die unless ($token->[1] eq $want->[1] && $token->[0] eq $want->[0]);
    }

    # Every token matched: finish with a true value.
    return 1;
},
# Argspecs that use "event" instead of a literal marker: slot 0 of each token
# is expected to hold the event name ("start", "end" or "text").
sub {
    my $parser = HTML::PullParser::Nested->new(
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "event,tagname,attr,attrseq,text",
        'end'   => "event,tagname,text",
        'text'  => "event,text,is_cdata",
    );

    # Expected [event, tagname-or-text] pairs, in the order the parser
    # should hand them back.
    my @expected = (
        ["start", "a"],
        ["start", "b"],
        ["end",   "a"],
        ["text",  "TEXT"],
        ["start", "c"],
    );

    foreach my $want (@expected) {
        my $token = $parser->get_token();
        die unless ($token->[0] eq $want->[0] && $token->[1] eq $want->[1]);
    }

    # Every token matched: finish with a true value.
    return 1;
},
# Construction without a 'start' argspec: the constructor is expected to die
# with a message about needing argspecs for both start and end.
sub {
    # Kept as an ordered list so the arguments reach new() in this order.
    my @ctor_args = (
        'doc'  => \ "<a><b></a>TEXT<c>",
        'end'  => "event,tagname,text",
        'text' => "event,text,is_cdata",
    );

    eval {
        my $parser = HTML::PullParser::Nested->new(@ctor_args);
    };
    my $error = $@;

    die unless ($error =~ m/need argspec for start and end/);
},
# Argspecs carrying neither "event" nor a quoted literal string: the
# constructor is expected to die asking for one at a consistent index.
sub {
    # Kept as an ordered list so the arguments reach new() in this order.
    my @ctor_args = (
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "tagname,attr,attrseq,text",
        'end'   => "tagname,text",
        'text'  => "text,is_cdata",
    );

    eval {
        my $parser = HTML::PullParser::Nested->new(@ctor_args);
    };
    my $error = $@;

    die unless ($error =~ m/need either event or 'string' at a consistent index across all argspecs/);
},
# No "event" in any argspec, and the start and end argspecs share the same
# literal string ('TAG'): the constructor is expected to die demanding that
# the literal be unique.
sub {
    # Kept as an ordered list so the arguments reach new() in this order.
    my @ctor_args = (
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "'TAG',tagname,attr,attrseq,text",
        'end'   => "'TAG',tagname,text",
        'text'  => "'TEXT',text,is_cdata",
    );

    eval {
        my $parser = HTML::PullParser::Nested->new(@ctor_args);
    };
    my $error = $@;

    die unless ($error =~ m/'string' must be unique across all argspecs/);
},
# "event" appears at index 0 for start/end but at index 1 for text: the
# constructor is expected to die asking for a consistent index.
sub {
    # Kept as an ordered list so the arguments reach new() in this order.
    my @ctor_args = (
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "event,tagname,attr,attrseq,text",
        'end'   => "event,tagname,text",
        'text'  => "text,event,is_cdata",
    );

    eval {
        my $parser = HTML::PullParser::Nested->new(@ctor_args);
    };
    my $error = $@;

    die unless ($error =~ m/need either event or 'string' at a consistent index across all argspecs/);
},
# Start and end argspecs that omit "tagname": the constructor is expected to
# die saying tagname is required for start and end tags.
sub {
    # Kept as an ordered list so the arguments reach new() in this order.
    my @ctor_args = (
        'doc'   => \ "<a><b></a>TEXT<c>",
        'start' => "event,attr,attrseq,text",
        'end'   => "event,text",
        'text'  => "event,text,is_cdata",
    );

    eval {
        my $parser = HTML::PullParser::Nested->new(@ctor_args);
    };
    my $error = $@;

    die unless ($error =~ m/need tagname in argspec for start and end tags/);
},
);
# Print the plan line ("1..N"), where N is the number of entries in @tests.
printf("1..%d\n", scalar(@tests));
( run in 0.278 second using v1.01-cache-2.11-cpan-454fe037f31 )