HTML-PullParser-Nested

 view release on metacpan or  search on metacpan

lib/HTML/PullParser/Nested.pm  view on Meta::CPAN



=head1 SYNOPSIS

    use HTML::PullParser::Nested;

    my $p = HTML::PullParser::Nested->new(
        doc         => \ "<html>...<ul><li>abcd<li>efgh<li>wvyz</ul>...<ul><li>1<li>2<li>9</ul></html>",
        start       => "'S',tagname,attr,attrseq,text",
        end         => "'E',tagname,text",
        text        => "'T',text,is_cdata",
        );
    
    while (my $token = $p->get_token()) {
        if ($token->[0] eq "S" && $token->[1] eq "ul") {
            $p->push_nest($token);
            print "List:\n";
            while (my $token = $p->get_token()) {
                if ($token->[0] eq "S" && $token->[1] eq "li") {
                    print $p->get_token()->[1], "\n";
                }

t/basic.t  view on Meta::CPAN

	$p->unget_token($token, $token);
	eval {$p->unget_token($token); }; die unless ($@ =~ m/nesting tag underflow/);
    },

    # Argspecs that begin with a literal marker ('S', 'E', 'T') and include a
    # text handler: every token must arrive in document order with the marker
    # in slot 0 and the tag name (or text) in slot 1.
    sub {
	my $p = HTML::PullParser::Nested->new(
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "'S',tagname,attr,attrseq,text",
	    'end'         => "'E',tagname,text",
	    'text'        => "'T',text,is_cdata",
	    );

	# Expected stream, one [marker, value] pair per get_token() call.
	my @expected = (
	    [ "S", "a" ],
	    [ "S", "b" ],
	    [ "E", "a" ],
	    [ "T", "TEXT" ],
	    [ "S", "c" ],
	    );

	for my $want (@expected) {
	    my ($marker, $value) = @$want;
	    my $token = $p->get_token();
	    die unless ($token->[0] eq $marker && $token->[1] eq $value);
	}
    },

    # Same document as the literal-marker test, but the marker is moved to
    # slot 1 and the tag name (or text) to slot 0 in every argspec.
    sub {
	my $p = HTML::PullParser::Nested->new(
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "tagname,'S',attr,attrseq,text",
	    'end'         => "tagname,'E',text",
	    'text'        => "text,'T',is_cdata",
	    );

	# Expected stream, one [value, marker] pair per get_token() call.
	my @expected = (
	    [ "a",    "S" ],
	    [ "b",    "S" ],
	    [ "a",    "E" ],
	    [ "TEXT", "T" ],
	    [ "c",    "S" ],
	    );

	for my $want (@expected) {
	    my ($value, $marker) = @$want;
	    my $token = $p->get_token();
	    die unless ($token->[1] eq $marker && $token->[0] eq $value);
	}
    },

    # Argspecs that use the 'event' keyword instead of a literal marker:
    # slot 0 of each token must then read "start", "end" or "text".
    sub {
	my $p = HTML::PullParser::Nested->new(
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "event,tagname,attr,attrseq,text",
	    'end'         => "event,tagname,text",
	    'text'        => "event,text,is_cdata",
	    );

	# Walk the expected [event, value] pairs in document order.
	foreach my $pair (
	    [ "start", "a" ],
	    [ "start", "b" ],
	    [ "end",   "a" ],
	    [ "text",  "TEXT" ],
	    [ "start", "c" ],
	    ) {
	    my $token = $p->get_token();
	    die unless ($token->[0] eq $pair->[0] && $token->[1] eq $pair->[1]);
	}
    },

    # Constructing a parser with no 'start' argspec is expected to die with
    # a message matching "need argspec for start and end".
    sub {
	# Kept as an ordered list so new() receives the pairs in this order.
	my @args = (
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'end'         => "event,tagname,text",
	    'text'        => "event,text,is_cdata",
	    );

	eval { my $p = HTML::PullParser::Nested->new(@args); };
	my $failure = $@;

	die unless ($failure =~ m/need argspec for start and end/);
    },

    # Argspecs carrying neither 'event' nor a quoted literal: the
    # constructor is expected to die with the "consistent index" message.
    sub {
	# Kept as an ordered list so new() receives the pairs in this order.
	my @args = (
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "tagname,attr,attrseq,text",
	    'end'         => "tagname,text",
	    'text'        => "text,is_cdata",
	    );

	eval { my $p = HTML::PullParser::Nested->new(@args); };
	my $failure = $@;

	die unless ($failure =~ m/need either event or 'string' at a consistent index across all argspecs/);
    },

    # 'start' and 'end' share the literal 'TAG' and no argspec uses 'event':
    # the constructor is expected to die with the uniqueness message.
    sub {
	# Kept as an ordered list so new() receives the pairs in this order.
	my @args = (
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "'TAG',tagname,attr,attrseq,text",
	    'end'         => "'TAG',tagname,text",
	    'text'        => "'TEXT',text,is_cdata",
	    );

	eval { my $p = HTML::PullParser::Nested->new(@args); };
	my $failure = $@;

	die unless ($failure =~ m/'string' must be unique across all argspecs/);
    },

    # 'event' sits at index 0 for start/end but at index 1 for text: the
    # constructor is expected to die with the "consistent index" message.
    sub {
	# Kept as an ordered list so new() receives the pairs in this order.
	my @args = (
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "event,tagname,attr,attrseq,text",
	    'end'         => "event,tagname,text",
	    'text'        => "text,event,is_cdata",
	    );

	eval { my $p = HTML::PullParser::Nested->new(@args); };
	my $failure = $@;

	die unless ($failure =~ m/need either event or 'string' at a consistent index across all argspecs/);
    },

    # Neither the start nor the end argspec mentions 'tagname': the
    # constructor is expected to die with the "need tagname" message.
    sub {
	# Kept as an ordered list so new() receives the pairs in this order.
	my @args = (
	    'doc'         => \ "<a><b></a>TEXT<c>",
	    'start'       => "event,attr,attrseq,text",
	    'end'         => "event,text",
	    'text'        => "event,text,is_cdata",
	    );

	eval { my $p = HTML::PullParser::Nested->new(@args); };
	my $failure = $@;

	die unless ($failure =~ m/need tagname in argspec for start and end tags/);
    },

    );

# Print the plan line "1..N", where N is the number of entries in @tests.
printf "1..%d\n", scalar @tests;



( run in 0.278 second using v1.01-cache-2.11-cpan-454fe037f31 )