HTML-StripScripts-Parser
view release on metacpan or search on metacpan
use strict;
# Fixture table for the filter tests, populated at compile time.
#
# Each row is an array ref of three strings. From the data they read as
#   [ label, input HTML, expected output ]
# but the code that consumes @tests is outside this view -- TODO confirm.
BEGIN {
# Switch on Perl's global warnings flag.
$^W = 1;
# Declare @tests as a package variable so it can be named under "use strict".
use vars qw(@tests);
@tests = (
# --- Text without markup. The 'nonprint' row expects the NUL byte to come
# --- back as a single space.
[ 'empty', q{}, q{} ],
[ 'space', q{ }, q{ } ],
[ 'plain', q{hello mum}, q{hello mum} ],
[ 'plain nl', qq{hello mum\n}, "hello mum\n" ],
[ 'nonprint', qq{foo\0bar}, "foo bar" ],
# --- Tags left open in the input: the expected column has the matching
# --- close tag appended. 'valid p' is expected back unchanged.
[ 'p tag', qq{<p>hello mum\n}, "<p>hello mum\n</p>" ],
[ 'i tag', qq{<i>hello mum\n}, "<i>hello mum\n</i>" ],
[ 'valid p',
q{<ins><p>valid p</p></ins>},
q{<ins><p>valid p</p></ins>}
],
# --- Table/list tags used outside their container, and text placed
# --- directly inside <tr>: the expected column shows <!--filtered-->.
[ 'misplaced tr', q{misplaced <tr>}, q{misplaced <!--filtered-->} ],
[ 'misplaced td', q{misplaced <td>}, q{misplaced <!--filtered-->} ],
[ 'misplaced li', q{misplaced <li>}, q{misplaced <!--filtered-->} ],
[ 'misplaced cdata',
q{<table><tr>misplaced cdata<td>hello},
q{<table><tr></tr></table>misplaced cdata<!--filtered-->hello}
],
# --- <img> and <a>: the expected column keeps the tag but drops the
# --- src / href attribute.
# NOTE(review): 'emtpy' in the first label looks like a typo for 'empty'.
[ 'pass emtpy img', q{<img>}, q{<img />} ],
[ 'block img src',
q{<img src="http://example.com/foo.png" />},
q{<img />}
],
[ 'block a href', q{<a href="http://foo.foo/foo">x}, q{<a>x</a>} ],
[ 'block a mailto', q{<a href="mailto:foo@foo.foo">x}, q{<a>x</a>} ],
# --- Unknown tag, unknown attribute, and close tags with nothing to close.
[ 'unknown tag', q{<foo>}, q{<!--filtered-->} ],
[ 'unknown attr', q{<i foo=foo>}, q{<i></i>} ],
[ 'misplaced close', q{</i>}, q{<!--filtered-->} ],
[ 'br', q{<br>hello</br>}, q{<br />hello<!--filtered-->} ],
# --- <hr width=...>: unquoted, double-quoted and single-quoted values are
# --- all expected back double-quoted; the 38-digit width is expected to be
# --- dropped in every quoting style.
[ 'hr width', q{x<hr width=4>y}, q{x<hr width="4" />y} ],
[ 'hr width dq', q{x<hr width="4">y}, q{x<hr width="4" />y} ],
[ 'hr width sq', q{x<hr width='4'>y}, q{x<hr width="4" />y} ],
[ 'hr silly width',
q{x<hr width=18234081234019840138340938410242343144>y},
q{x<hr />y}
],
[ 'hr silly width dq',
q{x<hr width="18234081234019840138340938410242343144">y},
q{x<hr />y}
],
[ 'hr silly width sq',
q{x<hr width='18234081234019840138340938410242343144'>y},
q{x<hr />y}
],
# --- Trailing slash on a tag that takes content (<i />) versus one that
# --- does not (<br />).
[ 'bad trailing /', q{<i />hello</i>}, q{<i>hello</i>} ],
[ 'good trailing /', q{<br />}, q{<br />} ],
# --- Overlapping inline tags: the expected column closes the inner tag
# --- before the outer one and reopens it afterwards. The 'case' row expects
# --- the uppercase <B> to come back lowercased.
[ 'interleave', q{<i>g<b>h</i>E</b>}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'interleave case', q{<i>g<B>h</i>E</b>},
q{<i>g<b>h</b></i><b>E</b>}
],
[ 'interleave open', q{<i>g<b>h</i>E}, q{<i>g<b>h</b></i><b>E</b>} ],
# --- Implicit closing: a new <p> or <li> is expected to close the one still
# --- open, and anything open at end of input is expected to be closed.
[ 'p close order', q{<p>one<p>two<p>three},
q{<p>one</p><p>two</p><p>three</p>}
],
[ 'p/li close order',
q{<ul><li><p>1<li><p>2</ul>},
q{<ul><li><p>1</p></li><li><p>2</p></li></ul>},
],
[ 'p/li left open',
q{<ul><li><p>1<li><p>2},
q{<ul><li><p>1</p></li><li><p>2</p></li></ul>},
],
[ 'italic p', q{<i>foo<p>bar}, q{<i>foo</i><p>bar</p>} ],
# NOTE(review): the label 'misplaced close' is already used by an earlier
# row in this table; a distinct label would make failures easier to tell
# apart.
[ 'misplaced close', q{foo</i>}, q{foo<!--filtered-->} ],
# --- Metacharacter and entity handling.
# NOTE(review): the rows from here on look as if their HTML entities were
# decoded when this listing was extracted: input and expected columns are
# identical literal characters even in the rows labelled as entity tests,
# and the 'valid numeric ent' row no longer has balanced quote delimiters,
# so this listing will not parse as shown. Restore these rows from the
# upstream distribution rather than by hand -- TODO confirm.
# NOTE(review): 'lonley' in the labels looks like a typo for 'lonely'.
# [ 'lonley <', q{<}, q{<} ],
[ 'lonley >', q{>}, q{>} ],
[ 'lonley "', q{"}, q{"} ],
[ 'lonley &', q{&}, q{&} ],
[ 'valid entity', q{<}, q{<} ],
[ 'uppercase entity', q{Þ}, q{Þ} ],
[ 'valid numeric ent', q{{}, '{' ],
[ 'valid hex entity', q{k}, q{k} ],
[ 'unicode numeric', q{ಂ}, q{ಂ} ],
[ 'unicode hex lc', q{뾔}, q{뾔} ],
[ 'unicode hex uc', q{뾔}, q{뾔} ],
[ 'unknown entity', q{&foo;}, q{&foo;} ],
( run in 0.733 second using v1.01-cache-2.11-cpan-62beec7d96d )