HTML-StripScripts-Regex
view release on metacpan or search on metacpan
use strict;
use warnings;
BEGIN {
use vars qw(@tests);
@tests = (
[ 'empty', q{}, q{} ],
[ 'space', q{ }, q{ } ],
[ 'plain', q{hello mum}, q{hello mum} ],
[ 'plain nl', qq{hello mum\n}, "hello mum\n" ],
[ 'nonprint', qq{foo\0bar}, "foo bar" ],
[ 'p tag', qq{<p>hello mum\n}, "<p>hello mum\n</p>" ],
[ 'i tag', qq{<i>hello mum\n}, "<i>hello mum\n</i>" ],
[ 'valid p', q{<ins><p>valid p</p></ins>}, q{<ins><p>valid p</p></ins>} ],
[ 'misplaced tr', q{misplaced <tr>}, q{misplaced <!--filtered-->} ],
[ 'misplaced td', q{misplaced <td>}, q{misplaced <!--filtered-->} ],
[ 'misplaced li', q{misplaced <li>}, q{misplaced <!--filtered-->} ],
[ 'misplaced cdata', q{<table><tr>misplaced cdata<td>hello}, q{<table><tr></tr></table>misplaced cdata<!--filtered-->hello} ],
[ 'pass emtpy img', q{<img>}, q{<img />} ],
[ 'block img src', q{<img src="http://example.com/foo.png" />}, q{<img />} ],
[ 'block a href', q{<a href="http://foo.foo/foo">x}, q{<a>x</a>} ],
[ 'block a mailto', q{<a href="mailto:foo@foo.foo">x}, q{<a>x</a>} ],
[ 'unknown tag', q{<foo>}, q{<!--filtered-->} ],
[ 'unknown attr', q{<i foo=foo>}, q{<i></i>} ],
[ 'misplaced close', q{</i>}, q{<!--filtered-->} ],
[ 'br', q{<br>hello</br>}, q{<br />hello<!--filtered-->} ],
[ 'hr width', q{x<hr width=4>y}, q{x<hr width="4" />y} ],
[ 'hr width dq', q{x<hr width="4">y}, q{x<hr width="4" />y} ],
[ 'hr width sq', q{x<hr width='4'>y}, q{x<hr width="4" />y} ],
[ 'hr silly width', q{x<hr width=18234081234019840138340938410242343144>y}, q{x<hr />y} ],
[ 'hr silly width dq', q{x<hr width="18234081234019840138340938410242343144">y}, q{x<hr />y} ],
[ 'hr silly width sq', q{x<hr width='18234081234019840138340938410242343144'>y}, q{x<hr />y} ],
[ 'bad trailing /', q{<i />hello</i>}, q{<i>hello</i>} ],
[ 'good trailing /', q{<br />}, q{<br />} ],
[ 'interleave', q{<i>g<b>h</i>E</b>}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'interleave case', q{<i>g<B>h</i>E</b>}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'interleave open', q{<i>g<b>h</i>E}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'p close order', q{<p>one<p>two<p>three}, q{<p>one</p><p>two</p><p>three</p>} ],
[ 'p/li close order', q{<ul><li><p>1<li><p>2</ul>}, q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
[ 'p/li left open', q{<ul><li><p>1<li><p>2}, q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
[ 'italic p', q{<i>foo<p>bar}, q{<i>foo</i><p>bar</p>} ],
[ 'misplaced close', q{foo</i>}, q{foo<!--filtered-->} ],
[ 'lonley <', q{<}, q{<} ],
[ 'lonley >', q{>}, q{>} ],
[ 'lonley "', q{"}, q{"} ],
[ 'lonley &', q{&}, q{&} ],
[ 'valid entity', q{<}, q{<} ],
[ 'uppercase entity', q{Þ}, q{Þ} ],
[ 'valid numeric ent', q{{}, '{' ],
[ 'valid hex entity', q{k}, q{k} ],
[ 'unicode numeric', q{ಂ}, q{ಂ} ],
[ 'unicode hex lc', q{뾔}, q{뾔} ],
[ 'unicode hex uc', q{뾔}, q{뾔} ],
[ 'unknown entity', q{&foo;}, q{&foo;} ],
[ 'nasty entity', q{ &{foo}; }, q{ &{foo}; } ],
[ 'minus entity', q{&foo-foo;}, q{&foo-foo;} ],
[ 'underscore entity', q{&foo_foo;}, q{&foo_foo;} ],
[ 'overlong entity', q{&littlesquigglethingwithalinethroughit;}, q{&littlesquigglethingwithalinethroughit;} ],
[ 'overlong hex', q{�}, q{&#x7FB20A4E;} ],
[ 'overlong decimal', q{�}, q{&#349850348;} ],
[ '-ve decimal', q{&#-7;}, q{&#-7;} ],
[ '+ve decimal', q{&#+7;}, q{&#+7;} ],
[ 'invalid numeric', q{&#o777;}, q{&#o777;} ],
[ '<<script>', q{<<script>}, q{<<!--filtered-->} ],
[ '< script>', q{< script>}, q{< script>} ],
[ '<>', q{<>}, q{<>} ],
[ '><', q{><}, q{><} ],
[ '<<', q{<<}, q{<<} ],
[ '>>', q{>>}, q{>>} ],
[ '< >', q{< >}, q{< >} ],
[ '</>', q{</>}, q{</>} ],
[ '<_foo>', q{<_foo>}, q{<_foo>} ],
[ 'nest pre', q{<pre>foo<pre>bar}, q{<pre>foo</pre><pre>bar</pre>} ],
[ 'nest pre with i', q{<pre><i>foo<pre>bar}, q{<pre><i>foo</i></pre><pre>bar</pre>} ],
[ 'ins block level', q{xxxx<ins><p>foo</p></ins>yyyy}, q{xxxx<ins><p>foo</p></ins>yyyy} ],
[ 'ins inline level', q{<i>foo<ins>FOO</ins>bar</i>}, q{<i>foo<ins>FOO</ins>bar</i>} ],
( run in 0.332 second using v1.01-cache-2.11-cpan-3d66aa2751a )