HTML-StripScripts-Regex
view release on metacpan or search on metacpan
# Tag interleaving and implicit-close rows.
# Each row is [ label, string, string ]; presumably the second element is the
# HTML fed to the filter and the third is the expected result -- the loop that
# consumes these rows is not visible here, so confirm against it.
# In these rows the third element shows elements closed in strict nesting
# order, with an element reopened after an out-of-order close tag (<b> around
# "E"), and <p>/<li> closed before the next sibling opens.
[ 'interleave case', q{<i>g<B>h</i>E</b>}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'interleave open', q{<i>g<b>h</i>E}, q{<i>g<b>h</b></i><b>E</b>} ],
[ 'p close order', q{<p>one<p>two<p>three}, q{<p>one</p><p>two</p><p>three</p>} ],
[ 'p/li close order', q{<ul><li><p>1<li><p>2</ul>}, q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
[ 'p/li left open', q{<ul><li><p>1<li><p>2}, q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
# <i> is closed before the <p> opens rather than wrapping it.
[ 'italic p', q{<i>foo<p>bar}, q{<i>foo</i><p>bar</p>} ],
# A close tag with no matching open tag is replaced by a <!--filtered--> marker.
[ 'misplaced close', q{foo</i>}, q{foo<!--filtered-->} ],
# Bare special characters and character-entity rows.
# NOTE(review): as shown, most rows below carry the same raw character in both
# string slots (e.g. 'valid entity' holds a bare "<"), and the two 'overlong'
# rows hold U+FFFD in the second element. That pattern looks like HTML entities
# were decoded when this text was extracted, so the original rows probably
# contained entity references -- verify against the upstream test file before
# relying on any of these values.
# NOTE(review): in the 'valid numeric ent' row, q{{} contains an unbalanced
# opening brace, so as written the q{} literal does not end where it appears
# to. This is very likely the same extraction damage -- TODO confirm and
# restore the original entity text.
[ 'lonley <', q{<}, q{<} ],
[ 'lonley >', q{>}, q{>} ],
[ 'lonley "', q{"}, q{"} ],
[ 'lonley &', q{&}, q{&} ],
[ 'valid entity', q{<}, q{<} ],
[ 'uppercase entity', q{Þ}, q{Þ} ],
[ 'valid numeric ent', q{{}, '{' ],
[ 'valid hex entity', q{k}, q{k} ],
[ 'unicode numeric', q{ಂ}, q{ಂ} ],
[ 'unicode hex lc', q{뾔}, q{뾔} ],
[ 'unicode hex uc', q{뾔}, q{뾔} ],
# Entity-like text that is not a well-formed reference: unknown names, odd
# punctuation, over-long names/values, signed or malformed numerics.
[ 'unknown entity', q{&foo;}, q{&foo;} ],
[ 'nasty entity', q{ &{foo}; }, q{ &{foo}; } ],
[ 'minus entity', q{&foo-foo;}, q{&foo-foo;} ],
[ 'underscore entity', q{&foo_foo;}, q{&foo_foo;} ],
[ 'overlong entity', q{&littlesquigglethingwithalinethroughit;}, q{&littlesquigglethingwithalinethroughit;} ],
[ 'overlong hex', q{�}, q{&#x7FB20A4E;} ],
[ 'overlong decimal', q{�}, q{&#349850348;} ],
[ '-ve decimal', q{&#-7;}, q{&#-7;} ],
[ '+ve decimal', q{&#+7;}, q{&#+7;} ],
[ 'invalid numeric', q{&#o777;}, q{&#o777;} ],
# Angle brackets that do not form a recognisable tag.
# Only the '<<script>' row differs between its two strings: the <script> tag
# becomes <!--filtered--> while the leading "<" stays as text.
# NOTE(review): the remaining rows show identical raw "<" / ">" characters in
# both slots; the third element may originally have been entity-escaped and
# lost that in extraction -- confirm against the upstream file.
[ '<<script>', q{<<script>}, q{<<!--filtered-->} ],
[ '< script>', q{< script>}, q{< script>} ],
[ '<>', q{<>}, q{<>} ],
[ '><', q{><}, q{><} ],
[ '<<', q{<<}, q{<<} ],
[ '>>', q{>>}, q{>>} ],
[ '< >', q{< >}, q{< >} ],
[ '</>', q{</>}, q{</>} ],
[ '<_foo>', q{<_foo>}, q{<_foo>} ],
# Nesting-rule rows for <pre>, <ins>/<del> and <a>.
# A second <pre> closes the first (and anything open inside it) instead of
# nesting.
[ 'nest pre', q{<pre>foo<pre>bar}, q{<pre>foo</pre><pre>bar</pre>} ],
[ 'nest pre with i', q{<pre><i>foo<pre>bar}, q{<pre><i>foo</i></pre><pre>bar</pre>} ],
# <ins>/<del> pass through unchanged around a <p> at top level and around text
# inside <i>. When they sit inside <i> and contain a <p>, the third element
# shows them closed empty, the <p> emitted afterwards, and the now-unmatched
# </ins>/</del> and </i> each replaced by <!--filtered-->.
[ 'ins block level', q{xxxx<ins><p>foo</p></ins>yyyy}, q{xxxx<ins><p>foo</p></ins>yyyy} ],
[ 'ins inline level', q{<i>foo<ins>FOO</ins>bar</i>}, q{<i>foo<ins>FOO</ins>bar</i>} ],
[ 'ins inline2block', q{x<i><ins><p>foo</p></ins></i>}, q{x<i><ins></ins></i><p>foo</p><!--filtered--><!--filtered-->} ],
[ 'del block level', q{xxxx<del><p>foo</p></del>yyyy}, q{xxxx<del><p>foo</p></del>yyyy} ],
[ 'del inline level', q{<i>foo<del>FOO</del>bar</i>}, q{<i>foo<del>FOO</del>bar</i>} ],
[ 'del inline2block', q{x<i><del><p>foo</p></del></i>}, q{x<i><del></del></i><p>foo</p><!--filtered--><!--filtered-->} ],
# An <a> opened while another <a> is still open is replaced by <!--filtered-->,
# even when other inline elements sit between the two; the first </a> then
# closes the outer link and later close tags are filtered as unmatched.
[ 'nested a', q{<a>foo<a>bar</a></a>}, q{<a>foo<!--filtered-->bar</a><!--filtered-->} ],
[ 'sneaky nested a', q{<a>f<i>o<b>g<a>o</a>b</b>r</i>x</a>}, q{<a>f<i>o<b>g<!--filtered-->o</b></i></a>b<!--filtered-->r<!--filtered-->x<!--filtered-->} ],
# Comments, "<?...>" / "<!...>" constructs, server-side includes and DOCTYPE.
# In every row but 'SSI unclosed' the construct is replaced by a single
# <!--filtered--> marker; an SSI-style comment terminated by a bare ">" is
# filtered too. The 'SSI unclosed' row has no terminator and its third element
# shows the text kept rather than filtered.
# NOTE(review): 'strip comment 2' and 'SSI unclosed' show raw "<" and '"' in
# the third element; these may have been entity-escaped upstream -- confirm.
# The final 'doctype' row takes its second element from a non-interpolating
# heredoc (<<'END'), so the two DOCTYPE lines plus their trailing newline are
# the literal value; the third element keeps only that newline after the
# marker.
[ 'strip comment', q{x<i>y<!-- hello -->foo}, q{x<i>y<!--filtered-->foo</i>} ],
[ 'strip comment 2', q{x<i>y<<!-- hello -->foo}, q{x<i>y<<!--filtered-->foo</i>} ],
[ 'strip meta ?', q{foo<?foo jibber>bar}, q{foo<!--filtered-->bar} ],
[ 'strip meta !', q{foo<!foo jibber>bar}, q{foo<!--filtered-->bar} ],
[ 'bare comment', q{x<!-- hello -->y}, q{x<!--filtered-->y} ],
[ 'SSI', q{foo<!--# exec "/tmp/grunion" -->pah}, q{foo<!--filtered-->pah} ],
[ 'SSI unclosed', q{foo<!--# exec "/tmp/grunion"}, q{foo<!--# exec "/tmp/grunion"} ],
[ 'SSI misclosed', q{foo<!--# exec "/tmp/grunion" >}, q{foo<!--filtered-->} ],
[ 'xml metatag', q{x<?xml version="1.0" encoding="utf-8"?>y}, q{x<!--filtered-->y} ],
[ 'doctype', <<'END', "<!--filtered-->\n" ],
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
END
# Attribute-handling rows.
# Junk after the tag name is dropped and the tag itself is kept.
[ 'trailing garbage', q{<i /(&(&(&(*%&^^*%&*&%)>hello}, q{<i>hello</i>} ],
# qq{} is used here so that \n is a real newline; each unknown <foo> tag is
# replaced by <!--filtered--> and the newline between them is preserved.
[ 'newline confusion', qq{<foo>\n<foo>}, qq{<!--filtered-->\n<!--filtered-->} ],
# <font> rows: unquoted or single-quoted values come out double-quoted; the
# attribute named "foo" (with a value, empty, or valueless) is absent from the
# third element; and a color of "$-" or a size of "fish" is dropped while the
# other attribute is kept.
[ 'dual attr', q{<font color=red size=2>foo}, q{<font color="red" size="2">foo</font>} ],
[ 'dual attr bad', q{<font color=red size=2 foo=4>foo}, q{<font color="red" size="2">foo</font>} ],
[ 'dual attr empty', q{<font color=red foo="" size=2>foo}, q{<font color="red" size="2">foo</font>} ],
[ 'dual attr noval', q{<font color=red foo size=2>foo}, q{<font color="red" size="2">foo</font>} ],
[ 'dual attr mixed', q{<font color="red" size='2'>foo}, q{<font color="red" size="2">foo</font>} ],
[ 'dual attr 1st bad', q{<font color="$-" size="3">foo}, q{<font size="3">foo</font>} ],
[ 'dual attr 2nd bad', q{<font color="red" size="fish">foo}, q{<font color="red">foo</font>} ],
# Tag and attribute names given in mixed or upper case appear lower-cased in
# the third element.
[ 'attr mixed case', q{<FoNt COLOR="red" size="fish">foo}, q{<font color="red">foo</font>} ],
[ 'attr upper case', q{<FONT COLOR="red" SIZE="fish">foo</FONT>}, q{<font color="red">foo</font>} ],
# Long rows whose strings are built by concatenating several q{} pieces with
# the "." operator; the comma after a piece marks the boundary between the
# second and third element of the row.

# </u> arrives while font/i/b/font are still open. The third element closes
# all of them innermost-first at that point, then reopens font/i/b/font (but
# not <u>) around the trailing "Y". It also shows the font attributes in a
# different order from the second element (color before size) and the style
# value without the space after the colon.
[
'heavy duty de-interleave',
q{<u>x<font size=4 color=red>y<i>b<b><font color=blue style="background-color: pink">X</u>Y},
q{<u>x<font color="red" size="4">y<i>b<b><font color="blue" style="background-color:pink">X}.
q{</font></b></i></font></u><font color="red" size="4"><i><b><font color="blue" style="background-color:pink">Y}.
q{</font></b></i></font>}
],
# A chain of inline elements left open inside <pre>. The third element repeats
# the second unchanged and appends the close tags in reverse order of opening;
# <br /> gets no close tag.
[
'tags in pre',
q{<pre>}.
q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn>}.
q{<code><q><samp><kbd><var><del><cite><abbr><acronym><a>foo},
q{<pre>}.
q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn>}.
q{<code><q><samp><kbd><var><del><cite><abbr><acronym><a>foo}.
q{</a></acronym></abbr></cite></del></var></kbd></samp></q></code></dfn>}.
q{</strong></ins></em></strike></s></u></b></i></tt></span></pre>}
],
# </a> arrives while <tt> is still open inside it; the third element closes
# <tt> first, then <a>, then the outer <i>.
[
'interleave i/a',
q{<i><a><tt>foo</a>},
q{<i><a><tt>foo</tt></a></i>},
],
# A longer chain of inline elements left open inside <i><a>. The third element
# lists the font attributes as color, face, size where the second element has
# size, face, color, and appends the close tags in reverse order of opening;
# <br /> gets no close tag.
[
'tags in i',
q{<i><a>}.
q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn><big><small>}.
q{<font size="3" face="Helvetica" color="#FFFFFF">}.
q{<code><q><samp><kbd><var><del><cite><abbr><acronym><sub><sup><nobr>foo},
q{<i><a>}.
q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn><big><small>}.
q{<font color="#FFFFFF" face="Helvetica" size="3">}.
q{<code><q><samp><kbd><var><del><cite><abbr><acronym><sub><sup><nobr>foo}.
q{</nobr></sup></sub></acronym></abbr></cite></del></var></kbd></samp></q></code>}.
q{</font>}.
q{</small></big></dfn></strong></ins></em></strike></s></u></b></i></tt></span>}.
q{</a></i>}
],
( run in 1.297 second using v1.01-cache-2.11-cpan-119454b85a5 )