HTML-StripScripts-Regex

 view release on metacpan or  search on metacpan

t/50flow.t  view on Meta::CPAN

  [ 'interleave case',   q{<i>g<B>h</i>E</b>}, q{<i>g<b>h</b></i><b>E</b>} ],
  [ 'interleave open',   q{<i>g<b>h</i>E},     q{<i>g<b>h</b></i><b>E</b>} ],
  [ 'p close order',     q{<p>one<p>two<p>three}, q{<p>one</p><p>two</p><p>three</p>} ],
  [ 'p/li close order',  q{<ul><li><p>1<li><p>2</ul>}, q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
  [ 'p/li left open',    q{<ul><li><p>1<li><p>2},      q{<ul><li><p>1</p></li><li><p>2</p></li></ul>}, ],
  [ 'italic p',          q{<i>foo<p>bar}, q{<i>foo</i><p>bar</p>} ],
  [ 'misplaced close',   q{foo</i>}, q{foo<!--filtered-->} ],

  [ 'lonley <',          q{<}, q{&lt;} ],
  [ 'lonley >',          q{>}, q{&gt;} ],
  [ 'lonley "',          q{"}, q{&quot;} ],
  [ 'lonley &',          q{&}, q{&amp;} ],
  [ 'valid entity',      q{&lt;}, q{&lt;} ],
  [ 'uppercase entity',  q{&THORN;}, q{&THORN;} ],
  [ 'valid numeric ent', q{&#123;}, '{' ],
  [ 'valid hex entity',  q{&#x6B;}, q{k} ],
  [ 'unicode numeric',   q{&#3202;}, q{&#3202;} ],
  [ 'unicode hex lc',    q{&#xBF94;}, q{&#xBF94;} ],
  [ 'unicode hex uc',    q{&#XBF94;}, q{&#xBF94;} ],
  [ 'unknown entity',    q{&foo;}, q{&foo;} ],
  [ 'nasty entity',      q{ &{foo}; }, q{ &amp;{foo}; } ],
  [ 'minus entity',      q{&foo-foo;}, q{&amp;foo-foo;} ],
  [ 'underscore entity', q{&foo_foo;}, q{&amp;foo_foo;} ],
  [ 'overlong entity',   q{&littlesquigglethingwithalinethroughit;}, q{&amp;littlesquigglethingwithalinethroughit;} ],
  [ 'overlong hex',      q{&#x7FB20A4E;}, q{&amp;#x7FB20A4E;} ],
  [ 'overlong decimal',  q{&#349850348;}, q{&amp;#349850348;} ],
  [ '-ve decimal',       q{&#-7;}, q{&amp;#-7;} ],
  [ '+ve decimal',       q{&#+7;}, q{&amp;#+7;} ],
  [ 'invalid numeric',   q{&#o777;}, q{&amp;#o777;} ],
  [ '<<script>',         q{<<script>}, q{&lt;<!--filtered-->} ],
  [ '< script>',         q{< script>}, q{&lt; script&gt;} ],
  [ '<>',                q{<>}, q{&lt;&gt;} ],
  [ '><',                q{><}, q{&gt;&lt;} ],
  [ '<<',                q{<<}, q{&lt;&lt;} ],
  [ '>>',                q{>>}, q{&gt;&gt;} ],
  [ '< >',               q{< >}, q{&lt; &gt;} ],
  [ '</>',               q{</>}, q{&lt;/&gt;} ],
  [ '<_foo>',            q{<_foo>}, q{&lt;_foo&gt;} ],

  [ 'nest pre',          q{<pre>foo<pre>bar}, q{<pre>foo</pre><pre>bar</pre>} ],
  [ 'nest pre with i',   q{<pre><i>foo<pre>bar}, q{<pre><i>foo</i></pre><pre>bar</pre>} ],
  [ 'ins block level',   q{xxxx<ins><p>foo</p></ins>yyyy}, q{xxxx<ins><p>foo</p></ins>yyyy} ],
  [ 'ins inline level',  q{<i>foo<ins>FOO</ins>bar</i>}, q{<i>foo<ins>FOO</ins>bar</i>} ],
  [ 'ins inline2block',  q{x<i><ins><p>foo</p></ins></i>}, q{x<i><ins></ins></i><p>foo</p><!--filtered--><!--filtered-->} ],
  [ 'del block level',   q{xxxx<del><p>foo</p></del>yyyy}, q{xxxx<del><p>foo</p></del>yyyy} ],
  [ 'del inline level',  q{<i>foo<del>FOO</del>bar</i>}, q{<i>foo<del>FOO</del>bar</i>} ],
  [ 'del inline2block',  q{x<i><del><p>foo</p></del></i>}, q{x<i><del></del></i><p>foo</p><!--filtered--><!--filtered-->} ],
  [ 'nested a',          q{<a>foo<a>bar</a></a>}, q{<a>foo<!--filtered-->bar</a><!--filtered-->} ],
  [ 'sneaky nested a',   q{<a>f<i>o<b>g<a>o</a>b</b>r</i>x</a>}, q{<a>f<i>o<b>g<!--filtered-->o</b></i></a>b<!--filtered-->r<!--filtered-->x<!--filtered-->} ],

  [ 'strip comment',     q{x<i>y<!-- hello -->foo}, q{x<i>y<!--filtered-->foo</i>} ],
  [ 'strip comment 2',   q{x<i>y<<!-- hello -->foo}, q{x<i>y&lt;<!--filtered-->foo</i>} ],
  [ 'strip meta ?',      q{foo<?foo jibber>bar}, q{foo<!--filtered-->bar} ],
  [ 'strip meta !',      q{foo<!foo jibber>bar}, q{foo<!--filtered-->bar} ],
  [ 'bare comment',      q{x<!-- hello -->y}, q{x<!--filtered-->y} ],
  [ 'SSI',               q{foo<!--# exec "/tmp/grunion" -->pah}, q{foo<!--filtered-->pah} ],
  [ 'SSI unclosed',      q{foo<!--# exec "/tmp/grunion"}, q{foo&lt;!--# exec &quot;/tmp/grunion&quot;} ],
  [ 'SSI misclosed',     q{foo<!--# exec "/tmp/grunion" >}, q{foo<!--filtered-->} ],
  [ 'xml metatag',       q{x<?xml version="1.0" encoding="utf-8"?>y}, q{x<!--filtered-->y} ],
  [ 'doctype',           <<'END', "<!--filtered-->\n" ],
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
END
  [ 'trailing garbage',  q{<i /(&(&(&(*%&^^*%&*&%)>hello}, q{<i>hello</i>} ],
  [ 'newline confusion', qq{<foo>\n<foo>}, qq{<!--filtered-->\n<!--filtered-->} ],

  [ 'dual attr',         q{<font color=red size=2>foo}, q{<font color="red" size="2">foo</font>} ],
  [ 'dual attr bad',     q{<font color=red size=2 foo=4>foo}, q{<font color="red" size="2">foo</font>} ],
  [ 'dual attr empty',   q{<font color=red foo="" size=2>foo}, q{<font color="red" size="2">foo</font>} ],
  [ 'dual attr noval',   q{<font color=red foo size=2>foo}, q{<font color="red" size="2">foo</font>} ],
  [ 'dual attr mixed',   q{<font color="red" size='2'>foo}, q{<font color="red" size="2">foo</font>} ],
  [ 'dual attr 1st bad', q{<font color="$-" size="3">foo}, q{<font size="3">foo</font>} ],
  [ 'dual attr 2nd bad', q{<font color="red" size="fish">foo}, q{<font color="red">foo</font>} ],
  [ 'attr mixed case',   q{<FoNt COLOR="red" size="fish">foo}, q{<font color="red">foo</font>} ],
  [ 'attr upper case',   q{<FONT COLOR="red" SIZE="fish">foo</FONT>}, q{<font color="red">foo</font>} ],


  [
    'heavy duty de-interleave',
    q{<u>x<font size=4 color=red>y<i>b<b><font color=blue style="background-color: pink">X</u>Y},
    q{<u>x<font color="red" size="4">y<i>b<b><font color="blue" style="background-color:pink">X}.
    q{</font></b></i></font></u><font color="red" size="4"><i><b><font color="blue" style="background-color:pink">Y}.
    q{</font></b></i></font>}
  ],
  
  [
    'tags in pre',
    q{<pre>}.
    q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn>}.
    q{<code><q><samp><kbd><var><del><cite><abbr><acronym><a>foo},

    q{<pre>}.
    q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn>}.
    q{<code><q><samp><kbd><var><del><cite><abbr><acronym><a>foo}.
    q{</a></acronym></abbr></cite></del></var></kbd></samp></q></code></dfn>}.
    q{</strong></ins></em></strike></s></u></b></i></tt></span></pre>}
  ],

  [
    'interleave i/a',
    q{<i><a><tt>foo</a>},
    q{<i><a><tt>foo</tt></a></i>},
  ],

  [
    'tags in i',
    q{<i><a>}.
    q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn><big><small>}.
    q{<font size="3" face="Helvetica" color="#FFFFFF">}.
    q{<code><q><samp><kbd><var><del><cite><abbr><acronym><sub><sup><nobr>foo},

    q{<i><a>}.
    q{<br /><span><tt><i><b><u><s><strike><em><ins><strong><dfn><big><small>}.
    q{<font color="#FFFFFF" face="Helvetica" size="3">}.
    q{<code><q><samp><kbd><var><del><cite><abbr><acronym><sub><sup><nobr>foo}.
    q{</nobr></sup></sub></acronym></abbr></cite></del></var></kbd></samp></q></code>}.
    q{</font>}.
    q{</small></big></dfn></strong></ins></em></strike></s></u></b></i></tt></span>}.
    q{</a></i>}
  ],



( run in 1.297 second using v1.01-cache-2.11-cpan-119454b85a5 )