HTML-Laundry
view release on metacpan or search on metacpan
lib/HTML/Laundry/Rules.pm view on Meta::CPAN
=cut
# Build the whitelist of element names this ruleset accepts.
#
# Takes the invocant (unused here) and returns a reference to a freshly
# built hash whose keys are element names and whose values are all 1.
sub acceptable_e {
    my ($self) = @_;

    my %is_acceptable;
    $is_acceptable{$_} = 1 for qw(
        a abbr acronym address area b bdo big blockquote
        br button caption center cite code col colgroup dd
        del dfn dir div dl dt em fieldset font form
        h1 h2 h3 h4 h5 h6 hr i img input ins kbd
        label legend li map menu ol optgroup option p
        pre q s samp select small span strike strong
        sub sup table tbody td textarea tfoot th thead
        tr tt u ul var wbr
    );

    return \%is_acceptable;
}
=head2 empty_e
t/parser_default.t view on Meta::CPAN
# Load the module under test and build the Laundry object shared by the
# assertions that follow.
require_ok('HTML::Laundry');
# NOTE(review): `notidy => 1` presumably turns the tidying pass off so only the
# parser is exercised -- confirm against HTML::Laundry->new.
my $l1 = HTML::Laundry->new({ notidy => 1 });
isa_ok($l1, 'HTML::Laundry', 'New object isa HTML::Laundry');
# Plain text without markup must come back from clean() unchanged.
my $plaintext = 'She was the youngest of the two daughters of a most affectionate, indulgent father...';
is( $l1->clean($plaintext), $plaintext, 'Short plain text passes through cleanly');
$plaintext = 'She was the youngest of the two daughters of a most affectionate, indulgent father; and had, in consequence of her sister\'s marriage, been mistress of his house from a very early period. Her mother had died too long ago for her to have...
is( $l1->clean($plaintext), $plaintext, 'Longer plain text passes through cleanly');
# Non-ASCII round trip: clean() must return this text unchanged.
# The literal is written as real UTF-8 (U+016B, CJK ideographs, U+2013) rather
# than as the Latin-1 rendering of those bytes.
# NOTE(review): whether Perl sees characters or raw octets here depends on
# `use utf8`, which is outside this excerpt -- confirm.
my $kurosawa = q[Akira Kurosawa (Kyūjitai: 黒澤 明, Shinjitai: 黒沢 明 Kurosawa Akira, 23 March 1910 – 6 September 1998) was a legendary Japanese filmmaker, producer, screenwriter and editor];
is( $l1->clean($kurosawa), $kurosawa, 'UTF-8 text passes through cleanly');
my $valid = '<p class="opening">She was the youngest of the two daughters of a most affectionate, indulgent <a href="#footnote1">father</a>; and had, in consequence of her sister\'s marriage, been mistress of his house from a very early period. Her m...
is( $l1->clean($valid), $valid, 'Validating text passes through cleanly');
# Serialisation of empty vs. non-empty elements.
# Inputs already in the expected form must be returned as-is.
is( $l1->clean('<p></p>'), '<p></p>', 'Non-empty tag passes through cleanly');
is( $l1->clean('<br />'), '<br />', 'Empty tag passes through cleanly');
# Stray whitespace inside the self-closing marker is normalised to '<br />'.
is( $l1->clean('<br / >'), '<br />', 'Empty tag with whitespace passes through cleanly');
# A non-empty element written self-closed is expanded to an open/close pair...
is( $l1->clean('<p />'), '<p></p>', 'Non-empty tag passed in as empty is normalized to non-empty format');
# ...and an empty element written as a pair is collapsed to self-closing form.
is( $l1->clean('<br></br>'), '<br />', 'Empty tag passed in as non-empty is normalized to empty format');
# Attributes must survive on both kinds of element.
is( $l1->clean('<br class="foo" />'), '<br class="foo" />', 'Empty tag attribute is preserved');
is( $l1->clean('<p class="foo"></p>'), '<p class="foo"></p>', 'Non-empty tag attribute is preserved');
t/ruleset_minimal.t view on Meta::CPAN
rules => 'HTML::Laundry::Rules::Minimal' });
# Element names the loop below treats as allowed ($ok{$e} true); every other
# name in @e takes the loop's remaining branch(es), which are outside this excerpt.
my @ok = qw( a b br blockquote code em i li ol p pre strong u ul );
# Lookup table so membership in @ok can be tested with $ok{$name}.
my %ok = map { $_ => 1 } @ok;
# Full list of element names the loop below iterates over.
my @e = (
'a', 'abbr', 'acronym', 'address', 'area', 'b', 'bdo', 'big', 'blockquote',
'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd',
'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd',
'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p',
'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead',
'tr', 'tt', 'u', 'ul', 'var', 'wbr'
);
foreach my $e ( @e ) {
if ( $ok{$e} and $e ne 'br' ) {
# The only allowed empty element in this ruleset is <br />
is( $l1->clean("<$e></$e>"), "<$e></$e>", "element $e is not sanitized");
} elsif ( $ok{$e} ) {
t/sanitize_default.t view on Meta::CPAN
'', 'DOCTYPE declaration is stripped');
# Attribute filtering: the unknown attribute `plugh` must be removed while the
# known attribute `class` and the element's text are kept.
is( $l1->clean('<p class="xyzzy" plugh="plover">Her situation in life, the character of her father and mother, her own person and disposition, were all equally against her.</p>'),
'<p class="xyzzy">Her situation in life, the character of her father and mother, her own person and disposition, were all equally against her.</p>',
'Unknown attribute is stripped, but known attribute remains' );
# Element names used by later assertions in this script (outside this excerpt).
# The names match the acceptable_e list in lib/HTML/Laundry/Rules.pm.
my @e = (
'a', 'abbr', 'acronym', 'address', 'area', 'b', 'bdo', 'big', 'blockquote',
'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd',
'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd',
'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p',
'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead',
'tr', 'tt', 'u', 'ul', 'var', 'wbr'
);
my %empty = (
area => 1,
br => 1,
col => 1,
hr => 1,
t/tidy_default.t view on Meta::CPAN
my $plaintext = 'She was the youngest of the two daughters of a most affectionate, indulgent father...';
is( $l->clean($plaintext), $plaintext, 'Short plain text passes through cleanly');
$plaintext = q{She had been a friend and companion such as few possessed: intelligent, well-informed, useful, gentle, knowing all the ways of the family, interested in all its concerns, and peculiarly interested in herself, in every pleasure, eve...
is( $l->clean($plaintext), $plaintext, 'Longer plain text passes through cleanly');
TODO: {
    # HTML::Tidy 1.56 fixes unicode support, so the assertion is only marked
    # TODO when an older HTML::Tidy (or none) is installed.
    local $TODO = "HTML::Tidy version dependent. Install HTML::Tidy 1.56 or greater"
        unless eval { HTML::Tidy->VERSION(1.56) };

    # Encode::encode() yields UTF-8 octets and Encode::decode() yields a
    # character string, so the lexicals are named for what they hold.
    # The literal is written as real UTF-8 rather than its Latin-1 rendering.
    # NOTE(review): whether the literal itself is characters or octets depends
    # on `use utf8`, which is outside this excerpt -- confirm.
    my $kurosawa_octets = Encode::encode('UTF-8', q[Akira Kurosawa (Kyūjitai: 黒澤 明, Shinjitai: 黒沢 明 Kurosawa Akira, 23 March 1910 – 6 September 1998) was a legendary Japanese filmmaker, producer, screenwriter and editor]);
    my $kurosawa_text = Encode::decode('UTF-8', $kurosawa_octets);

    # clean() is fed the encoded octets and is expected to return decoded text.
    is( $l->clean($kurosawa_octets), $kurosawa_text, 'UTF-8 text passes through cleanly');
};
my $valid = q{<p>} . $plaintext . q{</p>};
is( $l->clean($valid), $valid, 'Validating HTML passes through cleanly');
TODO: {
local $TODO = "libtidy version dependent - figure out how to check";
is( $l->clean('<div></div>'), q{}, 'No-content elements are stripped...');
is( $l->clean('<div foo="bar"></div>'), q{<div id="foo"></div>}, '...unless they have attributes');
t/tidy_libxml.t view on Meta::CPAN
};
}
# Build a Laundry object with default options and check which tidying engine
# it recorded in its tidy_engine slot.
my $l = HTML::Laundry->new();
# With HTML::Tidy unavailable, the fallback engine is expected to be HTML::Tidy::libXML.
is( $l->{tidy_engine}, q{HTML::Tidy::libXML},
'Laundry uses HTML::Tidy::libXML as tidying engine if HTML::Tidy is unavailable'
);
# Plain text without markup must come back from clean() unchanged.
my $plaintext = 'She was the youngest of the two daughters of a most affectionate, indulgent father...';
is( $l->clean($plaintext), $plaintext, 'Short plain text passes through cleanly');
$plaintext = q{She had been a friend and companion such as few possessed: intelligent, well-informed, useful, gentle, knowing all the ways of the family, interested in all its concerns, and peculiarly interested in herself, in every pleasure, eve...
is( $l->clean($plaintext), $plaintext, 'Longer plain text passes through cleanly');
# Non-ASCII round trip through the libXML engine: clean() must return this
# text unchanged. The literal is written as real UTF-8 (U+016B, CJK
# ideographs, U+2013) rather than as the Latin-1 rendering of those bytes.
# NOTE(review): whether Perl sees characters or raw octets here depends on
# `use utf8`, which is outside this excerpt -- confirm.
my $kurosawa = q[Akira Kurosawa (Kyūjitai: 黒澤 明, Shinjitai: 黒沢 明 Kurosawa Akira, 23 March 1910 – 6 September 1998) was a legendary Japanese filmmaker, producer, screenwriter and editor];
is( $l->clean($kurosawa), $kurosawa, 'UTF-8 text passes through cleanly');
my $valid = q{<p>} . $plaintext . q{</p>};
is( $l->clean($valid), $valid, 'Validating HTML passes through cleanly');
# Serialisation under the libXML engine: elements without content are
# expected in the compact self-closing form with no space before '/>'.
# A content-less <p> is collapsed to '<p/>'.
is( $l->clean('<p></p>'), '<p/>', 'libXML collapses no-content paragraph tags to empty element');
# Every spelling of an empty <br> normalises to '<br/>'.
is( $l->clean('<br />'), '<br/>', 'Empty tag passes through cleanly');
is( $l->clean('<br / >'), '<br/>', 'Empty tag with whitespace passes through cleanly');
is( $l->clean('<br></br>'), '<br/>', 'Empty tag passed in as non-empty is normalized to empty format');
# Attributes are kept; the element is still emitted self-closed.
is( $l->clean('<br class="foo" />'), '<br class="foo"/>', 'Empty tag attribute is preserved');
is( $l->clean('<p class="foo"></p>'), '<p class="foo"/>', 'Non-empty tag attribute is preserved');
# Actual tidying begins
( run in 1.374 second using v1.01-cache-2.11-cpan-49f99fa48dc )