HTML-Laundry
view release on metacpan or search on metacpan
lib/HTML/Laundry.pm view on Meta::CPAN
my $p = $self->{parser};
my $cp = $self->{cdata_parser};
$p->parse($chunk);
if ( !$in_cdata && !$unacceptable_count ) {
$p->eof();
}
if ( $in_cdata && !$local_unacceptable_count ) {
$cp->eof();
}
my $output = $self->gen_output;
$cp->eof(); # Clear buffer if we haven't already
if ($cdata_dirty) { # Overkill to get out of CDATA parser state
$self->{parser} = HTML::Parser->new(
api_version => 3,
start_h =>
[ sub { $self->_tag_start_handler(@_) }, 'tagname,attr' ],
end_h => [ sub { $self->_tag_end_handler(@_) }, 'tagname,attr' ],
text_h => [ sub { $self->_text_handler(@_) }, 'dtext,is_cdata' ],
empty_element_tags => 1,
marked_sections => 1,
);
}
else {
$p->eof(); # Clear buffer if we haven't already
}
return $output;
}
=head2 base_uri
Used to get or set the base_uri property, used in URI rebasing.
my $base_uri = $l->base_uri; # returns current base_uri
$l->base_uri(q{http://example.com}); # return 'http://example.com'
$l->base_uri(''); # unsets base_uri
=cut
sub base_uri {
my ( $self, $new_base ) = @_;
if ( defined $new_base and !ref $new_base ) {
$self->{base_uri} = $new_base;
}
return $self->{base_uri};
}
sub _run_callbacks {
my $self = shift;
my $action = shift;
return unless $action;
my $type = $action . q{_callback};
for my $callback ( @{ $self->{$type} } ) {
my $result = $callback->( $self, @_ );
return unless $result;
}
return 1;
}
=head2 gen_output
Used to generate the final, XHTML output from the internal stack of text and
tag tokens. Generally meant to be used internally, but potentially useful for
callbacks that require a snapshot of what the output would look like
before the cleaning process is complete.
my $xhtml = $l->gen_output;
=cut
sub gen_output {
my $self = shift;
if ( !$self->_run_callbacks( q{output}, \@fragments ) ) {
return q{};
}
my $output = join '', @fragments;
if ( $self->{tidy} ) {
if ( $self->{tidy_engine} eq q{HTML::Tidy} ) {
$output = $self->{tidy}->clean($output);
$self->{tidy}->clear_messages;
}
elsif ( $self->{tidy_engine} eq q{HTML::Tidy::libXML} ) {
my $clean
= $self->{tidy}
->clean( $self->{tidy_head} . $output . $self->{tidy_foot},
'UTF-8', 1 );
$output = substr( $clean, length $self->{tidy_head} );
$output = substr( $output, 0, -1 * length $self->{tidy_foot} );
}
}
if ( $self->{trim_trailing_whitespace} ) {
$output =~ s/\s+$//;
}
return $output;
}
=head2 empty_elements
Returns a list of the Laundry object's known empty elements: elements such
as <img /> or <br /> which must not contain any children.
=cut
sub empty_elements {
my ( $self, $listref ) = @_;
if ($listref) {
my @list = @{$listref};
my %empty = map { ( $_, 1 ) } @list;
$self->{empty_e} = \%empty;
}
return keys %{ $self->{empty_e} };
}
=head2 remove_empty_element
Removes an element (or, if given an array reference, multiple elements) from
the "empty elements" list maintained by the Laundry object.
$l->remove_empty_element(['img', 'br']); # Let's break XHTML!
This will not affect the acceptable/unacceptable status of the elements.
( run in 2.507 seconds using v1.01-cache-2.11-cpan-411bb0df24b )