HTML-Laundry

 view release on metacpan or  search on metacpan

lib/HTML/Laundry.pm  view on Meta::CPAN

    my $p  = $self->{parser};
    my $cp = $self->{cdata_parser};
    $p->parse($chunk);
    if ( !$in_cdata && !$unacceptable_count ) {
        $p->eof();
    }
    if ( $in_cdata && !$local_unacceptable_count ) {
        $cp->eof();
    }
    my $output = $self->gen_output;
    $cp->eof();    # Clear buffer if we haven't already
    if ($cdata_dirty) {    # Overkill to get out of CDATA parser state
        $self->{parser} = HTML::Parser->new(
            api_version => 3,
            start_h =>
                [ sub { $self->_tag_start_handler(@_) }, 'tagname,attr' ],
            end_h => [ sub { $self->_tag_end_handler(@_) }, 'tagname,attr' ],
            text_h => [ sub { $self->_text_handler(@_) }, 'dtext,is_cdata' ],
            empty_element_tags => 1,
            marked_sections    => 1,
        );
    }
    else {
        $p->eof();         # Clear buffer if we haven't already
    }
    return $output;
}

=head2 base_uri

Used to get or set the base_uri property, used in URI rebasing.

    my $base_uri = $l->base_uri; # returns current base_uri
    $l->base_uri(q{http://example.com}); # return 'http://example.com'
    $l->base_uri(''); # unsets base_uri

=cut

sub base_uri {
    my ( $self, $new_base ) = @_;
    if ( defined $new_base and !ref $new_base ) {
        $self->{base_uri} = $new_base;
    }
    return $self->{base_uri};
}

sub _run_callbacks {
    my $self   = shift;
    my $action = shift;
    return unless $action;
    my $type = $action . q{_callback};
    for my $callback ( @{ $self->{$type} } ) {
        my $result = $callback->( $self, @_ );
        return unless $result;
    }
    return 1;
}

=head2 gen_output

Used to generate the final, XHTML output from the internal stack of text and 
tag tokens. Generally meant to be used internally, but potentially useful for
callbacks that require a snapshot of what the output would look like
before the cleaning process is complete.

    my $xhtml = $l->gen_output;

=cut

sub gen_output {
    my $self = shift;
    if ( !$self->_run_callbacks( q{output}, \@fragments ) ) {
        return q{};
    }
    my $output = join '', @fragments;
    if ( $self->{tidy} ) {
        if ( $self->{tidy_engine} eq q{HTML::Tidy} ) {
            $output = $self->{tidy}->clean($output);
            $self->{tidy}->clear_messages;
        }
        elsif ( $self->{tidy_engine} eq q{HTML::Tidy::libXML} ) {
            my $clean
                = $self->{tidy}
                ->clean( $self->{tidy_head} . $output . $self->{tidy_foot},
                'UTF-8', 1 );
            $output = substr( $clean, length $self->{tidy_head} );
            $output = substr( $output, 0, -1 * length $self->{tidy_foot} );
        }
    }
    if ( $self->{trim_trailing_whitespace} ) {
        $output =~ s/\s+$//;
    }
    return $output;
}

=head2 empty_elements

Returns a list of the Laundry object's known empty elements: elements such
as <img /> or <br /> which must not contain any children.

=cut

sub empty_elements {
    my ( $self, $listref ) = @_;
    if ($listref) {
        my @list = @{$listref};
        my %empty = map { ( $_, 1 ) } @list;
        $self->{empty_e} = \%empty;
    }
    return keys %{ $self->{empty_e} };
}

=head2 remove_empty_element

Removes an element (or, if given an array reference, multiple elements) from
the "empty elements" list maintained by the Laundry object.

    $l->remove_empty_element(['img', 'br']); # Let's break XHTML!
    
This will not affect the acceptable/unacceptable status of the elements.



( run in 2.507 seconds using v1.01-cache-2.11-cpan-411bb0df24b )