HTML-DOM

 view release on metacpan or  search on metacpan

lib/HTML/DOM/_TreeBuilder.pm  view on Meta::CPAN

        ## would ever leave any WS nodes in the tree.
        ## If not, then there's no reason to have eof() call
        ## delete_ignorable_whitespace on the tree, is there?
        ##
    #if(@to_close and $self->{'_tighten'} and !$self->{'_ignore_text'} and
    #  ! $to_close[-1]->is_inside('pre', keys %HTML::Tagset::isCDATA_Parent)
    #) {  # if tightenable
    #  my($children, $e_tag);
    #  foreach my $e (reverse @to_close) { # going top-down
    #    last if 'pre' eq ($e_tag = $e->{'_tag'}) or
    #     $HTML::Tagset::isCDATA_Parent{$e_tag};
    #
    #    if(
    #      $children = $e->{'_content'}
    #      and @$children      # has children
    #      and !ref($children->[-1])
    #      and $children->[-1] =~ m<^\s+$>s # last node is all-WS
    #      and
    #        (
    #         # has a tightable parent:
    #         $HTML::DOM::_TreeBuilder::canTighten{ $e_tag }
    #         or
    #          ( # has a tightenable left sibling:
    #            @$children > 1 and
    #            ref($children->[-2])
    #            and $HTML::DOM::_TreeBuilder::canTighten{ $children->[-2]{'_tag'} }
    #          )
    #        )
    #    ) {
    #      pop @$children;
    #      #print $indent, "Popping a terminal WS node from ", $e->{'_tag'},
    #      #  " (", $e->address, ") while exiting.\n" if DEBUG;
    #    }
    #  }
    #}

        foreach my $e (@to_close) {

            # Call the applicable callback, if any
            $ptag = $e->{'_tag'};
            &{         $self->{"_tweak_$ptag"}
                    || $self->{'_tweak_*'}
                    || next }( map $_, $e, $ptag, $self );
            print $indent, "Back from tweaking.\n" if DEBUG;
            last
                if $self->{ '_stunted'
                    };    # in case one of the handlers called stunt
        }
        return @to_close;
    }
}

#==========================================================================
{
    my ( $indent, $nugget );

    sub text {
        return if $_[0]{'_stunted'};

        # Accept a "here's a text token" signal from HTML::Parser.
        my ( $self, $text, $is_cdata ) = @_;

        # the >3.0 versions of Parser may pass a cdata node.
        # Thanks to Gisle Aas for pointing this out.

        return unless length $text;    # I guess that's always right

        my $ignore_text         = $self->{'_ignore_text'};
        my $no_space_compacting = $self->{'_no_space_compacting'};
        my $no_expand_entities  = $self->{'_no_expand_entities'};
        my $pos                 = $self->{'_pos'} || $self;

        HTML::Entities::decode($text)
            unless $ignore_text
                || $is_cdata
                || $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} }
                || $no_expand_entities;

        #my($indent, $nugget);
        if (DEBUG) {

           # optimization -- don't figure out depth unless we're in debug mode
            my @lineage_tags = $pos->lineage_tag_names;
            $indent = '  ' x ( 1 + @lineage_tags );

            $nugget
                = ( length($text) <= 25 )
                ? $text
                : ( substr( $text, 0, 25 ) . '...' );
            $nugget =~ s<([\x00-\x1F])>
                 <'\\x'.(unpack("H2",$1))>eg;
            print $indent, "Proposing a new text node ($nugget) under ",
                join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) )
                || 'Root',
                ".\n";

            #} else {
            #  $indent = ' ';
        }

        my $ptag;
        if ($HTML::Tagset::isCDATA_Parent{ $ptag = $pos->{'_tag'} }

            #or $pos->is_inside('pre')
            or $pos->is_inside( 'pre', 'textarea' )
            )
        {
            return if $ignore_text;
            $pos->push_content($text);
        }
        else {

            # return unless $text =~ /\S/;  # This is sometimes wrong

            if ( !$self->{'_implicit_tags'} || $text !~ /[^\n\r\f\t ]/ ) {

                # don't change anything
            }
            elsif ( $ptag eq 'head' or $ptag eq 'noframes' ) {
                if ( $self->{'_implicit_body_p_tag'} ) {
                    print $indent,
                        " * Text node under \U$ptag\E closes \U$ptag\E, implicates BODY and P.\n"
                        if DEBUG > 1;
                    $self->end( \$ptag );
                    $pos = $self->{'_body'}
                        ? ( $self->{'_pos'}
                            = $self->{'_body'} )    # expected case
                        : $self->insert_element( 'body', 1 );
                    $pos = $self->insert_element( 'p', 1 );
                }
                else {
                    print $indent,
                        " * Text node under \U$ptag\E closes, implicates BODY.\n"
                        if DEBUG > 1;
                    $self->end( \$ptag );



( run in 2.314 seconds using v1.01-cache-2.11-cpan-524268b4103 )