HTML-DOM
view release on metacpan or search on metacpan
lib/HTML/DOM/_TreeBuilder.pm view on Meta::CPAN
## would ever leave any WS nodes in the tree.
## If not, then there's no reason to have eof() call
## delete_ignorable_whitespace on the tree, is there?
##
#if(@to_close and $self->{'_tighten'} and !$self->{'_ignore_text'} and
# ! $to_close[-1]->is_inside('pre', keys %HTML::Tagset::isCDATA_Parent)
#) { # if tightenable
# my($children, $e_tag);
# foreach my $e (reverse @to_close) { # going top-down
# last if 'pre' eq ($e_tag = $e->{'_tag'}) or
# $HTML::Tagset::isCDATA_Parent{$e_tag};
#
# if(
# $children = $e->{'_content'}
# and @$children # has children
# and !ref($children->[-1])
# and $children->[-1] =~ m<^\s+$>s # last node is all-WS
# and
# (
# # has a tightable parent:
# $HTML::DOM::_TreeBuilder::canTighten{ $e_tag }
# or
# ( # has a tightenable left sibling:
# @$children > 1 and
# ref($children->[-2])
# and $HTML::DOM::_TreeBuilder::canTighten{ $children->[-2]{'_tag'} }
# )
# )
# ) {
# pop @$children;
# #print $indent, "Popping a terminal WS node from ", $e->{'_tag'},
# # " (", $e->address, ") while exiting.\n" if DEBUG;
# }
# }
#}
foreach my $e (@to_close) {
# Call the applicable callback, if any
$ptag = $e->{'_tag'};
&{ $self->{"_tweak_$ptag"}
|| $self->{'_tweak_*'}
|| next }( map $_, $e, $ptag, $self );
print $indent, "Back from tweaking.\n" if DEBUG;
last
if $self->{ '_stunted'
}; # in case one of the handlers called stunt
}
return @to_close;
}
}
#==========================================================================
{
my ( $indent, $nugget );
sub text {
return if $_[0]{'_stunted'};
# Accept a "here's a text token" signal from HTML::Parser.
my ( $self, $text, $is_cdata ) = @_;
# the >3.0 versions of Parser may pass a cdata node.
# Thanks to Gisle Aas for pointing this out.
return unless length $text; # I guess that's always right
my $ignore_text = $self->{'_ignore_text'};
my $no_space_compacting = $self->{'_no_space_compacting'};
my $no_expand_entities = $self->{'_no_expand_entities'};
my $pos = $self->{'_pos'} || $self;
HTML::Entities::decode($text)
unless $ignore_text
|| $is_cdata
|| $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} }
|| $no_expand_entities;
#my($indent, $nugget);
if (DEBUG) {
# optimization -- don't figure out depth unless we're in debug mode
my @lineage_tags = $pos->lineage_tag_names;
$indent = ' ' x ( 1 + @lineage_tags );
$nugget
= ( length($text) <= 25 )
? $text
: ( substr( $text, 0, 25 ) . '...' );
$nugget =~ s<([\x00-\x1F])>
<'\\x'.(unpack("H2",$1))>eg;
print $indent, "Proposing a new text node ($nugget) under ",
join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) )
|| 'Root',
".\n";
#} else {
# $indent = ' ';
}
my $ptag;
if ($HTML::Tagset::isCDATA_Parent{ $ptag = $pos->{'_tag'} }
#or $pos->is_inside('pre')
or $pos->is_inside( 'pre', 'textarea' )
)
{
return if $ignore_text;
$pos->push_content($text);
}
else {
# return unless $text =~ /\S/; # This is sometimes wrong
if ( !$self->{'_implicit_tags'} || $text !~ /[^\n\r\f\t ]/ ) {
# don't change anything
}
elsif ( $ptag eq 'head' or $ptag eq 'noframes' ) {
if ( $self->{'_implicit_body_p_tag'} ) {
print $indent,
" * Text node under \U$ptag\E closes \U$ptag\E, implicates BODY and P.\n"
if DEBUG > 1;
$self->end( \$ptag );
$pos = $self->{'_body'}
? ( $self->{'_pos'}
= $self->{'_body'} ) # expected case
: $self->insert_element( 'body', 1 );
$pos = $self->insert_element( 'p', 1 );
}
else {
print $indent,
" * Text node under \U$ptag\E closes, implicates BODY.\n"
if DEBUG > 1;
$self->end( \$ptag );
( run in 2.314 seconds using v1.01-cache-2.11-cpan-524268b4103 )