HTML-DOM
view release on metacpan or search on metacpan
lib/HTML/DOM.pm view on Meta::CPAN
if(
$tag eq 'link'
) {
HTML'DOM'Element'Link'_reset_style_sheet(
$elem
);
}
# If a form is being closed, determine
# whether it is closed implicitly and set
# the current form and magic form
# accordingly.
if($tag eq 'form') {
pop
@{$$doc_elem{_HTML_DOM_cf}||[]};
delete $$doc_elem{_HTML_DOM_etif}
or $$doc_elem{_HTML_DOM_mg_f}
= $elem
}
# If a formie is being closed, create a
# magic association where appropriate.
if(!$$doc_elem{_HTML_DOM_no_mg}
and $tag =~ /^(?:
button|(?:
fieldse|inpu|(?:obj|sel)ec
)t|label|textarea
)\z/x
and $$doc_elem{_HTML_DOM_mg_f}
and !$$doc_elem{_HTML_DOM_cf}
||!@{$$doc_elem{_HTML_DOM_cf}}) {
$elem->form(
$$doc_elem{_HTML_DOM_mg_f}
);
$doc_elem->ownerDocument->
magic_forms(1);
}
my $event_offsets = delete
$elem->{_HTML_DOM_tb_event_offsets}
or return;
_create_events(
$doc_elem, $elem, $event_offsets
);
},
))
->ignore_ignorable_whitespace(0); # stop eof()'s cleanup
$tb->store_comments(1); # from changing an
$tb->unbroken_text(1); # necessary, con- # elem_han-
# sidering what # dler's view
# _tweak_~text does # of the tree
# Web browsers preserve whitespace, at least from the point
# of view of the DOM; but the main reason we are using this
# option is that a parser for innerHTML doesnât know
# whether the nodes will be inserted in a <pre>.
no_space_compacting $tb 1;
$tb->handler(text => "text", # so we can get line
"self, text, is_cdata, offset"); # numbers for scripts
$tb->handler(start => "start",
"self, tagname, attr, attrseq, offset, tokenpos");
$tb->handler((declaration=>)x2,'self,tagname,tokens,text');
$tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'};
my %opts = @_;
$tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms};
# used by an elementâs innerHTML
# We have to copy it like this, because our circular ref-
# erence is thus: $tb -> object -> closure -> $tb
# We canât weaken $tb without a copy of it, because it is
# the only reference to the object.
my $life_raft = $tb; weaken $tb; $tb;
}
sub start {
return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit
my $tokenpos = pop;
my $offset = pop;
my %event_offsets;
my $attr_names = pop;
for(0..$#$attr_names) {
$$attr_names[$_] =~ /^on(.*)/is
and $event_offsets{$1} =
$$tokenpos[$_*4 + 4] + $offset;
}
my $elem = (my $self = shift)->SUPER::start(@_);
$_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] },
$elem;
return $elem unless %event_offsets;
if(!$HTML::Tagset::emptyElement{$_[0]}) { # container
$$elem{_HTML_DOM_tb_event_offsets} =
\%event_offsets;
} else {
_create_events(
$self,
$elem,
\%event_offsets,
);
}
return $elem;
}
sub _create_events {
my ($doc_elem,$elem,$event_offsets) = @_;
defined(my $event_attr_handler =
$doc_elem->ownerDocument->event_attr_handler)
or return;
for(keys %$event_offsets) {
my $l =
&$event_attr_handler(
$elem,
lib/HTML/DOM.pm view on Meta::CPAN
# avoid having HTML::Parser read it again, even if we could
# use binmode.
require Encode;
$_->write(Encode::decode($encoding, $contents)), $_->close,
$_->{_HTML_DOM_cs} = $encoding
for shift;
return 1;
}
sub charset {
my $old = (my$ self = shift)->{_HTML_DOM_cs};
$self->{_HTML_DOM_cs} = shift if @_;
$old;
}
sub write {
my $self = shift;
if($$self{_HTML_DOM_buffered}) {
# Although we call this buffered, itâs actually not. Before
# version 0.040, a recursive call to ->write on the same
# doc object would simply record the HTML code in a buffer
# that was processed when the elem handler that made the
# inner call to ->write finished. Every elem handler would
# have a wrapper (created in the elem_handler sub above)
# that took care of this after calling the handler, by cre-
# ating a new, temporary, parser object that would call the
# start/end, etc., methods of our tree builder.
#
# This approach stops JS code like this from working (yes,
# there *are* websites with code like this!):
# document.write("<img id=img1>")
# document.getElementById("img1").src="..."
#
# So, now we take care of creating a new parser immedi-
# ately. This does mean, however that we end up with mul-
# tiple parser objects floating around in the case of
# nested <scripts>. So we have to be careful to create and
# delete them at the right time.
# $$self{_HTML_DOM_buffered} actually contains a number
# indicating the number of nested calls to ->write.
my $level = $$self{_HTML_DOM_buffered};
local $$self{_HTML_DOM_buffered} = $level + 1;
my($doc_elem) = $$self{_HTML_DOM_parser};
# These handlers delegate the handling to methods of
# *another* HTML::Parser object.
my $p = $$self{_HTML_DOM_p}[$level-1] ||=
HTML::Parser->new(
start_h => [
sub { $doc_elem->start(@_) },
'tagname, attr, attrseq'
],
end_h => [
sub { $doc_elem->end(@_) },
'tagname, text'
],
text_h => [
sub { $doc_elem->text(@_) },
'text, is_cdata'
],
);
$p->unbroken_text(1); # push_content, which is called by
# H:TB:text, won't concatenate two
# text portions if the first one
# is a node.
$p->parse(shift);
# We canât get rid of our parser at this point, as a subse-
# quent ->write call from the same nested level (e.g., from
# the same <script> block) will need the same one, in case
# what we are parsing ends with a partial token. But if the
# calling elem handler finishes (e.g., if we reach a
# </script>), then we need to remove it, so we have
# elem_handler do that for us.
}
else {
my $parser
= $$self{_HTML_DOM_parser}
|| ($self->open, $$self{_HTML_DOM_parser});
local $$self{_HTML_DOM_buffered} = 1;
$parser->parse($_) for @_;
}
$self->_modified;
return # nothing;
}
sub writeln { shift->write(@_,"\n") }
sub close {
my $a = (my $self = shift)->{_HTML_DOM_parser};
return unless $a;
# We canât use eval { $a->eof } because that would catch errors
# that are meant to propagate (a nasty bug [the so-called
# âcontentâoffsetâ bug] was hidden because of an eval in ver-
# sion 0.010).
# return unless $a->can('eof');
$a->eof(@_);
delete $$self{_HTML_DOM_parser};
$a->elementify;
return # nothing;
}
sub open {
(my $self = shift)->detach_content;
# We have to use push_content instead of simply putting it there
# ourselves, because push_content takes care of weakening the
# parent (and that code doesnât belong in this package).
$self->push_content(
my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML
);
delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>};
return unless $self->{_HTML_DOM_elem_handlers};
( run in 0.839 second using v1.01-cache-2.11-cpan-524268b4103 )