HTML-DOM

 view release on metacpan or  search on metacpan

lib/HTML/DOM.pm  view on Meta::CPAN


				if(
				 $tag eq 'link'
				) {
				 HTML'DOM'Element'Link'_reset_style_sheet(
				  $elem
				 );
				}

				# If a  form  is  being  closed,  determine
				# whether it is closed implicitly and set
				# the  current  form  and  magic  form
				# accordingly.
				if($tag eq 'form') {
					pop
					 @{$$doc_elem{_HTML_DOM_cf}||[]};
				 	delete $$doc_elem{_HTML_DOM_etif}
					 or $$doc_elem{_HTML_DOM_mg_f}
					  = $elem
				}

				# If a formie is being closed, create a
				# magic association where appropriate.
				if(!$$doc_elem{_HTML_DOM_no_mg}
				   and $tag =~ /^(?:
				    button|(?:
				     fieldse|inpu|(?:obj|sel)ec
				    )t|label|textarea
				   )\z/x
				   and $$doc_elem{_HTML_DOM_mg_f}
				   and  !$$doc_elem{_HTML_DOM_cf}
				      ||!@{$$doc_elem{_HTML_DOM_cf}}) {
					$elem->form(
					 $$doc_elem{_HTML_DOM_mg_f}
					);
					$doc_elem->ownerDocument->
					 magic_forms(1);
				}

				my $event_offsets = delete
				    $elem->{_HTML_DOM_tb_event_offsets}
				  or return;
				_create_events(
					$doc_elem, $elem, $event_offsets
				);
			 },
		 ))
		   ->ignore_ignorable_whitespace(0); # stop eof()'s cleanup
		$tb->store_comments(1);                # from changing an
		$tb->unbroken_text(1); # necessary, con-  # elem_han-
		                     # sidering what        # dler's view
		                   # _tweak_~text does       # of the tree

		# Web browsers preserve whitespace, at least from the point
		# of view of the DOM; but the main reason we are using this
		# option is that  a  parser  for  innerHTML  doesn’t  know
		# whether the nodes will be inserted in a <pre>.
		no_space_compacting $tb 1;

		$tb->handler(text => "text",         # so we can get line
		    "self, text, is_cdata, offset"); # numbers for scripts
		$tb->handler(start => "start",
		  "self, tagname, attr, attrseq, offset, tokenpos");
		$tb->handler((declaration=>)x2,'self,tagname,tokens,text');

		$tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'};

		my %opts = @_;
		$tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms};
		  # used by an element’s innerHTML

		# We have to copy it like this, because our circular ref-
		# erence is thus:  $tb  ->  object  ->  closure  ->  $tb
		# We can’t weaken $tb without a copy of it, because it is
		# the only reference to the object.
		my $life_raft = $tb; weaken $tb; $tb;
	}

	sub start {
		return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit
		
		my $tokenpos = pop;
		my $offset = pop;
		my %event_offsets;
		my $attr_names = pop;
		for(0..$#$attr_names) {
			$$attr_names[$_] =~ /^on(.*)/is
				and $event_offsets{$1} =
					$$tokenpos[$_*4 + 4] + $offset;
		}

		my $elem = (my $self = shift)->SUPER::start(@_);
		
		$_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] },
		 $elem;

		return $elem unless %event_offsets;

		if(!$HTML::Tagset::emptyElement{$_[0]}) { # container
			$$elem{_HTML_DOM_tb_event_offsets} =
				\%event_offsets;
		} else {
			_create_events(
				$self,
				$elem,
				\%event_offsets,
			);
		}

		return $elem;
	}

	sub _create_events {
		my ($doc_elem,$elem,$event_offsets) = @_;
		defined(my $event_attr_handler =
		  $doc_elem->ownerDocument->event_attr_handler)
		  or return;
		for(keys %$event_offsets) {
			my $l =
			&$event_attr_handler(
				$elem,

lib/HTML/DOM.pm  view on Meta::CPAN

	# avoid having HTML::Parser read it again, even if we could
	# use binmode.
	require Encode;
	$_->write(Encode::decode($encoding, $contents)), $_->close,
	$_->{_HTML_DOM_cs} = $encoding
		for shift;
	return 1;
}

sub charset {
	my $old = (my$ self = shift)->{_HTML_DOM_cs};
	$self->{_HTML_DOM_cs} = shift if @_;
	$old;
}

sub write {
	my $self = shift;
	if($$self{_HTML_DOM_buffered}) {
		# Although we call this buffered, it’s actually not. Before
		# version 0.040,  a recursive call to ->write  on the same
		# doc object would simply record the HTML code in a buffer
		# that was processed when the elem handler that  made  the
		# inner call to ->write finished. Every elem handler would
		# have a wrapper  (created in the elem_handler  sub above)
		# that took care of this after calling the handler, by cre-
		# ating a new, temporary, parser object that would call the
		# start/end, etc., methods of our tree builder.
		#
		# This approach stops JS code like this from working (yes,
		# there *are* websites with code like this!):
		#   document.write("<img id=img1>")
		#   document.getElementById("img1").src="..."
		#
		# So, now we take care of creating a new parser immedi-
		# ately. This does mean, however that we end up with mul-
		# tiple parser objects floating around  in  the  case  of
		# nested <scripts>. So we have to be careful to create and
		# delete them at the right time.

		# $$self{_HTML_DOM_buffered} actually contains a number
		# indicating the number of nested calls to ->write.
		my $level = $$self{_HTML_DOM_buffered};
		local $$self{_HTML_DOM_buffered} = $level + 1;

		my($doc_elem) = $$self{_HTML_DOM_parser};

		# These handlers delegate the handling to methods of
		# *another* HTML::Parser object.
		my $p = $$self{_HTML_DOM_p}[$level-1] ||=
		 HTML::Parser->new(
		  start_h => [ 
		    sub { $doc_elem->start(@_) },
		   'tagname, attr, attrseq'
		  ],
		  end_h => [ 
		    sub { $doc_elem->end(@_) },
		   'tagname, text'
		  ],
		  text_h => [ 
		    sub { $doc_elem->text(@_) },
		   'text, is_cdata'
		  ],
		 );

		$p->unbroken_text(1); # push_content, which is called by
		                     # H:TB:text, won't concatenate two
		                   # text portions if the  first  one
		                  # is a node.

		$p->parse(shift);

		# We can’t get rid of our parser at this point, as a subse-
		# quent ->write call from the same nested level (e.g., from
		# the same <script> block) will need the same one,  in case
		# what we are parsing ends with a partial token. But if the
		# calling elem handler  finishes  (e.g.,  if  we  reach  a
		# </script>),  then we need to remove  it,  so  we  have
		# elem_handler do that for us.
	}
	else {
		my $parser
		 = $$self{_HTML_DOM_parser}
		   || ($self->open, $$self{_HTML_DOM_parser});
		local $$self{_HTML_DOM_buffered} = 1;
		$parser->parse($_) for @_;
	}
	$self->_modified;
	return # nothing;
}

sub writeln { shift->write(@_,"\n") }

sub close {
	my $a = (my $self = shift)->{_HTML_DOM_parser};
	return unless $a;

	# We can’t use eval { $a->eof } because that would catch errors
	# that are meant to propagate  (a  nasty  bug  [the  so-called
	# ‘content—offset’ bug] was hidden because of an eval in ver-
	#  sion 0.010).
#	return unless $a->can('eof');
	                             
	$a->eof(@_);
	delete $$self{_HTML_DOM_parser};
	$a->elementify;
	return # nothing;
}

sub open {
	(my $self = shift)->detach_content;

	# We have to use push_content instead of simply putting it there
	# ourselves,  because push_content  takes care of weakening the
	# parent  (and that code  doesn’t  belong  in  this  package).
	$self->push_content(
	 my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML
	);

	delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>};

	return unless $self->{_HTML_DOM_elem_handlers};



( run in 0.839 second using v1.01-cache-2.11-cpan-524268b4103 )