HTML-Microformats

 view release on metacpan or  search on metacpan

lib/HTML/Microformats/Mixin/Parser.pm  view on Meta::CPAN

# destructive nature of this function, make sure that you only
# use it on a clone of the real node.
sub _destroyer
{
	my $self = shift;
	my $element = shift;
	
	# Classes to be destroyed
	my @containers = qw(mfo vcard adr geo vcalendar vevent vtodo valarm
		vfreebusy hfeed hentry hslice hreview hresume xfolkentry biota haudio
		hmeasure hangle hmoney hlisting vtodo-list figure hproduct hnews);
	my %C;
	foreach my $c (@containers) { $C{$c}=1; }
	
	# Some classes may be retained, optionally.
	foreach my $c (@_)          { $C{$c}=0; }

	# Assemble them all into the regular expression of death.
	@containers = ();
	foreach my $c (keys %C) { push @containers, $c if $C{$c}; }
	my $regexp = join '|', @containers;
	$regexp = "\\b($regexp)\\b";
	$regexp =~ s/\-/\\\-/g;
	
	# Destroy child elements matching the regular expression.
	foreach my $e ($element->getElementsByTagName('*'))
	{	
		next if $e == $element;
		
		if ($e->getAttribute('class') =~ /$regexp/)
		{
			$self->_destroy_element($e);
			my $newclass = $e->getAttribute('class');
			$newclass =~ s/$regexp//g;
			$e->setAttribute('class', $newclass);
			$e->removeAttribute('class') unless length $newclass;
		}
	}
}

sub _destroy_element
{
	my $self    = shift;
	my $element = shift;
	
	foreach my $c ($element->getElementsByTagName('*'))
	{
		$c->removeAttribute('class');
		$c->removeAttribute('rel');
		$c->removeAttribute('rev');
	}
}

sub _expand_patterns
{
	my $self = shift;
	my $root = shift || $self->element;
	my $max_include_loops = shift || 2;

	# Expand microformat include pattern.
	my $incl_iterations = 0;
	my $replacements = 1;
	while (($incl_iterations < $max_include_loops) && $replacements)
	{
		$replacements = $self->_expand_include_pattern($root) + $self->_expand_include_pattern_2($root);
		$incl_iterations++;
	}
	
	# Table cell headers pattern.
	$self->_expand_table_header_pattern($root);	

	# Magical data-X class pattern.
	$self->_expand_dataX_class_pattern($root);	
}

sub _expand_dataX_class_pattern
{
	my $self = shift;
	my $node = shift;

	return
		unless $self->context->has_profile('http://purl.org/uF/pattern-data-class/1');
		
	foreach my $kid ($node->getElementsByTagName('*'))
	{
		my $classes = $kid->getAttribute('class');
		$classes =~ s/(^\s+|\s+$)//g;
		$classes =~ s/\s+/ /g;
		my @classes = split / /, $classes;
		map s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg, @classes;
		my @dataClasses = grep /^data\-/, @classes;
		next unless (@dataClasses);
		
		my $val = '';
		foreach my $d (@dataClasses)
		{
			$val = $d unless ((length $val) > (length $d));
		}
		
		$val =~ s/^data\-//;
		$kid->setAttribute('data-cpan-html-microformats-content', $val);
	}
}

sub _expand_table_header_pattern
{
	my $self = shift;
	my $node = shift;
	
	# Add node itself to list!
	my @elements = $node->getElementsByTagName('td');
	if (('XML::LibXML::Element' eq ref $node) && $node->tagName =~ /^t[dh]$/i)
		{ unshift @elements, $node; }
		
	foreach my $tag (@elements)
	{
		next unless length $tag->getAttribute('headers');
		
		my $headers = $tag->getAttribute('headers');
		$headers =~ s/(^\s+|\s+$)//g;
		$headers =~ s/\s+/ /g;
		my @headers = split / /, $headers;
		
		foreach my $H (@headers)
		{
			my $Htag = searchID($H, $self->context->document);



( run in 1.013 second using v1.01-cache-2.11-cpan-71847e10f99 )