HTML-Microformats
view release on metacpan or search on metacpan
lib/HTML/Microformats/Mixin/Parser.pm view on Meta::CPAN
# destructive nature of this function, make sure that you only
# use it on a clone of the real node.
sub _destroyer
{
my $self = shift;
my $element = shift;
# Classes to be destroyed
my @containers = qw(mfo vcard adr geo vcalendar vevent vtodo valarm
vfreebusy hfeed hentry hslice hreview hresume xfolkentry biota haudio
hmeasure hangle hmoney hlisting vtodo-list figure hproduct hnews);
my %C;
foreach my $c (@containers) { $C{$c}=1; }
# Some classes may be retained, optionally.
foreach my $c (@_) { $C{$c}=0; }
# Assemble them all into the regular expression of death.
@containers = ();
foreach my $c (keys %C) { push @containers, $c if $C{$c}; }
my $regexp = join '|', @containers;
$regexp = "\\b($regexp)\\b";
$regexp =~ s/\-/\\\-/g;
# Destroy child elements matching the regular expression.
foreach my $e ($element->getElementsByTagName('*'))
{
next if $e == $element;
if ($e->getAttribute('class') =~ /$regexp/)
{
$self->_destroy_element($e);
my $newclass = $e->getAttribute('class');
$newclass =~ s/$regexp//g;
$e->setAttribute('class', $newclass);
$e->removeAttribute('class') unless length $newclass;
}
}
}
sub _destroy_element
{
my $self = shift;
my $element = shift;
foreach my $c ($element->getElementsByTagName('*'))
{
$c->removeAttribute('class');
$c->removeAttribute('rel');
$c->removeAttribute('rev');
}
}
sub _expand_patterns
{
my $self = shift;
my $root = shift || $self->element;
my $max_include_loops = shift || 2;
# Expand microformat include pattern.
my $incl_iterations = 0;
my $replacements = 1;
while (($incl_iterations < $max_include_loops) && $replacements)
{
$replacements = $self->_expand_include_pattern($root) + $self->_expand_include_pattern_2($root);
$incl_iterations++;
}
# Table cell headers pattern.
$self->_expand_table_header_pattern($root);
# Magical data-X class pattern.
$self->_expand_dataX_class_pattern($root);
}
sub _expand_dataX_class_pattern
{
my $self = shift;
my $node = shift;
return
unless $self->context->has_profile('http://purl.org/uF/pattern-data-class/1');
foreach my $kid ($node->getElementsByTagName('*'))
{
my $classes = $kid->getAttribute('class');
$classes =~ s/(^\s+|\s+$)//g;
$classes =~ s/\s+/ /g;
my @classes = split / /, $classes;
map s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg, @classes;
my @dataClasses = grep /^data\-/, @classes;
next unless (@dataClasses);
my $val = '';
foreach my $d (@dataClasses)
{
$val = $d unless ((length $val) > (length $d));
}
$val =~ s/^data\-//;
$kid->setAttribute('data-cpan-html-microformats-content', $val);
}
}
sub _expand_table_header_pattern
{
my $self = shift;
my $node = shift;
# Add node itself to list!
my @elements = $node->getElementsByTagName('td');
if (('XML::LibXML::Element' eq ref $node) && $node->tagName =~ /^t[dh]$/i)
{ unshift @elements, $node; }
foreach my $tag (@elements)
{
next unless length $tag->getAttribute('headers');
my $headers = $tag->getAttribute('headers');
$headers =~ s/(^\s+|\s+$)//g;
$headers =~ s/\s+/ /g;
my @headers = split / /, $headers;
foreach my $H (@headers)
{
my $Htag = searchID($H, $self->context->document);
( run in 1.013 second using v1.01-cache-2.11-cpan-71847e10f99 )