HTML-Untemplate
view release on metacpan or search on metacpan
lib/HTML/Linear.pm view on Meta::CPAN
package HTML::Linear;
# ABSTRACT: represent HTML::Tree as a flat list
use strict;
use utf8;
use warnings qw(all);
use Digest::SHA qw(sha256);
use Moo;
use MooX::Types::MooseLike::Base qw(:all);
extends 'HTML::TreeBuilder';
use HTML::Linear::Element;
use HTML::Linear::Path;
## no critic (ProtectPrivateSubs, RequireFinalReturn)
our $VERSION = '0.019'; # VERSION
has _list => (
is => 'ro',
isa => ArrayRef[InstanceOf('HTML::Linear::Element')],
default => sub { [] },
);
sub _add_element { push @{shift->_list}, shift }
sub as_list { @{shift->_list} }
sub count_elements { 0 + @{shift->_list} }
sub get_element { shift->_list->[shift] }
has _shrink => (
is => 'rwp',
isa => Bool,
default => sub { 0 },
);
sub set_shrink { shift->_set__shrink(1) }
sub unset_shrink { shift->_set__shrink(0) }
has _strict => (
is => 'rwp',
isa => Bool,
default => sub { 0 },
);
sub set_strict { shift->_set__strict(1) }
sub unset_strict { shift->_set__strict(0) }
has _uniq => (is => 'ro', isa => HashRef[Str], default => sub { {} });
has _path_count => (is => 'ro', isa => HashRef[Str], default => sub { {} });
after eof => sub {
my ($self) = @_;
$self->deparse($self, []);
if ($self->_shrink) {
my %short;
for my $elem ($self->as_list) {
my @rpath = reverse $elem->as_xpath;
my $i = 0;
unless ($self->_strict) {
for (; $i <= $#rpath; $i++) {
last if $elem->path->[-1 - $i]->is_groupable;
}
}
for my $j ($i .. $#rpath) {
my $key = sha256(join '' => @rpath[0 .. $j]);
$short{$key}{offset} = $#rpath - $j;
push @{$short{$key}{elem}}, $elem;
++$short{$key}{accumulator}{$elem->as_xpath};
}
}
for my $key (sort { $short{$b}{offset} <=> $short{$a}{offset} } keys %short) {
next if 1 < keys %{$short{$key}{accumulator}};
for my $elem (@{$short{$key}{elem}}) {
next if $elem->trim_at;
$elem->trim_at($short{$key}{offset});
}
}
}
};
sub add_element {
my ($self, $elem) = @_;
$elem->index($self->_path_count->{join ',', $elem->path}++);
$elem->index_map($self->_uniq);
$self->_add_element($elem);
}
sub deparse {
my ($self, $node, $path) = @_;
my $level = HTML::Linear::Path->new({
address => $node->address,
attributes => {
map {
m{^[_/]}x
? ()
: (lc, $node->attr($_))
} $node->all_attr_names
},
strict => $self->_strict,
tag => $node->tag,
( run in 1.834 second using v1.01-cache-2.11-cpan-39bf76dae61 )