HTML-Untemplate

 view release on metacpan or  search on metacpan

lib/HTML/Linear.pm  view on Meta::CPAN

package HTML::Linear;
# ABSTRACT: represent HTML::Tree as a flat list
use strict;
use utf8;
use warnings qw(all);

use Digest::SHA qw(sha256);

use Moo;
use MooX::Types::MooseLike::Base qw(:all);
extends 'HTML::TreeBuilder';

use HTML::Linear::Element;
use HTML::Linear::Path;

## no critic (ProtectPrivateSubs, RequireFinalReturn)

our $VERSION = '0.019'; # VERSION


has _list       => (
    is          => 'ro',
    isa         => ArrayRef[InstanceOf('HTML::Linear::Element')],
    default     => sub { [] },
);

sub _add_element { push @{shift->_list}, shift }
sub as_list { @{shift->_list} }
sub count_elements { 0 + @{shift->_list} }
sub get_element { shift->_list->[shift] }


has _shrink => (
    is          => 'rwp',
    isa         => Bool,
    default     => sub { 0 },
);

sub set_shrink { shift->_set__shrink(1) }
sub unset_shrink { shift->_set__shrink(0) }


has _strict => (
    is          => 'rwp',
    isa         => Bool,
    default     => sub { 0 },
);

sub set_strict { shift->_set__strict(1) }
sub unset_strict { shift->_set__strict(0) }


has _uniq       => (is => 'ro', isa => HashRef[Str], default => sub { {} });


has _path_count => (is => 'ro', isa => HashRef[Str], default => sub { {} });


after eof => sub {
    my ($self) = @_;

    $self->deparse($self, []);

    if ($self->_shrink) {
        my %short;
        for my $elem ($self->as_list) {
            my @rpath = reverse $elem->as_xpath;
            my $i = 0;
            unless ($self->_strict) {
                for (; $i <= $#rpath; $i++) {
                    last if $elem->path->[-1 - $i]->is_groupable;
                }
            }
            for my $j ($i .. $#rpath) {
                my $key = sha256(join '' => @rpath[0 .. $j]);
                $short{$key}{offset} = $#rpath - $j;
                push @{$short{$key}{elem}}, $elem;
                ++$short{$key}{accumulator}{$elem->as_xpath};
            }
        }

        for my $key (sort { $short{$b}{offset} <=> $short{$a}{offset} } keys %short) {
            next if 1 < keys %{$short{$key}{accumulator}};
            for my $elem (@{$short{$key}{elem}}) {
                next if $elem->trim_at;
                $elem->trim_at($short{$key}{offset});
            }
        }
    }
};


sub add_element {
    my ($self, $elem) = @_;

    $elem->index($self->_path_count->{join ',', $elem->path}++);
    $elem->index_map($self->_uniq);

    $self->_add_element($elem);
}


sub deparse {
    my ($self, $node, $path) = @_;

    my $level = HTML::Linear::Path->new({
        address     => $node->address,
        attributes  => {
            map {
                m{^[_/]}x
                    ? ()
                    : (lc, $node->attr($_))
            } $node->all_attr_names
        },
        strict      => $self->_strict,
        tag         => $node->tag,



( run in 1.834 second using v1.01-cache-2.11-cpan-39bf76dae61 )