Data-TableReader-Decoder-HTML
view release on metacpan or search on metacpan
lib/Data/TableReader/Decoder/HTML.pm view on Meta::CPAN
}
# Return the index of the table the iterator is currently positioned on,
# i.e. the value held in the scalar referenced by the 'table_i' field.
sub Data::TableReader::Decoder::HTML::_Iter::dataset_idx {
    my ($self) = @_;
    my $table_i_ref = $self->_fields->{table_i};
    return $$table_i_ref;
}
# Report how far the iterator has advanced, as a fraction of 'total_records'.
# The numerator is 'table_record_ofs' plus the current row index.
# Returns 0 when 'total_records' is false (zero or unset), which also avoids
# a division by zero.
sub Data::TableReader::Decoder::HTML::_Iter::progress {
    my ($self) = @_;
    my $fields = $self->_fields;
    my $total  = $fields->{total_records};
    return 0 unless $total;
    my $position = $fields->{table_record_ofs} + ${ $fields->{row_i} };
    return $position / $total;
}
# Capture the current position as a two-element arrayref
# [ table index, row index ], suitable for passing back to seek().
sub Data::TableReader::Decoder::HTML::_Iter::tell {
    my ($self) = @_;
    my $fields = $self->_fields;
    my $table_idx = ${ $fields->{table_i} };
    my $row_idx   = ${ $fields->{row_i} };
    return [ $table_idx, $row_idx ];
}
# Reposition the iterator.
#
#   $to - arrayref [ table index, row index ], as produced by tell().
#
# Updates the shared 'table_i' and 'row_i' scalars, points 'table' at the
# selected table (or an empty arrayref when no such table exists), and
# recalculates 'table_record_ofs' so that progress() stays consistent.
# Always returns 1.
sub Data::TableReader::Decoder::HTML::_Iter::seek {
    my ($self, $to)= @_;
    my $f= $self->_fields;
    my ($table_idx, $row_idx)= @$to;
    ${$f->{table_i}}= $table_idx;
    ${$f->{row_i}}= $row_idx;
    ${$f->{table}}= $f->{tables}[$table_idx] || [];
    # Re-calculate table_record_ofs: the number of rows in all tables that
    # precede the selected one.  This must be driven by the TABLE index (the
    # row index was used here previously, which produced a wrong offset).
    # Clamp to the last existing table so that seeking past the end never
    # dereferences a nonexistent element.
    my $last_prior= $table_idx - 1;
    $last_prior= $#{$f->{tables}} if $last_prior > $#{$f->{tables}};
    my $ofs= 0;
    $ofs += @$_ for @{$f->{tables}}[0 .. $last_prior];
    $f->{table_record_ofs}= $ofs;
    return 1;
}
# Advance to the first row of the following table.
# Returns 0 without moving when the iterator is already on (or beyond) the
# last table; otherwise returns whatever seek() returns.
sub Data::TableReader::Decoder::HTML::_Iter::next_dataset {
    my ($self) = @_;
    my $fields     = $self->_fields;
    my $current    = ${ $fields->{table_i} };
    my $last_table = $#{ $fields->{tables} };
    if ($current >= $last_table) {
        return 0;
    }
    return $self->seek([ $current + 1, 0 ]);
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Data::TableReader::Decoder::HTML - Access the tables of an HTML document
=head1 VERSION
version 0.020
=head1 DESCRIPTION
This decoder iterates the <TR> tags of the <TABLE>s of an HTML file.
=head1 METHODS
=head2 parse
Unfortunately, I'm not aware of any HTML parsers that properly parse a stream on demand rather
than using callbacks, so this module simply parses all the HTML up-front and iterates the perl
data structure. This would be a problem if you have more HTML than can fit into memory
comfortably. But if that's the case, you have bigger problems ;-)
This method is called automatically the first time you invoke the iterator. You might choose
to call it earlier in order to report errors better.
=head2 iterator
my $iterator= $decoder->iterator;
Return an L<iterator|Data::TableReader::Iterator> which returns each row of the table as an
arrayref. The iterator supports C<< $i->next_dataset >> to move to the next table element.
=head1 AUTHOR
Michael Conrad <mike@nrdvana.net>
=head1 CONTRIBUTOR
=for stopwords Christian Walde
Christian Walde <walde.christian@gmail.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2024 by Michael Conrad.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
( run in 0.985 second using v1.01-cache-2.11-cpan-fa01517f264 )