Data-TableReader-Decoder-HTML

 view release on metacpan or  search on metacpan

lib/Data/TableReader/Decoder/HTML.pm  view on Meta::CPAN

}

# Return the index of the table the iterator is currently positioned on.
sub Data::TableReader::Decoder::HTML::_Iter::dataset_idx {
	my $self= shift;
	my $fields= $self->_fields;
	# table_i is held as a scalar reference; hand back the plain value.
	return ${ $fields->{table_i} };
}

# Return the fraction of records consumed so far: the record count of the
# preceding tables plus the current row index, divided by total_records.
# Returns 0 when total_records is zero or unset.
sub Data::TableReader::Decoder::HTML::_Iter::progress {
	my $self= shift;
	my $fields= $self->_fields;
	my $total= $fields->{total_records};
	# Nothing to divide by; report no progress rather than dividing by zero.
	return 0 unless $total;
	my $consumed= $fields->{table_record_ofs} + ${ $fields->{row_i} };
	return $consumed / $total;
}

# Return the current position as a new arrayref [ table_index, row_index ].
sub Data::TableReader::Decoder::HTML::_Iter::tell {
	my $self= shift;
	my $fields= $self->_fields;
	# Both counters are stored as scalar references; copy out their values.
	my $table_idx= ${ $fields->{table_i} };
	my $row_idx= ${ $fields->{row_i} };
	return [ $table_idx, $row_idx ];
}

# Reposition the iterator.
#   $to - arrayref [ table_index, row_index ], in the form returned by tell.
# Updates the current table index, row index and current table (an empty
# arrayref if the table index has no table), and recalculates
# table_record_ofs, the number of rows in all tables preceding the target
# table, which progress relies on.
# Always returns 1.
sub Data::TableReader::Decoder::HTML::_Iter::seek {
	my ($self, $to)= @_;
	my $f= $self->_fields;
	my ($table_i, $row_i)= @$to;
	${$f->{table_i}}= $table_i;
	${$f->{row_i}}= $row_i;
	${$f->{table}}= $f->{tables}[$table_i] || [];
	# Re-calculate table_record_ofs.  The sum covers the tables *before* the
	# target table, so it is bounded by the table index, not the row index.
	# Clamp to the last table so that seeking past the end does not try to
	# dereference entries that don't exist.
	my $last_prior= $table_i - 1;
	$last_prior= $#{$f->{tables}} if $last_prior > $#{$f->{tables}};
	my $ofs= 0;
	$ofs += @{ $f->{tables}[$_] } for 0 .. $last_prior;
	$f->{table_record_ofs}= $ofs;
	return 1;
}

# Advance to the first row of the following table.
# Returns 0 without moving when the current table is already the last one;
# otherwise returns the result of seek.
sub Data::TableReader::Decoder::HTML::_Iter::next_dataset {
	my ($self)= @_;
	my $fields= $self->_fields;
	my $current= ${ $fields->{table_i} };
	my $last= $#{ $fields->{tables} };
	return 0 unless $current < $last;
	return $self->seek([ $current + 1, 0 ]);
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Data::TableReader::Decoder::HTML - Access the tables of an HTML document

=head1 VERSION

version 0.020

=head1 DESCRIPTION

This decoder iterates the <TR> tags of the <TABLE>s of an HTML file.

=head1 METHODS

=head2 parse

Unfortunately, I'm not aware of any HTML parsers that properly parse a stream on demand rather
than using callbacks, so this module simply parses all the HTML up-front and iterates the perl
data structure.  This would be a problem if you have more HTML than can fit into memory
comfortably.  But if that's the case, you have bigger problems ;-)

This method is called automatically the first time you invoke the iterator.  You might choose
to call it earlier in order to report errors better.

=head2 iterator

  my $iterator= $decoder->iterator;

Return an L<iterator|Data::TableReader::Iterator> which returns each row of the table as an
arrayref.  The iterator supports C<< $i->next_dataset >> to move to the next table element.

=head1 AUTHOR

Michael Conrad <mike@nrdvana.net>

=head1 CONTRIBUTOR

=for stopwords Christian Walde

Christian Walde <walde.christian@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2024 by Michael Conrad.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut



( run in 0.985 second using v1.01-cache-2.11-cpan-fa01517f264 )