Excel-ValueReader-XLSX
view release on metacpan or search on metacpan
lib/Excel/ValueReader/XLSX/Backend/LibXML.pm view on Meta::CPAN
package Excel::ValueReader::XLSX::Backend::LibXML;
use utf8;
use 5.12.1;
use Moose;
use Scalar::Util qw/looks_like_number/;
use XML::LibXML::Reader qw/XML_READER_TYPE_END_ELEMENT/;
use Iterator::Simple qw/iter/;
extends 'Excel::ValueReader::XLSX::Backend';
#======================================================================
# LAZY ATTRIBUTE CONSTRUCTORS
#======================================================================
# Builds the table of shared strings from the 'xl/sharedStrings.xml' member.
# Every <si> node opens a new entry; the values of all text nodes met until
# the next <si> are concatenated into that entry.
# Returns an arrayref of strings, in document order.
sub _strings {
  my ($self) = @_;

  my $xml = $self->_xml_reader_for_zip_member('xl/sharedStrings.xml');

  my @shared;
  my $current;   # string being accumulated; stays undef until something is seen

  while ($xml->read) {
    # closing tags carry no information for this scan
    next if $xml->nodeType == XML_READER_TYPE_END_ELEMENT;

    my $name = $xml->name;
    if ($name eq '#text') {
      # a piece of text: append it to the entry under construction
      $current .= $xml->value;
    }
    elsif ($name eq 'si') {
      # a new string item starts: flush the previous entry, if any
      push @shared, $current if defined $current;
      $current = '';
    }
  }

  # flush the entry that was still open when the document ended
  push @shared, $current if defined $current;

  return \@shared;
}
# Scans the 'xl/workbook.xml' member and collects workbook-level information.
# Returns a hashref with the following entries :
#   sheets       => hashref mapping each sheet name to its 1-based position
#                   in the sequence of <sheet> nodes
#   base_year    => 1900 by default; 1904 when a <workbookPr> node has
#                   attribute date1904 equal to '1' or 'true'
#   active_sheet => value of the 'activeTab' attribute of a <workbookView>
#                   node, plus 1; absent when no such attribute is found
# Dies with "sheet node without name" if a <sheet> node has a missing or
# empty 'name' attribute.
sub _workbook_data {
  my $self = shift;

  my %workbook_data = (sheets => {}, base_year => 1900);
  my $sheet_id      = 1;

  my $reader = $self->_xml_reader_for_zip_member('xl/workbook.xml');

 NODE:
  while ($reader->read) {
    next NODE if $reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    my $node_name = $reader->name;

    if ($node_name eq 'sheet') {
      # test definedness and length, not truth : "0" is an acceptable sheet name
      my $name = $reader->getAttribute('name');
      die "sheet node without name" unless defined $name && length $name;
      $workbook_data{sheets}{$name} = $sheet_id++;
    }
    elsif ($node_name eq 'workbookPr') {
      my $date_attr = $reader->getAttribute('date1904');
      $workbook_data{base_year} = 1904                    # this workbook uses the 1904 calendar
        if defined $date_attr and ($date_attr eq '1' or $date_attr eq 'true');
    }
    elsif ($node_name eq 'workbookView') {
      # activeTab is 0-based, so "0" (first tab active) is a meaningful value
      # and must not be discarded just because it is false in Perl
      my $active_attr = $reader->getAttribute('activeTab');
      $workbook_data{active_sheet} = $active_attr + 1
        if defined $active_attr && length $active_attr;
    }
  }

  return \%workbook_data;
}
sub _date_styles {
my $self = shift;
state $date_style_regex = qr{[dy]|\bmm\b};
my @date_styles;
# read from the styles.xml zip member
my $xml_reader = $self->_xml_reader_for_zip_member('xl/styles.xml');
# start with Excel builtin number formats for dates and times
my @numFmt = $self->Excel_builtin_date_formats;
my $expected_subnode = undef;
# add other date formats explicitly specified in this workbook
NODE:
while ($xml_reader->read) {
next NODE if $xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT;
# special treatment for some specific subtrees -- see 'numFmt' and 'xf' below
if ($expected_subnode) {
my ($name, $depth, $handler) = @$expected_subnode;
if ($xml_reader->name eq $name && $xml_reader->depth == $depth) {
# process that subnode and go to the next node
$handler->();
next NODE;
}
elsif ($xml_reader->depth < $depth) {
# finished handling subnodes; back to regular node treatment
$expected_subnode = undef;
}
}
# regular node treatment
if ($xml_reader->name eq 'numFmts') {
# start parsing nodes for numeric formats
$expected_subnode = [numFmt => $xml_reader->depth+1 => sub {
my $id = $xml_reader->getAttribute('numFmtId');
my $code = $xml_reader->getAttribute('formatCode');
$numFmt[$id] = $code if $id && $code && $code =~ $date_style_regex;
}];
}
elsif ($xml_reader->name eq 'cellXfs') {
# start parsing nodes for cell formats
$expected_subnode = [xf => $xml_reader->depth+1 => sub {
state $xf_count = 0;
my $numFmtId = $xml_reader->getAttribute('numFmtId');
my $code = $numFmt[$numFmtId]; # may be undef
$date_styles[$xf_count++] = $code;
( run in 2.129 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )