Excel-ValueReader-XLSX
view release on metacpan or search on metacpan
lib/Excel/ValueReader/XLSX/Backend/LibXML.pm view on Meta::CPAN
if ($xml_reader->name eq 'dimension') {
$ref = $xml_reader->getAttribute('ref');
last PREAMBLE;
}
}
my ($row_num, $col_num, @rows) = (0, 0);
my ($cell_type, $cell_style, $seen_node);
# dual closure : may be used as an iterator or as a regular sub, depending on $want_iterator. Of course
# it would have been simpler to just write an iterator, and call it in a loop if the client wants all rows
# at once ... but thousands of additional sub calls would slow down the process. So this more complex implementation
# is for the sake of processing speed.
my $get_values = sub {
# in iterator mode, if we have a row ready, just return it
return shift @rows if $want_iterator and @rows > 1;
# otherwise loop on matching nodes
NODE:
while ($xml_reader->read) {
my $node_name = $xml_reader->name;
my $node_type = $xml_reader->nodeType;
$xml_reader->finish and last NODE if $node_name eq 'sheetData' && $node_type == XML_READER_TYPE_END_ELEMENT;
next NODE if $node_type == XML_READER_TYPE_END_ELEMENT;
if ($node_name eq 'row') {
my $prev_row = $row_num;
$row_num = $xml_reader->getAttribute('r') // $row_num+1;
$col_num = 0;
push @rows, [] for 1 .. $row_num-$prev_row;
# in iterator mode, if we have a closed empty row, just return it
return shift @rows if $want_iterator and @rows > 1;
}
elsif ($node_name eq 'c') {
my $A1_cell_ref = $xml_reader->getAttribute('r') // '';
my ($col_A1, $given_row) = ($A1_cell_ref =~ /^([A-Z]+)(\d+)$/);
$given_row //= $row_num;
if ($given_row < $row_num) {die "cell claims to be in row $given_row while current row is $row_num"}
elsif ($given_row > $row_num) {push @rows, [] for 1 .. $given_row-$row_num;
$col_num = 0;
$row_num = $given_row;}
# deal with the col number given in the 'r' attribute, if present
if ($col_A1) {$col_num = $Excel::ValueReader::XLSX::A1_to_num_memoized{$col_A1}
//= Excel::ValueReader::XLSX->A1_to_num($col_A1)}
else {$col_num++}
$cell_type = $xml_reader->getAttribute('t');
$cell_style = $xml_reader->getAttribute('s');
$seen_node = '';
}
elsif ($node_name =~ /^[vtf]$/) {
# remember that we have seen a 'value' or 'text' or 'formula' node
$seen_node = $node_name;
}
elsif ($node_name eq '#text') {
#start processing cell content
my $val = $xml_reader->value;
$cell_type //= '';
if ($seen_node eq 'v') {
if ($cell_type eq 's') {
if (looks_like_number($val)) {
$val = $self->strings->[$val]; # string -- pointer into the global array of shared strings
}
else {
warn "unexpected non-numerical value: $val inside a node of shape <v t='s'>\n";
}
}
elsif ($cell_type eq 'e') {
$val = undef; # error -- silently replace by undef
}
elsif ($cell_type =~ /^(n|d|b|str|)$/) {
# number, date, boolean, formula string or no type : content is already in $val
# if this is a date, replace the numeric value by the formatted date
if ($has_date_formatter && $cell_style && looks_like_number($val) && $val >= 0) {
my $date_style = $self->date_styles->[$cell_style];
$val = $self->formatted_date($val, $date_style) if $date_style;
}
}
else {
# handle unexpected cases
warn "unsupported type '$cell_type' in cell L${row_num}C${col_num}\n";
$val = undef;
}
# insert this value into the last row
$rows[-1][$col_num-1] = $val;
}
elsif ($seen_node eq 't' && $cell_type eq 'inlineStr') {
# inline string -- accumulate all #text nodes until next cell
no warnings 'uninitialized';
$rows[-1][$col_num-1] .= $val;
}
elsif ($seen_node eq 'f') {
# formula -- just ignore it
}
else {
# handle unexpected cases
warn "unexpected text node in cell L${row_num}C${col_num}: $val\n";
}
}
}
# end of XML nodes. In iterator mode, return a row if we have one
return @rows ? shift @rows : undef if $want_iterator;
};
# decide what to return depending on the dual mode
my $retval = $want_iterator ? iter($get_values)
: do {$get_values->(); \@rows}; # run the closure and return the rows
return ($ref, $retval);
}
sub _table_targets {
my ($self, $rel_xml) = @_;
my $xml_reader = $self->_xml_reader($rel_xml);
my @table_targets;
# iterate through XML nodes
NODE:
while ($xml_reader->read) {
my $node_name = $xml_reader->name;
my $node_type = $xml_reader->nodeType;
next NODE if $node_type == XML_READER_TYPE_END_ELEMENT;
if ($node_name eq 'Relationship') {
my $target = $xml_reader->getAttribute('Target');
if ($target =~ m[tables/table(\d+)\.xml]) {
# just store the table id (positive integer)
push @table_targets, $1;
}
}
}
return @table_targets;
}
sub _parse_table_xml {
my ($self, $xml) = @_;
my %table_info;
my $xml_reader = $self->_xml_reader($xml);
# iterate through XML nodes
NODE:
while ($xml_reader->read) {
( run in 1.010 second using v1.01-cache-2.11-cpan-39bf76dae61 )