Excel-ValueReader-XLSX

 view release on metacpan or  search on metacpan

lib/Excel/ValueReader/XLSX/Backend/LibXML.pm  view on Meta::CPAN

package Excel::ValueReader::XLSX::Backend::LibXML;
use utf8;
use 5.12.1;
use Moose;
use Scalar::Util             qw/looks_like_number/;
use XML::LibXML::Reader      qw/XML_READER_TYPE_END_ELEMENT/;
use Iterator::Simple         qw/iter/;

extends 'Excel::ValueReader::XLSX::Backend';

#======================================================================
# LAZY ATTRIBUTE CONSTRUCTORS
#======================================================================

# Builder for the table of shared strings, read from the
# 'xl/sharedStrings.xml' zip member.
#
# Returns an arrayref holding one string per <si> node, in document order.
# Each string is the concatenation of every text node met after the start
# of that <si> and before the start of the next one, whatever element
# wraps those text nodes.
sub _strings {
  my ($self) = @_;

  my $xml = $self->_xml_reader_for_zip_member('xl/sharedStrings.xml');

  my @shared;   # strings already completed
  my $current;  # text gathered for the <si> being read; undef until some node is seen

  while ($xml->read) {
    # closing tags carry no information here, only opening tags and text do
    if ($xml->nodeType != XML_READER_TYPE_END_ELEMENT) {
      my $tag = $xml->name;

      if ($tag eq '#text') {
        # one more piece of text for the string under construction
        $current .= $xml->value;
      }
      elsif ($tag eq 'si') {
        # a new string item starts : store the previous one, if any
        push @shared, $current if defined $current;
        $current = '';
      }
    }
  }

  # the last item has no following <si> to trigger its storage
  push @shared, $current if defined $current;

  return \@shared;
}


# Builder for the workbook metadata, read from the 'xl/workbook.xml'
# zip member.
#
# Returns a hashref with the following entries :
#   sheets       : hashref mapping each sheet name to its 1-based position
#                  among the <sheet> nodes, in document order
#   base_year    : 1900, or 1904 if a <workbookPr> node has its 'date1904'
#                  attribute set to '1' or 'true'
#   active_sheet : value of the 'activeTab' attribute plus 1; only present
#                  when a <workbookView> node carries that attribute
#
# Dies with "sheet node without name" if a <sheet> node has a missing or
# empty 'name' attribute.
sub _workbook_data {
  my $self = shift;

  my %workbook_data = (sheets => {}, base_year => 1900);
  my $sheet_id  = 1;

  my $reader = $self->_xml_reader_for_zip_member('xl/workbook.xml');

 NODE:
  while ($reader->read) {
    next NODE if $reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    my $node_name = $reader->name;

    if ($node_name eq 'sheet') {
      # test definedness and length, not truth : a sheet may be named '0'
      my $name = $reader->getAttribute('name');
      die "sheet node without name" unless defined $name && length $name;
      $workbook_data{sheets}{$name} = $sheet_id++;
    }
    elsif ($node_name eq 'workbookPr') {
      my $date_attr = $reader->getAttribute('date1904') // '';
      $workbook_data{base_year} = 1904 if $date_attr eq '1' or $date_attr eq 'true'; # this workbook uses the 1904 calendar
    }
    elsif ($node_name eq 'workbookView') {
      # activeTab="0" is a legitimate value (it yields active_sheet 1), so
      # the attribute is tested for presence and not for truth
      my $active_attr = $reader->getAttribute('activeTab');
      $workbook_data{active_sheet} = $active_attr + 1 if defined $active_attr && length $active_attr;
    }
  }

  return \%workbook_data;
}


sub _date_styles {
  my $self = shift;

  state $date_style_regex = qr{[dy]|\bmm\b};
  my @date_styles;

  # read from the styles.xml zip member
  my $xml_reader = $self->_xml_reader_for_zip_member('xl/styles.xml');

  # start with Excel builtin number formats for dates and times
  my @numFmt = $self->Excel_builtin_date_formats;

  my $expected_subnode = undef;

  # add other date formats explicitly specified in this workbook
 NODE:
  while ($xml_reader->read) {
    next NODE if $xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT;

    # special treatment for some specific subtrees -- see 'numFmt' and 'xf' below
    if ($expected_subnode) {
      my ($name, $depth, $handler) = @$expected_subnode;
      if ($xml_reader->name eq $name && $xml_reader->depth == $depth) {
        # process that subnode and go to the next node
        $handler->();
        next NODE;
      }
      elsif ($xml_reader->depth < $depth) {
        # finished handling subnodes; back to regular node treatment
        $expected_subnode = undef;
      }
    }

    # regular node treatment
    if ($xml_reader->name eq 'numFmts') {
      # start parsing nodes for numeric formats
      $expected_subnode = [numFmt => $xml_reader->depth+1 => sub {
                             my $id   = $xml_reader->getAttribute('numFmtId');
                             my $code = $xml_reader->getAttribute('formatCode');
                             $numFmt[$id] = $code if $id && $code && $code =~ $date_style_regex;
                           }];
    }

    elsif ($xml_reader->name eq 'cellXfs') {
      # start parsing nodes for cell formats
      $expected_subnode = [xf => $xml_reader->depth+1 => sub {
                             state $xf_count = 0;
                             my $numFmtId    = $xml_reader->getAttribute('numFmtId');
                             my $code        = $numFmt[$numFmtId]; # may be undef
                             $date_styles[$xf_count++] = $code;



( run in 2.129 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )