Excel-ValueReader-XLSX

 view release on metacpan or  search on metacpan

lib/Excel/ValueReader/XLSX/Backend/LibXML.pm  view on Meta::CPAN

    if ($xml_reader->name eq 'dimension') {
      $ref = $xml_reader->getAttribute('ref');
      last PREAMBLE;
    }
  }



  my ($row_num, $col_num, @rows) = (0, 0);
  my ($cell_type, $cell_style, $seen_node);

  # dual closure : may be used as an iterator or as a regular sub, depending on $want_iterator. Of course
  # it would have been simpler to just write an iterator, and call it in a loop if the client wants all rows
  # at once ... but thousands of additional sub calls would slow down the process. So this more complex implementation
  # is for the sake of processing speed.
  my $get_values = sub {

    # in iterator mode, if we have a row ready, just return it
    return shift @rows if $want_iterator and @rows > 1;

    # otherwise loop on matching nodes
  NODE:
    while ($xml_reader->read) {
      my $node_name = $xml_reader->name;
      my $node_type = $xml_reader->nodeType;

      $xml_reader->finish and last NODE   if $node_name eq 'sheetData' && $node_type == XML_READER_TYPE_END_ELEMENT;
      next NODE                           if $node_type == XML_READER_TYPE_END_ELEMENT;

      if ($node_name eq 'row') {
        my $prev_row = $row_num;
        $row_num     = $xml_reader->getAttribute('r') // $row_num+1;
        $col_num     = 0;
        push @rows, [] for 1 .. $row_num-$prev_row;

        # in iterator mode, if we have a closed empty row, just return it
        return shift @rows if $want_iterator and @rows > 1;
      }

      elsif ($node_name eq 'c') {
        my $A1_cell_ref = $xml_reader->getAttribute('r') // '';
        my ($col_A1, $given_row)  = ($A1_cell_ref =~ /^([A-Z]+)(\d+)$/);

        $given_row //= $row_num;
        if    ($given_row < $row_num) {die "cell claims to be in row $given_row while current row is $row_num"}
        elsif ($given_row > $row_num) {push @rows, [] for 1 .. $given_row-$row_num;
                                       $col_num = 0;
                                       $row_num = $given_row;}

        # deal with the col number given in the 'r' attribute, if present
        if ($col_A1) {$col_num = $Excel::ValueReader::XLSX::A1_to_num_memoized{$col_A1}
                             //= Excel::ValueReader::XLSX->A1_to_num($col_A1)}
        else         {$col_num++}

        $cell_type  = $xml_reader->getAttribute('t');
        $cell_style = $xml_reader->getAttribute('s');
        $seen_node  = '';
      }

      elsif ($node_name =~ /^[vtf]$/) {
        # remember that we have seen a 'value' or 'text' or 'formula' node
        $seen_node = $node_name;
      }

      elsif ($node_name eq '#text') {
        #start processing cell content

        my $val = $xml_reader->value;
        $cell_type //= '';

        if ($seen_node eq 'v')  {
          if ($cell_type eq 's') {
            if (looks_like_number($val)) {
              $val = $self->strings->[$val]; # string -- pointer into the global array of shared strings
            }
            else {
              warn "unexpected non-numerical value: $val inside a node of shape <v t='s'>\n";
            }
          }
          elsif ($cell_type eq 'e') {
            $val = undef; # error -- silently replace by undef
          }
          elsif ($cell_type =~ /^(n|d|b|str|)$/) {
            # number, date, boolean, formula string or no type : content is already in $val

          # if this is a date, replace the numeric value by the formatted date
            if ($has_date_formatter && $cell_style && looks_like_number($val) && $val >= 0) {
              my $date_style = $self->date_styles->[$cell_style];
              $val = $self->formatted_date($val, $date_style)    if $date_style;
            }
          }
          else {
            # handle unexpected cases
            warn "unsupported type '$cell_type' in cell L${row_num}C${col_num}\n";
            $val = undef;
          }

          # insert this value into the last row
          $rows[-1][$col_num-1] = $val;
        }

        elsif ($seen_node eq 't' && $cell_type eq 'inlineStr')  {
          # inline string -- accumulate all #text nodes until next cell
          no warnings 'uninitialized';
          $rows[-1][$col_num-1] .= $val;
        }

        elsif ($seen_node eq 'f')  {
          # formula -- just ignore it
        }

        else {
          # handle unexpected cases
          warn "unexpected text node in cell L${row_num}C${col_num}: $val\n";
        }
      }
    }

    # end of XML nodes. In iterator mode, return a row if we have one
    return @rows ? shift @rows : undef if $want_iterator;
  };

  # decide what to return depending on the dual mode
  my $retval = $want_iterator ? iter($get_values)         
                              : do {$get_values->(); \@rows}; # run the closure and return the rows

  return ($ref, $retval);
}




sub _table_targets {
  my ($self, $rel_xml) = @_;

  my $xml_reader = $self->_xml_reader($rel_xml);

  my @table_targets;

  # iterate through XML nodes
 NODE:
  while ($xml_reader->read) {
    my $node_name = $xml_reader->name;
    my $node_type = $xml_reader->nodeType;
    next NODE if $node_type == XML_READER_TYPE_END_ELEMENT;

    if ($node_name eq 'Relationship') {
      my $target     = $xml_reader->getAttribute('Target');
      if ($target =~ m[tables/table(\d+)\.xml]) {
        # just store the table id (positive integer)
        push @table_targets, $1;
      }
    }
  }

  return @table_targets;
}


sub _parse_table_xml {
  my ($self, $xml) = @_;

  my %table_info;

  my $xml_reader = $self->_xml_reader($xml);

  # iterate through XML nodes
 NODE:
  while ($xml_reader->read) {



( run in 1.010 second using v1.01-cache-2.11-cpan-39bf76dae61 )