Spreadsheet-Edit

 view release on metacpan or  search on metacpan

lib/Spreadsheet/Edit/IO.pm  view on Meta::CPAN

#    confess dvis "($tag) fh is not open \$start_pos" unless $fh;
#    my @layers = PerlIO::get_layers($fh);
#    my $cur_pos = tell($fh);
#    if (! seek($fh, $start_pos//0, SEEK_SET)) {
#      btwN 1,dvis "($tag) **NOT SEEKABLE** \$cur_pos \@layers \$start_pos";
#    } else {
#      if ((my $n = read $fh, my $buffer, 128) != 0) {
#        btwN 1,dvis "($tag) \$cur_pos \$start_pos \@layers First $n items are:\n", Hexify($buffer), "\n";
#      } else {
#        btwN 1,dvis "($tag) \$cur_pos \$start_pos \@layers **APPEARS EMPTY**";
#      }
#      seek($fh, $cur_pos, SEEK_SET) or die "re-seek to $cur_pos: $!";
#    }
#  }
  my sub set_fh_encoding() { # returns true if set
    my $enc = $opts->{input_encoding}; # must already be resolved!
    my $bmode = ":raw:encoding($enc):crlf";
    binmode $fh, $bmode or die "binmode '$bmode' : $!";
    if ($debug) {
      my @layers = PerlIO::get_layers($fh);
      warn dvis 'set_fh_encoding: $fh @layers\n';
    }
  }
  my sub open_input() {
    oops if defined $fh;
    if (defined $$ref2octets) {
      open $fh, "<:raw", $ref2octets or confess "BUG:in-mem open:$!";
      #_dump_fh("open_input TO PREVIOUSLY SLURPED");
    } else {
      my $path = $opts->{inpath_sans_sheet};
      $fh = openhandle($path); # undef unless $path is a file handle
      unless ($fh) {
        open $fh, "<", $path or die "$path : $!";
      }
      binmode($fh);
      #_dump_fh("AAA $path open_input raw");
    }
    if (! seek($fh, 0, SEEK_SET)) {
      oops if defined $$ref2octets;
      _binmode_slurp_and_log($fh, $ref2octets, $debug);
      close $fh;
      $fh = undef;
      open $fh, "<:raw", $ref2octets or confess "BUG:in-mem open:$!";
      #_dump_fh("BBB unseekable, slurped");
    }
    my $bomenc = File::BOM::get_encoding_from_filehandle($fh);
    $start_pos = tell($fh);
    if ($bomenc) {
      btw dvis 'Input has BOM, $bomenc $start_pos' if $debug;
      $opts->{input_encoding} = $bomenc;
      binmode($fh); # unnecessary???
      binmode($fh, ":raw:encoding($bomenc):crlf") or die "binmode: $!";
    }
    #_dump_fh("CCC final");
  }
  my sub determine_input_encoding() {
    # If one encoding was specified by the user or implied by a BOM, use it;
    # otherwise try multiple encodings specified by the user or defaulted
    # until one seems to work.
    $opts->{input_encoding} //= $default_input_encodings;
    my @enclist = split m#,#, $opts->{input_encoding};
    return
      if @enclist == 1;
    _slurp_ifnotslurped($fh, $ref2octets, $debug);
    for my $enc (@enclist) {
      eval { _decode_slurped_data($enc, $ref2octets, $start_pos) };

      if ($@) {
         btw "Input encoding '$enc' did not work...($@)\n" if $debug;
         next;
      }
      btw "Input encoding '$enc' seems to work.\n" if $debug;
      @enclist = ($enc);
      last
    }
    confess "Could not detect encoding of $opts->{inpath_sans_sheet}\n"
      if @enclist > 1;
    $opts->{input_encoding} = $enclist[0];
  } #determine_input_encoding

  my sub readparse_csv(@) {
    my %csvopts = (
      @sane_CSV_read_options,
      defined($opts->{quote_char}) ? (quote_char=>$opts->{quote_char}) : (),
      defined($opts->{sep_char})   ? (sep_char=>$opts->{sep_char})     : (),
      auto_diag => 2, # throw on error
      @_
    );
    $csvopts{escape_char} = $csvopts{quote_char}; # must always be the same

    my $csv = Text::CSV->new (\%csvopts)
              or croak "Text::CSV->new: ", Text::CSV->error_diag(),
                       dvis('\n## %csvopts\n');
    seek($fh, $start_pos, SEEK_SET) or die $!; # skip over possible BOM
    my $rows;
    while (my $F = $csv->getline( $fh )) {
      push(@$rows, $F);
    }
    $rows
  }

  my sub determine_csv_q_sep($) {
    my ($r2rows) = @_;
    return
      if defined($opts->{quote_char}) && defined($opts->{sep_char});

    # Try combinations starting with the most-common '"' and ',' while
    # parsing the file for unsafe unquoted values (throws on syntax error).
    # The expectation is that the first try usually succeeds
    Q:
    for my $q (defined($opts->{quote_char})
                 ? ($opts->{quote_char}) : ("\"", "'")) {
      my $found_q;
      SEP:
      for my $sep (defined($opts->{sep_char})
                     ? ($opts->{sep_char}) : (",","\t")) {
        btw dvisq '--- TRYING $q $sep ---' if $debug;

#        # Preliminary check for an illegal use of the quote char
#        if (defined($chars)
#            && $chars =~ /[^${q}${sep}\x{0D}\x{0A}]



( run in 1.228 second using v1.01-cache-2.11-cpan-71847e10f99 )