Spreadsheet-Edit

 view release on metacpan or  search on metacpan

t/io.pl  view on Meta::CPAN

#!/usr/bin/perl
# *** DO NOT USE Test2 FEATURES becuase this is a sub-script ***
use FindBin qw($Bin);
use lib $Bin;
use t_Common qw/oops/; # strict, warnings, Carp etc.

use t_TestCommon ':no-Test2',
         qw/$verbose $silent $debug dprint dprintf
            bug mycheckeq_literal expect1 mycheck
            verif_no_internals_mentioned
            insert_loc_in_evalstr verif_eval_err
            arrays_eq hash_subset
            @quotes/;

### This doesn't break the no-internals-mentioned test in t/60_all.t
### because 'carp' is never (normally) called, i.e. no warnings to users occur
##$Carp::Verbose = 1; # show backtrace on errors

use t_SSUtils;
use Encode qw/encode decode/;

use Spreadsheet::Edit qw/:all logmsg fmt_sheet cx2let let2cx sheet/;
use Spreadsheet::Edit::IO qw/convert_spreadsheet OpenAsCsv/;
use File::BOM qw/open_bom/;
use Data::Hexify qw/Hexify/;

use Test::Deep::NoTest qw/eq_deeply/;

my $cwd = fastgetcwd;
#my $input_xlsx_path = abs2rel(fast_abs_path("$Bin/../tlib/Test.xlsx"), $cwd);
my $tlib = path("$Bin/../tlib")->absolute;
my $input_xlsx_path = $tlib->child("Test.xlsx");

# Is LibreOffice (or some substitute) installed?
my $can_cvt_spreadsheets    = Spreadsheet::Edit::IO::can_cvt_spreadsheets();
my $can_extract_named_sheet = Spreadsheet::Edit::IO::can_extract_named_sheet();
my $can_extract_allsheets   = Spreadsheet::Edit::IO::can_extract_allsheets();

sub verif_Sheet1(;$){
  my $msg = $_[0] // "";
  eq_deeply(title_rx(), 0) or confess "${msg} title_rx is not 0";
  eq_deeply([@{ title_row() }],["First Name","Last Name","Email","Date","Zipcode"])
    or confess "${msg} Sheet1 titles wrong\n", dvis('${\title_rx()} @{ title_row() }');
  my $exp = ["Françoise-Athénaïs","de Rochechouart","","10/05/1640","00007"];
  eq_deeply([@{ $rows[3] }],$exp)
    or confess "${msg} Sheet1 row 4 is wrong,\n",
               "  got: ",avis(@{ $rows[3] }),"\n",
               "  exp: ",avis(@{ $exp }),
}
sub verif_Another_Sheet(;$) {
  my $msg = $_[0] // "";
  eq_deeply(title_rx(), 0) or confess "${msg} title_rx is not 0";
  eq_deeply([@{ title_row() }],["Input","Output"])
    or confess "Another Sheet titles wrong;",vis([@{ title_row() }]);
  apply {
    my $exp = 100 + $rx - 1;
    eq_deeply($crow{B}, $exp)
      or confess "$msg Another Sheet:Col B, rx $rx is $exp";
  }
}

sub doconvert(@) {
  if (@_ % 2 == 0) {
    #convert_spreadsheet(verbose => $verbose, debug => $debug, @_);
    unshift @_, (verbose => $verbose, debug => $debug, silent => $silent);
    goto &convert_spreadsheet;
  } else {
    my $inpath = shift;
    #convert_spreadsheet($inpath, verbose => $verbose, debug => $debug, @_);
    unshift @_, (verbose => $verbose, debug => $debug, silent => $silent);
    unshift @_, $inpath;
    goto &convert_spreadsheet;
  }
}
sub doread($$) {
  my ($opts, $inpath) = @_;
  sheet undef;
  eq_deeply(sheet(), undef) or confess "sheet() did not return undef";

  eq_deeply(eval{my $dum=$num_cols},undef) or confess "num_cols unexpectedly valid";
  read_spreadsheet {debug => $debug, verbose => $verbose, silent => $silent, %$opts}, $inpath;
  confess "num_cols is not positive" unless $num_cols > 0;
}

# Test the various ways of specifying a sheet name
if ($can_cvt_spreadsheets) {
  if ($can_extract_named_sheet) {
    doread({}, $input_xlsx_path."!Sheet1"); verif_Sheet1();
    doread({sheetname => "Sheet1"}, $input_xlsx_path); verif_Sheet1;
    doread({sheetname => "Sheet1"}, $input_xlsx_path."!Sheet1"); verif_Sheet1;
    doread({sheetname => "Another Sheet"}, $input_xlsx_path."!Another Sheet"); verif_Another_Sheet;
  } else {
    warn "# Skipping extract-by-sheetname because soffice is too old\n" unless $silent;
  }
  if ($can_extract_allsheets) {
    # Extract all sheets
    my $dirpath = Path::Tiny->tempdir;
    doconvert(outpath => $dirpath, allsheets => 1, inpath => $input_xlsx_path, cvt_to => "csv");
    say dvis '###BBB $dirpath ->children : ',avis($dirpath->children) if $debug;
    my $got = [sort map{$_->basename} $dirpath->children];
    my $exp = [sort "Sheet1.csv", "Another Sheet.csv"];
    eq_deeply($got, $exp) or die dvis 'Missing or extra sheets: $got $exp';
  } else {
    warn "# Skipping 'allsheets' test because soffice is too old\n" unless $silent;
  }

  # Round-trip csv -> ods -> csv check
  {
    my $testcsv_path = $tlib->child("Sheet1_unquoted.csv");
    my $exp_chars
      = $testcsv_path->slurp({binmode => ":raw:encoding(UTF-8):crlf"});
    my $h1 = doconvert(inpath => $testcsv_path->canonpath(), cvt_to => "ods");
    my $h2 = doconvert(inpath => $h1->{outpath}, cvt_to => "csv");
    my $got_chars = path($h2->{outpath})->slurp({binmode => ":raw:encoding(UTF-8):crlf"});
    if ($got_chars eq $exp_chars) {
      say "> Round-trip csv->ods->csv succeeded!" unless $silent;
    } else {
      $Data::Dumper::Interp::Foldwidth = 20;
      die "Round-trip csv->ods->csv mismatch:\n",
          ivis('Original: $exp_chars\n'),
          ivis('Result:   $got_chars\n') ;
    }
  }
} else {
  warn "# Spreadsheet tests skipped because soffice is not installed\n" unless $silent;
}

########################################################################
# Do lots of tests using specified test data.
# This is called twice, once with real data and again with empty data
# (the latter to verify that logically-empty files i.e. BOM only, work)
########################################################################
sub do_encoding_tests($) {
  my $exp_chars = shift;
  confess dvis 'Unexpected CR in $exp_chars' if $exp_chars =~ /\x{0D}/;
  my $exp_CRLFchars = $exp_chars =~ s/\n/\x0D\x0A/gr;
  my $emptydata = $exp_chars eq "";

  # Well, we can't prevent CRLF line endings being written on Windows
  # (when writing with :crlf).  This prevents comparing :crlf-written
  # results with our golden files written on Linux.
  #
  # Q: Should CSVs always be written *without* :crlf to be in *nix form??
  # A?: Allow :crlf to be specifed in an arg, e.g. output_binmode ?
  #
  # (OLD:)So first convert the test data .csv
  # to "local" line endings so it will match.
  my $local_testcsv_UTF8 = Path::Tiny->tempfile("local_testcsv_UTF8_XXXXX");
  $local_testcsv_UTF8->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_chars);

  (my $local_testcsv_UTF8CRLF = Path::Tiny->tempfile("local_testcsv_UTF8CRLF_XXXXX"))
    ->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_CRLFchars);

  (my $local_testcsv_UTF16 = Path::Tiny->tempfile("local_testcsv_UTF16_XXXXX"))
    ->spew({binmode => ":raw:encoding(UTF-16)"}, $exp_chars);

  (my $local_testcsv_UTF16BECRLF = Path::Tiny->tempfile("local_testcsv_UTF16BECRLF_XXXXX"))
    ->spew({binmode => ":raw:encoding(UTF-16BE)"}, $exp_CRLFchars);

  (my $local_testcsv_UTF16LE = Path::Tiny->tempfile("local_testcsv_UTF16LE_XXXXX"))
    ->spew({binmode => ":raw:encoding(UTF-16LE)"}, $exp_chars);

  my $local_testcsv_UTF8CRLFWithBOM = Path::Tiny->tempfile("local_testcsv_UTF8CRLFWithBOM_XXXXX");
  {
    $local_testcsv_UTF8CRLF->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_CRLFchars);
    $local_testcsv_UTF8CRLFWithBOM->spew({binmode => ":raw:encoding(UTF-8)"}, "\N{U+feff}");
    $local_testcsv_UTF8CRLFWithBOM->append_utf8($exp_CRLFchars);
  }

  # Confirm that conflicting specs are caught
  eval{my $dum=read_spreadsheet {sheetname => "Sheet1", verbose => $verbose}, $input_xlsx_path."!Another Sheet" };
  die "Conflicting sheetname opt and !suffix not caught" if $@ eq "";

  # "Read" a csv; should be a pass-thru without conversion when possible
  foreach (
     #  path              exp_passthru  input_encoding     output_encoding
     [$local_testcsv_UTF8,           1, undef             ,undef           ],
     [$local_testcsv_UTF8CRLF,       1, undef             ,undef           ],
     [$local_testcsv_UTF8,           0, undef             ,"UTF-16BE"      ],
     [$local_testcsv_UTF8CRLFWithBOM,0, undef             ,undef           ],
     [$local_testcsv_UTF8CRLFWithBOM,0,"UTF-32"           ,undef           ], # BOM overrides
     [$local_testcsv_UTF16,          0,"UTF-32"           ,undef           ], # BOM overrides
     [$local_testcsv_UTF16BECRLF,    0,"UTF-32,UTF-16BE"  ,undef           ], # no BOM
     [$local_testcsv_UTF16LE,        1,"UTF-16LE"         ,"UTF-16LE"      ], # no BOM
     [$local_testcsv_UTF16BECRLF,    1,"UTF-16BE"         ,"UTF-16BE"      ], # no BOM
     [$local_testcsv_UTF16BECRLF, (!$emptydata),"UTF-32,UTF-16BE"  ,"UTF-16BE"      ], # no BOM
     [$local_testcsv_UTF16LE,        0,"UTF-16LE,UTF-32"  ,"UTF-32"        ], # no BOM
     [$local_testcsv_UTF16LE,        0,"UTF-16LE,UTF-32"  ,"UTF-16BE"      ], # no BOM
         ) {
    my ($input_csvpath, $exp_passthru, $input_enc, $output_enc) = @$_;
    my @inenc_opts  = ($input_enc  ? (input_encoding  => $input_enc ) : ());
    my @outenc_opts = ($output_enc ? (output_encoding => $output_enc) : ());

    warn "--- Expect passthru=",!!$exp_passthru," for $input_csvpath ienc=",u($input_enc)," oenc=",u($output_enc),"\n"
      if $verbose;

    ### This is sometimes failing to detect $local_testcsv as a CSV on Solaris ;
    ### try to show enough information to debug it...
    my sub _passthru_test(@) {
      my %debug_opts = @_;
      oops unless exists($debug_opts{debug});
      oops unless exists($debug_opts{verbose});

      # Call convert_spreadsheet() directly, passing inpath as an option
warn "#------------ _passthru_test ---(exp=$exp_passthru)--------\n" if $debug_opts{debug};
warn "#------------ ",qsh($input_csvpath),"\n" if $debug_opts{debug};
#warn "Using tempdir => /tmp/J ...\n";
      my $h_cs = convert_spreadsheet(
                   silent => $silent,
                   inpath => $input_csvpath, cvt_to => 'csv',
#tempdir => "/tmp/J",  ####TEMP TEMP TEMP
                   @inenc_opts, @outenc_opts, %debug_opts);
      my $got_passthru = ($h_cs->{outpath} eq $input_csvpath);
      if (!!$got_passthru ne !!$exp_passthru) {
        die( ($exp_passthru
               ? "Expected null conversion but output is different"
               : "Expected differing output but got pass-thru"),
             "\n",
             qx/(set -x; ls -ld $input_csvpath) 2>&1/,
             qx/(set -x; ls -ld $h_cs->{outpath}) 2>&1/,
             qx/(set -x; od -t x1a $h_cs->{outpath}) 2>&1/,
             " "
        );
      }
      # Re-read the file ourself and check the data
      my $input_data = do{
        my ($benc,$spill) = open_bom(my $inFH, $input_csvpath, ":raw");
        oops if $spill;
        my $enc = $benc || ($input_csvpath =~ s/.*UTF/UTF-/r =~ s/WithBOM|CRLF|_.*//gr);
        binmode($inFH, ":raw:encoding($enc):crlf") or die $!;
        warn dvis '##BBB $input_csvpath $enc $benc $spill\n' if $debug;
        local $/; <$inFH>
      };
      my $reslurp_data = do{
        my $oenc = $h_cs->{encoding} // die "{encoding} not set in result";
        path($h_cs->{outpath})->slurp( {binmode => ":raw:encoding($oenc):crlf"} );
      };
      unless ($input_data eq $reslurp_data) {
        die dvis 'Data from slurp with result encoding does not match!\n'
            .'$input_csvpath $h_cs @inenc_opts @outenc_opts\n\n'
            .'$input_data\n\n'
            .'$reslurp_data\n'
      }

      # Try OpenAsCsv, which by the way calls convert_spreadsheet with a
      # separate inpath arg. It returns a file descriptor to read the data.
      my $h_oac = OpenAsCsv(inpath => $input_csvpath,
                            @inenc_opts, @outenc_opts, %debug_opts);
      die dvis 'OpenAsCsv result {inpath} wrong\n$h_oac\n$h_cs'
        unless $h_oac->{inpath} eq $input_csvpath;
      die dvis 'OpenAsCsv result {encoding} wrong\n$h_oac\n$h_cs'
        unless $h_oac->{encoding} eq $h_cs->{encoding};
      my $oac_fh_data = do{
        my $fh = $h_oac->{fh};
        local $/;
        <$fh>
      };
      die dvis 'Data from OpenAsCsv result fh does not match!\n'
            .'$input_csvpath $h_oac\n@inenc_opts @outenc_opts\n $input_data\n$oac_fh_data'
      unless $input_data eq $oac_fh_data;

      # Try via read_spreadsheet
      read_spreadsheet {@inenc_opts, %debug_opts}, $input_csvpath;
      warn qx/set -x; hexdump -C $input_csvpath/ if $debug && $^O eq 'linux';
      warn dvis '## @${\sheet()}' if $debug;
      if ($emptydata) {
        die "Expected empty spreadsheet ($input_csvpath)" unless @rows == 0;
      } else {
        verif_Sheet1 "($input_csvpath)";
      }
    };

    eval { _passthru_test(debug => $debug, verbose => $verbose) };
    if ($@) {
      warn __FILE__,":",__LINE__," - failed: $@\n";
      die "died" if $debug && $verbose;
      warn "RE_TRYING WITH DEBUG...\n";
      $Carp::Verbose = 1; # redundant with Carp::Always
      require Carp::Always;
      Carp::Always->import();
       _passthru_test(debug => 1, verbose => 1);
      die "should have died by now";
    }

    print "> Passthru test ok for $input_csvpath ienc=",u($input_enc)," oenc=",u($output_enc),"\n"
      unless $silent;
  }

  {



( run in 0.627 second using v1.01-cache-2.11-cpan-5a3173703d6 )