Spreadsheet-Edit
view release on metacpan or search on metacpan
#!/usr/bin/perl
# *** DO NOT USE Test2 FEATURES becuase this is a sub-script ***
use FindBin qw($Bin);
use lib $Bin;
use t_Common qw/oops/; # strict, warnings, Carp etc.
use t_TestCommon ':no-Test2',
qw/$verbose $silent $debug dprint dprintf
bug mycheckeq_literal expect1 mycheck
verif_no_internals_mentioned
insert_loc_in_evalstr verif_eval_err
arrays_eq hash_subset
@quotes/;
### This doesn't break the no-internals-mentioned test in t/60_all.t
### because 'carp' is never (normally) called, i.e. no warnings to users occur
##$Carp::Verbose = 1; # show backtrace on errors
use t_SSUtils;
use Encode qw/encode decode/;
use Spreadsheet::Edit qw/:all logmsg fmt_sheet cx2let let2cx sheet/;
use Spreadsheet::Edit::IO qw/convert_spreadsheet OpenAsCsv/;
use File::BOM qw/open_bom/;
use Data::Hexify qw/Hexify/;
use Test::Deep::NoTest qw/eq_deeply/;
my $cwd = fastgetcwd;
#my $input_xlsx_path = abs2rel(fast_abs_path("$Bin/../tlib/Test.xlsx"), $cwd);
my $tlib = path("$Bin/../tlib")->absolute;
my $input_xlsx_path = $tlib->child("Test.xlsx");
# Is LibreOffice (or some substitute) installed?
my $can_cvt_spreadsheets = Spreadsheet::Edit::IO::can_cvt_spreadsheets();
my $can_extract_named_sheet = Spreadsheet::Edit::IO::can_extract_named_sheet();
my $can_extract_allsheets = Spreadsheet::Edit::IO::can_extract_allsheets();
sub verif_Sheet1(;$){
my $msg = $_[0] // "";
eq_deeply(title_rx(), 0) or confess "${msg} title_rx is not 0";
eq_deeply([@{ title_row() }],["First Name","Last Name","Email","Date","Zipcode"])
or confess "${msg} Sheet1 titles wrong\n", dvis('${\title_rx()} @{ title_row() }');
my $exp = ["Françoise-Athénaïs","de Rochechouart","","10/05/1640","00007"];
eq_deeply([@{ $rows[3] }],$exp)
or confess "${msg} Sheet1 row 4 is wrong,\n",
" got: ",avis(@{ $rows[3] }),"\n",
" exp: ",avis(@{ $exp }),
}
sub verif_Another_Sheet(;$) {
my $msg = $_[0] // "";
eq_deeply(title_rx(), 0) or confess "${msg} title_rx is not 0";
eq_deeply([@{ title_row() }],["Input","Output"])
or confess "Another Sheet titles wrong;",vis([@{ title_row() }]);
apply {
my $exp = 100 + $rx - 1;
eq_deeply($crow{B}, $exp)
or confess "$msg Another Sheet:Col B, rx $rx is $exp";
}
}
sub doconvert(@) {
if (@_ % 2 == 0) {
#convert_spreadsheet(verbose => $verbose, debug => $debug, @_);
unshift @_, (verbose => $verbose, debug => $debug, silent => $silent);
goto &convert_spreadsheet;
} else {
my $inpath = shift;
#convert_spreadsheet($inpath, verbose => $verbose, debug => $debug, @_);
unshift @_, (verbose => $verbose, debug => $debug, silent => $silent);
unshift @_, $inpath;
goto &convert_spreadsheet;
}
}
sub doread($$) {
my ($opts, $inpath) = @_;
sheet undef;
eq_deeply(sheet(), undef) or confess "sheet() did not return undef";
eq_deeply(eval{my $dum=$num_cols},undef) or confess "num_cols unexpectedly valid";
read_spreadsheet {debug => $debug, verbose => $verbose, silent => $silent, %$opts}, $inpath;
confess "num_cols is not positive" unless $num_cols > 0;
}
# Test the various ways of specifying a sheet name
if ($can_cvt_spreadsheets) {
if ($can_extract_named_sheet) {
doread({}, $input_xlsx_path."!Sheet1"); verif_Sheet1();
doread({sheetname => "Sheet1"}, $input_xlsx_path); verif_Sheet1;
doread({sheetname => "Sheet1"}, $input_xlsx_path."!Sheet1"); verif_Sheet1;
doread({sheetname => "Another Sheet"}, $input_xlsx_path."!Another Sheet"); verif_Another_Sheet;
} else {
warn "# Skipping extract-by-sheetname because soffice is too old\n" unless $silent;
}
if ($can_extract_allsheets) {
# Extract all sheets
my $dirpath = Path::Tiny->tempdir;
doconvert(outpath => $dirpath, allsheets => 1, inpath => $input_xlsx_path, cvt_to => "csv");
say dvis '###BBB $dirpath ->children : ',avis($dirpath->children) if $debug;
my $got = [sort map{$_->basename} $dirpath->children];
my $exp = [sort "Sheet1.csv", "Another Sheet.csv"];
eq_deeply($got, $exp) or die dvis 'Missing or extra sheets: $got $exp';
} else {
warn "# Skipping 'allsheets' test because soffice is too old\n" unless $silent;
}
# Round-trip csv -> ods -> csv check
{
my $testcsv_path = $tlib->child("Sheet1_unquoted.csv");
my $exp_chars
= $testcsv_path->slurp({binmode => ":raw:encoding(UTF-8):crlf"});
my $h1 = doconvert(inpath => $testcsv_path->canonpath(), cvt_to => "ods");
my $h2 = doconvert(inpath => $h1->{outpath}, cvt_to => "csv");
my $got_chars = path($h2->{outpath})->slurp({binmode => ":raw:encoding(UTF-8):crlf"});
if ($got_chars eq $exp_chars) {
say "> Round-trip csv->ods->csv succeeded!" unless $silent;
} else {
$Data::Dumper::Interp::Foldwidth = 20;
die "Round-trip csv->ods->csv mismatch:\n",
ivis('Original: $exp_chars\n'),
ivis('Result: $got_chars\n') ;
}
}
} else {
warn "# Spreadsheet tests skipped because soffice is not installed\n" unless $silent;
}
########################################################################
# Do lots of tests using specified test data.
# This is called twice, once with real data and again with empty data
# (the latter to verify that logically-empty files i.e. BOM only, work)
########################################################################
sub do_encoding_tests($) {
my $exp_chars = shift;
confess dvis 'Unexpected CR in $exp_chars' if $exp_chars =~ /\x{0D}/;
my $exp_CRLFchars = $exp_chars =~ s/\n/\x0D\x0A/gr;
my $emptydata = $exp_chars eq "";
# Well, we can't prevent CRLF line endings being written on Windows
# (when writing with :crlf). This prevents comparing :crlf-written
# results with our golden files written on Linux.
#
# Q: Should CSVs always be written *without* :crlf to be in *nix form??
# A?: Allow :crlf to be specifed in an arg, e.g. output_binmode ?
#
# (OLD:)So first convert the test data .csv
# to "local" line endings so it will match.
my $local_testcsv_UTF8 = Path::Tiny->tempfile("local_testcsv_UTF8_XXXXX");
$local_testcsv_UTF8->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_chars);
(my $local_testcsv_UTF8CRLF = Path::Tiny->tempfile("local_testcsv_UTF8CRLF_XXXXX"))
->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_CRLFchars);
(my $local_testcsv_UTF16 = Path::Tiny->tempfile("local_testcsv_UTF16_XXXXX"))
->spew({binmode => ":raw:encoding(UTF-16)"}, $exp_chars);
(my $local_testcsv_UTF16BECRLF = Path::Tiny->tempfile("local_testcsv_UTF16BECRLF_XXXXX"))
->spew({binmode => ":raw:encoding(UTF-16BE)"}, $exp_CRLFchars);
(my $local_testcsv_UTF16LE = Path::Tiny->tempfile("local_testcsv_UTF16LE_XXXXX"))
->spew({binmode => ":raw:encoding(UTF-16LE)"}, $exp_chars);
my $local_testcsv_UTF8CRLFWithBOM = Path::Tiny->tempfile("local_testcsv_UTF8CRLFWithBOM_XXXXX");
{
$local_testcsv_UTF8CRLF->spew({binmode => ":raw:encoding(UTF-8)"}, $exp_CRLFchars);
$local_testcsv_UTF8CRLFWithBOM->spew({binmode => ":raw:encoding(UTF-8)"}, "\N{U+feff}");
$local_testcsv_UTF8CRLFWithBOM->append_utf8($exp_CRLFchars);
}
# Confirm that conflicting specs are caught
eval{my $dum=read_spreadsheet {sheetname => "Sheet1", verbose => $verbose}, $input_xlsx_path."!Another Sheet" };
die "Conflicting sheetname opt and !suffix not caught" if $@ eq "";
# "Read" a csv; should be a pass-thru without conversion when possible
foreach (
# path exp_passthru input_encoding output_encoding
[$local_testcsv_UTF8, 1, undef ,undef ],
[$local_testcsv_UTF8CRLF, 1, undef ,undef ],
[$local_testcsv_UTF8, 0, undef ,"UTF-16BE" ],
[$local_testcsv_UTF8CRLFWithBOM,0, undef ,undef ],
[$local_testcsv_UTF8CRLFWithBOM,0,"UTF-32" ,undef ], # BOM overrides
[$local_testcsv_UTF16, 0,"UTF-32" ,undef ], # BOM overrides
[$local_testcsv_UTF16BECRLF, 0,"UTF-32,UTF-16BE" ,undef ], # no BOM
[$local_testcsv_UTF16LE, 1,"UTF-16LE" ,"UTF-16LE" ], # no BOM
[$local_testcsv_UTF16BECRLF, 1,"UTF-16BE" ,"UTF-16BE" ], # no BOM
[$local_testcsv_UTF16BECRLF, (!$emptydata),"UTF-32,UTF-16BE" ,"UTF-16BE" ], # no BOM
[$local_testcsv_UTF16LE, 0,"UTF-16LE,UTF-32" ,"UTF-32" ], # no BOM
[$local_testcsv_UTF16LE, 0,"UTF-16LE,UTF-32" ,"UTF-16BE" ], # no BOM
) {
my ($input_csvpath, $exp_passthru, $input_enc, $output_enc) = @$_;
my @inenc_opts = ($input_enc ? (input_encoding => $input_enc ) : ());
my @outenc_opts = ($output_enc ? (output_encoding => $output_enc) : ());
warn "--- Expect passthru=",!!$exp_passthru," for $input_csvpath ienc=",u($input_enc)," oenc=",u($output_enc),"\n"
if $verbose;
### This is sometimes failing to detect $local_testcsv as a CSV on Solaris ;
### try to show enough information to debug it...
my sub _passthru_test(@) {
my %debug_opts = @_;
oops unless exists($debug_opts{debug});
oops unless exists($debug_opts{verbose});
# Call convert_spreadsheet() directly, passing inpath as an option
warn "#------------ _passthru_test ---(exp=$exp_passthru)--------\n" if $debug_opts{debug};
warn "#------------ ",qsh($input_csvpath),"\n" if $debug_opts{debug};
#warn "Using tempdir => /tmp/J ...\n";
my $h_cs = convert_spreadsheet(
silent => $silent,
inpath => $input_csvpath, cvt_to => 'csv',
#tempdir => "/tmp/J", ####TEMP TEMP TEMP
@inenc_opts, @outenc_opts, %debug_opts);
my $got_passthru = ($h_cs->{outpath} eq $input_csvpath);
if (!!$got_passthru ne !!$exp_passthru) {
die( ($exp_passthru
? "Expected null conversion but output is different"
: "Expected differing output but got pass-thru"),
"\n",
qx/(set -x; ls -ld $input_csvpath) 2>&1/,
qx/(set -x; ls -ld $h_cs->{outpath}) 2>&1/,
qx/(set -x; od -t x1a $h_cs->{outpath}) 2>&1/,
" "
);
}
# Re-read the file ourself and check the data
my $input_data = do{
my ($benc,$spill) = open_bom(my $inFH, $input_csvpath, ":raw");
oops if $spill;
my $enc = $benc || ($input_csvpath =~ s/.*UTF/UTF-/r =~ s/WithBOM|CRLF|_.*//gr);
binmode($inFH, ":raw:encoding($enc):crlf") or die $!;
warn dvis '##BBB $input_csvpath $enc $benc $spill\n' if $debug;
local $/; <$inFH>
};
my $reslurp_data = do{
my $oenc = $h_cs->{encoding} // die "{encoding} not set in result";
path($h_cs->{outpath})->slurp( {binmode => ":raw:encoding($oenc):crlf"} );
};
unless ($input_data eq $reslurp_data) {
die dvis 'Data from slurp with result encoding does not match!\n'
.'$input_csvpath $h_cs @inenc_opts @outenc_opts\n\n'
.'$input_data\n\n'
.'$reslurp_data\n'
}
# Try OpenAsCsv, which by the way calls convert_spreadsheet with a
# separate inpath arg. It returns a file descriptor to read the data.
my $h_oac = OpenAsCsv(inpath => $input_csvpath,
@inenc_opts, @outenc_opts, %debug_opts);
die dvis 'OpenAsCsv result {inpath} wrong\n$h_oac\n$h_cs'
unless $h_oac->{inpath} eq $input_csvpath;
die dvis 'OpenAsCsv result {encoding} wrong\n$h_oac\n$h_cs'
unless $h_oac->{encoding} eq $h_cs->{encoding};
my $oac_fh_data = do{
my $fh = $h_oac->{fh};
local $/;
<$fh>
};
die dvis 'Data from OpenAsCsv result fh does not match!\n'
.'$input_csvpath $h_oac\n@inenc_opts @outenc_opts\n $input_data\n$oac_fh_data'
unless $input_data eq $oac_fh_data;
# Try via read_spreadsheet
read_spreadsheet {@inenc_opts, %debug_opts}, $input_csvpath;
warn qx/set -x; hexdump -C $input_csvpath/ if $debug && $^O eq 'linux';
warn dvis '## @${\sheet()}' if $debug;
if ($emptydata) {
die "Expected empty spreadsheet ($input_csvpath)" unless @rows == 0;
} else {
verif_Sheet1 "($input_csvpath)";
}
};
eval { _passthru_test(debug => $debug, verbose => $verbose) };
if ($@) {
warn __FILE__,":",__LINE__," - failed: $@\n";
die "died" if $debug && $verbose;
warn "RE_TRYING WITH DEBUG...\n";
$Carp::Verbose = 1; # redundant with Carp::Always
require Carp::Always;
Carp::Always->import();
_passthru_test(debug => 1, verbose => 1);
die "should have died by now";
}
print "> Passthru test ok for $input_csvpath ienc=",u($input_enc)," oenc=",u($output_enc),"\n"
unless $silent;
}
{
( run in 0.627 second using v1.01-cache-2.11-cpan-5a3173703d6 )