Spreadsheet-Read
view release on metacpan or search on metacpan
use warnings;
our $VERSION = "0.95";

# Report the version of this module. Any arguments (including the invocant
# when called as a class method) are ignored.
sub Version {
    return $VERSION;
}
use Carp;
use Exporter;
our @ISA = qw( Exporter );
our @EXPORT = qw( ReadData cell2cr cr2cell );
our @EXPORT_OK = qw( parses rows cellrow row add );
use Encode qw( decode );
use List::Util qw( min max );
use File::Temp qw( );
use Data::Dumper;
# Table of known back-ends. Every entry is a triple
#   [ format key, module name, minimum required version ]
# The order is significant: entries are probed top to bottom and the first
# module that loads (and satisfies its minimum version) is the one recorded
# for that format key; later entries for the same key are then skipped.
# An empty minimum version means that no version check is done when probing.
my @parsers = (
[ csv => "Text::CSV_XS", "0.71" ],
[ csv => "Text::CSV_PP", "1.17" ],
[ csv => "Text::CSV", "1.17" ],
[ ods => "Spreadsheet::ParseODS", "0.26" ],
[ ods => "Spreadsheet::ReadSXC", "0.26" ],
[ sxc => "Spreadsheet::ParseODS", "0.26" ],
[ sxc => "Spreadsheet::ReadSXC", "0.26" ],
[ sxc => "Spreadsheet::ReadSXC__BAD", "0.26" ], # For testing
[ xls => "Spreadsheet::ParseExcel", "0.34" ],
[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
[ xlsm => "Spreadsheet::ParseXLSX", "0.24" ],
[ xlsx => "Spreadsheet::XLSX", "0.13" ],
[ xlsx => "Excel::ValueReader::XLSX", "1.13" ],
# [ prl => "Spreadsheet::Perl", "" ],
[ sc => "Spreadsheet::Read", "0.01" ],
[ gnumeric => "Spreadsheet::ReadGnumeric", "0.2" ],
# Entries that only exist to exercise the probing code
[ zzz1 => "Z10::Just::For::Testing", "1.23" ],
[ zzz2 => "Z20::Just::For::Testing", "" ],
[ zzz3 => "Z30::Just::For::Testing", "1.00" ],
# Helper modules
[ ios => "IO::Scalar", "" ],
[ dmp => "Data::Peek", "" ],
);
# %can maps a format key to the module that will be used for that format.
# This first pass only deals with parsers that are forced through the
# environment: $SPREADSHEET_READ_<FORMAT> (format key in upper case) names
# the module to use. After this pass a format key holds
#   ""           no (usable) preset was given
#   "Module"     the preset loaded and is recent enough
#   "!..."       the preset was rejected (unknown, not loadable or too old)
my %can;
{
    # Minimum version for every module named in the parser table
    my %min_version_for = map { ($_->[1], $_->[2]) } @parsers;

    # Pack up to three version components into a single comparable number
    my $as_number = sub { ($_[0] * 1000 + $_[1]) * 1000 + $_[2] };

    FORMAT: for my $entry (@parsers) {
        my $format = $entry->[0];
        next FORMAT if $can{$format};
        $can{$format} = "";

        my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next FORMAT;

        my $required = $min_version_for{$preset};
        if (!$required) {
            # Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
            $can{$format} = "!$preset is not supported for the $format format";
            next FORMAT;
        }

        # Forcing a parser does not bypass the minimum version check
        my $usable = 0;
        if (eval "local \$_; require $preset" and not $@) {
            my $found = $preset->VERSION;
            $found =~ s/_[0-9]+$//;                 # Remove beta-part
            if ($required =~ m/^v([0-9.]+)/) {      # clumsy versions
                my @want = split m/\./ => $1;
                $found =~ s/^v//;
                my @got = split m/\./ => $found;
                $usable = $as_number->(@got) >= $as_number->(@want);
            }
            else {                                  # normal versions
                $usable = $found >= $required;
            }
        }

        $can{$format} = $usable ? $preset : "!$preset";
    }
}
# Second pass: for every format that has no parser yet, try the candidate
# modules in table order and record the first one that loads and is recent
# enough. On failure the format slot of the table entry is replaced by the
# reason and $can{format} is set to 0 (module not found) or "" (too old).
for my $candidate (@parsers) {
    my ($flag, $mod, $vsn) = @{$candidate};
    next if $can{$flag};

    # The string eval sets $can{$flag} itself and yields true on success
    my $loaded = eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'";
    next if $loaded;

    $candidate->[0] = "! Cannot use $mod version $vsn: $@";
    if ($@ =~ m/need to install|can(?:not|'t) locate/i) {
        $can{$flag} = 0;    # Not found
    }
    else {
        $can{$flag} = "";   # Too old
    }
}

# SquirrelCalc is built-in
$can{sc} = __PACKAGE__;
# Install a stub ->get_active_sheet (returning undef) for loaded back-ends
# that are older than the listed version and do not have that method (yet).
# Back-ends that are not loaded (no $VERSION) are left alone.
sub _def_gas {
    # [ version threshold, loaded version, glob of the method to stub ]
    my @stub_table = (
        [ 0.61, $Spreadsheet::ParseExcel::VERSION,  *Spreadsheet::ParseExcel::Workbook::get_active_sheet ],
        [ 0.25, $Spreadsheet::ParseODS::VERSION,    *Spreadsheet::ParseODS::Workbook::get_active_sheet   ],
        [ 9.99, $Excel::ValueReader::XLSX::VERSION, *Excel::ValueReader::XLSX::get_active_sheet          ],
    );
    foreach my $stub (@stub_table) {
        my ($mv, $v, $cb) = @{$stub};
        next unless defined $v && $v < $mv;
        next if defined $cb && defined *{$cb}{CODE};
        *{$cb} = sub { undef };
    }
} # _def_gas
# When true, diagnostic messages are printed to STDERR
my $debug = 0;
# Default values for the reader options.
# Uses visible in this file section:
#   rc     - store values in the {cell}[col][row] array
#   cells  - store values under the key returned by cr2cell (col, row)
#   attr   - store a per-cell attribute hash in {attr}
#   parser - force the parser instead of detecting the format
#   label  - sheet label for SquirrelCalc data ("sheet" when undefined)
#   merge  - handed on to the Gnumeric parser
# NOTE(review): the remaining options are consumed outside this section;
# confirm their meaning against the module documentation.
my %def_opts = (
rc => 1,
cells => 1,
attr => 0,
clip => undef, # $opt{cells};
strip => 0,
pivot => 0,
dtfmt => "yyyy-mm-dd", # Format 14
debug => 0,
passwd => undef,
parser => undef,
sep => undef,
quote => undef,
label => undef,
merge => 0,
);
my @def_attr = (
type => "text",
fgcolor => undef,
bgcolor => undef,
font => undef,
}
}
}
}
for (@{$sheet{cell}}) {
defined or $_ = [];
}
push @data, { %sheet };
if ($sheet{label} eq "-- unlabeled --") {
$sheet{label} = "";
}
else {
$data[0]{sheet}{$sheet{label}} = $#data;
}
}
return _clipsheets \%opt, [ @data ];
}
# SquirrelCalc. This branch is taken when the "sc" parser is forced through
# the parser option or, when no parser is forced, when the file name ends in
# ".sc" or the content has a "# ...SquirrelCalc" header line. The data is
# parsed right here; no external module is involved.
# Returns nothing when the file cannot be opened or the content is blank,
# otherwise whatever _clipsheets makes of the collected data.
if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
                 : $io_fil
                     ? $txt =~ m/\.sc$/
                     : $txt =~ m/^# .*SquirrelCalc/) {
    # Get the complete content into $txt, whatever kind of input was passed
    if ($io_ref) {
        local $/;
        my $x = <$txt>;
        $txt = $x;
    }
    elsif ($io_fil) {
        local $/;
        open my $sc, "<", $txt or return;
        $txt = <$sc>;
        close $sc;
    }
    $txt =~ m/\S/ or return;

    my $label = defined $opt{label} ? $opt{label} : "sheet";
    my @data = (
        # Element 0: control information about the parse
        {   type    => "sc",
            parser  => "Spreadsheet::Read",
            version => $VERSION,
            parsers => [{
                type    => "sc",
                parser  => "Spreadsheet::Read",
                version => $VERSION,
                }],
            error   => undef,
            sheets  => 1,
            sheet   => { $label => 1 },
            },
        # Element 1: the one and only sheet
        {   parser  => 0,
            label   => $label,
            maxrow  => 0,
            maxcol  => 0,
            cell    => [],
            attr    => [],
            merged  => [],
            active  => 1,
            hidden  => 0,
            },
        );

    for (split m/\s*[\r\n]\s*/, $txt) {
        if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) {
            @{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
            next;
        }

        # Cell lines look like "r<row>c<col> = ..." with 0-based indices
        s/^r([0-9]+)c([0-9]+)\s*=\s*// or next;
        my ($c, $r) = map { $_ + 1 } $2, $1;    # from here on 1-based

        if (m/.* \{(.*)}$/ or m/"(.*)"/) {
            my $value = $1;
            my $cell  = cr2cell ($c, $r);
            $opt{rc}    and $data[1]{cell}[$c][$r] = $value;
            $opt{cells} and $data[1]{$cell}        = $value;
            # $c and $r are 1-based already: the attributes belong in the
            # very same [$c][$r] slot as the value, not one column further
            $opt{attr}  and $data[1]{attr}[$c][$r] = { @def_attr };
            next;
        }

        # Now only formulas remain. Ignore for now
        # r67c7 = [P2L] 2*(1000*r67c5-60)
    }

    # Columns that did not get any value still need to be array refs
    for (@{$data[1]{cell}}) {
        defined or $_ = [];
    }
    return _clipsheets \%opt, [ @data ];
}
# Gnumeric. This branch is taken when the "gnumeric" parser is forced through
# the parser option or, when no parser is forced, when _txt_is_xml accepts
# the input for the Gnumeric DTD. Croaks when no Gnumeric parser is available.
if ($opt{parser} ? _parser ($opt{parser}) eq "gnumeric"
                 : _txt_is_xml ($txt, "http://www.gnumeric.org/v10.dtd")) {
    croak _missing_parser ("gnumeric") unless $can{gnumeric};

    # The generic parser options come first, followed by the reader options
    # that are handed on to the Gnumeric parser
    my @gnm_args = (
        %parser_opts,
        attr      => $opt{attr},
        cells     => $opt{cells},
        merge     => $opt{merge},
        rc        => $opt{rc},
        gzipped_p => $opt{gzipped_p},
        );
    my $reader = $can{gnumeric}->new (@gnm_args);
    return _clipsheets \%opt, $reader->parse ($txt);
}
if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
: ($txt =~ m/^<\?xml/ or -f $txt)) {
$can{sxc} or croak _missing_parser ("SXC");
ref $txt && $can{sxc}->VERSION <= 0.23 and
croak ("Sorry, references as input are not supported by Spreadsheet::ReadSXC before 0.23");
my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
my $sxc;
if ($txt =~ m/\.(sxc|ods)$/i) {
$debug and print STDERR "Opening \U$1\E $txt $using\n";
$debug and print STDERR __FILE__, "#", __LINE__, "\n";
$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
}
# treat all refs as a filehandle
elsif (ref $txt) {
$debug and print STDERR "Opening SXC filehandle\n";
$sxc = Spreadsheet::ReadSXC::read_sxc_fh ($txt, $sxc_options) or return;
}
elsif ($txt =~ m/\.xml$/i) {
$debug and print STDERR "Opening XML $txt $using\n";
$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
}
# need to test on pattern to prevent stat warning
( run in 2.548 seconds using v1.01-cache-2.11-cpan-71847e10f99 )