Spreadsheet-Read

 view release on metacpan or  search on metacpan

Read.pm  view on Meta::CPAN

use warnings;

our $VERSION = "0.95";

# Report the version of this module.  Any arguments (such as the invocant
# when called as a class method) are ignored.
sub Version {
    return $VERSION;
    }

use Carp;
use Exporter;
our @ISA       = qw( Exporter );
our @EXPORT    = qw( ReadData cell2cr cr2cell );
our @EXPORT_OK = qw( parses rows cellrow row add );

use Encode       qw( decode );
use List::Util   qw( min max );
use File::Temp   qw( );
use Data::Dumper;

# Table of the modules that can be used, one row per candidate:
#
#   [ format-key => "Module::Name", "minimum version" ]
#
# The order of the rows matters: the detection code below walks this list
# top to bottom and skips a format as soon as a module has been accepted
# for it, so the first usable module of a format wins.
# An empty minimum version means that no version check is done.
# Note that the detection code replaces the format-key of a row with an
# error message when the module of that row cannot be used.
my @parsers = (
    [ csv	=> "Text::CSV_XS",		"0.71"	],
    [ csv	=> "Text::CSV_PP",		"1.17"	],
    [ csv	=> "Text::CSV",			"1.17"	],
    [ ods	=> "Spreadsheet::ParseODS",	"0.26"	],
    [ ods	=> "Spreadsheet::ReadSXC",	"0.26"	],
    [ sxc	=> "Spreadsheet::ParseODS",	"0.26"	],
    [ sxc	=> "Spreadsheet::ReadSXC",	"0.26"	],
    [ sxc	=> "Spreadsheet::ReadSXC__BAD",	"0.26"	], # For testing
    [ xls	=> "Spreadsheet::ParseExcel",	"0.34"	],
    [ xlsx	=> "Spreadsheet::ParseXLSX",	"0.24"	],
    [ xlsm	=> "Spreadsheet::ParseXLSX",	"0.24"	],
    [ xlsx	=> "Spreadsheet::XLSX",		"0.13"	],
    [ xlsx	=> "Excel::ValueReader::XLSX",	"1.13"	],
#   [ prl	=> "Spreadsheet::Perl",		""	],
    [ sc	=> "Spreadsheet::Read",		"0.01"	],
    [ gnumeric	=> "Spreadsheet::ReadGnumeric",	"0.2"	],

    # Judging by their names these rows only exist for the test suite
    [ zzz1	=> "Z10::Just::For::Testing",	"1.23"	],
    [ zzz2	=> "Z20::Just::For::Testing",	""	],
    [ zzz3	=> "Z30::Just::For::Testing",	"1.00"	],

    # Helper modules
    [ ios	=> "IO::Scalar",		""	],
    [ dmp	=> "Data::Peek",		""	],
    );
# Work out which module serves each format.  %can maps a format key (the
# first column of @parsers) to one of
#
#   "Module::Name"  the module was loaded and its version is acceptable
#   "!..."          a module forced through the environment is unusable
#   ""              a candidate was tried but rejected (labelled "Too old")
#   0               no candidate could be located
#
# The temporary "supports" entry maps every known module to its minimum
# version. It is only needed to validate the environment settings and is
# removed again before the regular detection starts.
my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });

# Pass 1: $SPREADSHEET_READ_<FORMAT> forces the module for that format
foreach my $p (@parsers) {
    my $format = $p->[0];
    $can{$format} and next;
    $can{$format} = "";
    my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
    my $min_version = $can{supports}{$preset};
    unless ($min_version) {
	# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
	$can{$format} = "!$preset is not supported for the $format format";
	next;
	}
    # From here on $preset is known to be a module name listed in @parsers,
    # so it is safe to interpolate it into the string eval
    if (eval "local \$_; require $preset" and not $@) {
	# forcing a parser should still check the version
	my $ok;
	my $has = $preset->VERSION;
	# A module that does not declare a $VERSION is treated as version 0
	# instead of causing "uninitialized" warnings while loading
	defined $has or $has = 0;
	$has =~ s/_[0-9]+$//;			# Remove beta-part
	if ($min_version =~ m/^v([0-9.]+)/) {	# clumsy versions
	    my @min = split m/\./ => $1;
	    $has =~ s/^v//;
	    my @has = split m/\./ => $has;
	    # Pad short versions (v1.2 => 1.2.0) so no undefined part is
	    # used in the comparison below
	    for my $i (0 .. 2) {
		$has[$i] ||= 0;
		$min[$i] ||= 0;
		}
	    $ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
		  (($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
	    }
	else {	# normal versions
	    $ok = $has >= $min_version;
	    }
	$ok or $preset = "!$preset";
	}
    else {
	$preset = "!$preset";
	}
    # A leading "!" marks the forced module as unusable. The value is still
    # true, so pass 2 will not look for an alternative for this format
    $can{$format} = $preset;
    }
delete $can{supports};

# Pass 2: for every format that is still undecided, accept the first module
# in @parsers that loads and satisfies its minimum version
foreach my $p (@parsers) {
    my ($flag, $mod, $vsn) = @$p;
    $can{$flag} and next;
    eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" and next;
    # Keep the reason of the failure in the table row itself
    $p->[0] = "! Cannot use $mod version $vsn: $@";
    $can{$flag} = $@ =~ m/need to install|can(?:not|'t) locate/i
	? 0	# Not found
	: "";	# Too old
    }
$can{sc} = __PACKAGE__;	# SquirrelCalc is built-in

# Install a fallback get_active_sheet for back-ends that do not have one.
#
# Every row holds the version limit, the version of the back-end as it is
# right now (undefined is skipped) and the glob that should hold the method.
# The fallback is only installed when the back-end version is below the
# limit and no code is present in the glob yet.  It always returns undef.
sub _def_gas {
    my @backends = (
	[ 0.61, $Spreadsheet::ParseExcel::VERSION,  *Spreadsheet::ParseExcel::Workbook::get_active_sheet ],
	[ 0.25, $Spreadsheet::ParseODS::VERSION,    *Spreadsheet::ParseODS::Workbook::get_active_sheet   ],
	[ 9.99, $Excel::ValueReader::XLSX::VERSION, *Excel::ValueReader::XLSX::get_active_sheet          ],
	);

    foreach my $backend (@backends) {
	my ($limit, $loaded, $glob) = @{$backend};

	# No version available or version not below the limit: nothing to do
	next unless defined $loaded && $loaded < $limit;

	# Never replace a method that already exists
	next if defined $glob && defined *{$glob}{CODE};

	*{$glob} = sub { undef };
	}
    } # _def_gas

# Module wide debug level. Diagnostics are printed to STDERR when it is true
my $debug = 0;

# Default values for the options.
# NOTE(review): how these are merged with the options passed by the caller
# is not visible here - confirm in ReadData.
my %def_opts = (
    rc      => 1,	# Store values in {cell}[col][row]
    cells   => 1,	# Store values under their cell name (see cr2cell)
    attr    => 0,	# Store cell attributes in {attr}[col][row]
    clip    => undef, # presumably follows $opt{cells} when undefined - confirm
    strip   => 0,
    pivot   => 0,
    dtfmt   => "yyyy-mm-dd", # Format 14
    debug   => 0,
    passwd  => undef,
    parser  => undef,	# Force the parser instead of detecting it
    sep     => undef,
    quote   => undef,
    label   => undef,	# Sheet label; the sc reader uses "sheet" if undefined
    merge   => 0,	# Passed on to the gnumeric parser
    );
my @def_attr = (
    type    => "text",
    fgcolor => undef,
    bgcolor => undef,
    font    => undef,

Read.pm  view on Meta::CPAN

			    }
			}
		    }
		}
	    for (@{$sheet{cell}}) {
		defined or $_ = [];
		}
	    push @data, { %sheet };
	    if ($sheet{label} eq "-- unlabeled --") {
		$sheet{label} = "";
		}
	    else {
		$data[0]{sheet}{$sheet{label}} = $#data;
		}
	    }
	return _clipsheets \%opt, [ @data ];
	}

    if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
		     : $io_fil
			 ? $txt =~ m/\.sc$/
			 : $txt =~ m/^# .*SquirrelCalc/) {
	if ($io_ref) {
	    local $/;
	    my $x = <$txt>;
	    $txt = $x;
	    }
	elsif ($io_fil) {
	    local $/;
	    open my $sc, "<", $txt or return;
	    $txt = <$sc>;
	    close   $sc;
	    }
	$txt =~ m/\S/ or return;
	my $label = defined $opt{label} ? $opt{label} : "sheet";
	my @data = (
	    {	type	=> "sc",
		parser	=> "Spreadsheet::Read",
		version	=> $VERSION,
		parsers	=> [{
		    type	=> "sc",
		    parser	=> "Spreadsheet::Read",
		    version	=> $VERSION,
		    }],
		error	=> undef,
		sheets	=> 1,
		sheet	=> { $label => 1 },
		},
	    {	parser	=> 0,
		label	=> $label,
		maxrow	=> 0,
		maxcol	=> 0,
		cell	=> [],
		attr	=> [],
		merged  => [],
		active  => 1,
		hidden	=> 0,
		},
	    );

	for (split m/\s*[\r\n]\s*/, $txt) {
	    if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) {
		@{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
		next;
		}
	    s/^r([0-9]+)c([0-9]+)\s*=\s*// or next;
	    my ($c, $r) = map { $_ + 1 } $2, $1;
	    if (m/.* \{(.*)}$/ or m/"(.*)"/) {
		my $cell = cr2cell ($c, $r);
		$opt{rc}    and $data[1]{cell}[$c][$r] = $1;
		$opt{cells} and $data[1]{$cell} = $1;
		$opt{attr}  and $data[1]{attr}[$c + 1][$r] = { @def_attr };
		next;
		}
	    # Now only formulas remain. Ignore for now
	    # r67c7 = [P2L] 2*(1000*r67c5-60)
	    }
	for (@{$data[1]{cell}}) {
	    defined or $_ = [];
	    }
	return _clipsheets \%opt, [ @data ];
	}

    if ($opt{parser} ? _parser ($opt{parser}) eq "gnumeric"
		     : _txt_is_xml ($txt, "http://www.gnumeric.org/v10.dtd")) {
	$can{gnumeric} or croak _missing_parser ("gnumeric");

	my $gnm = $can{gnumeric}->new (%parser_opts,
				       attr      => $opt{attr},
				       cells     => $opt{cells},
				       merge     => $opt{merge},
				       rc        => $opt{rc},
				       gzipped_p => $opt{gzipped_p});
	return _clipsheets \%opt, $gnm->parse ($txt);
	}

    if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
		     : ($txt =~ m/^<\?xml/ or -f $txt)) {
	$can{sxc} or croak _missing_parser ("SXC");

	ref $txt && $can{sxc}->VERSION <= 0.23 and
	    croak ("Sorry, references as input are not supported by Spreadsheet::ReadSXC before 0.23");

	my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
	my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
	my $sxc;
	if ($txt =~ m/\.(sxc|ods)$/i) {
	    $debug and print STDERR "Opening \U$1\E $txt $using\n";
	    $debug and print STDERR __FILE__, "#", __LINE__, "\n";
	    $sxc = Spreadsheet::ReadSXC::read_sxc      ($txt, $sxc_options) or return;
	    }
	# treat all refs as a filehandle
	elsif (ref $txt) {
	    $debug and print STDERR "Opening SXC filehandle\n";
	    $sxc = Spreadsheet::ReadSXC::read_sxc_fh   ($txt, $sxc_options) or return;
	    }
	elsif ($txt =~ m/\.xml$/i) {
	    $debug and print STDERR "Opening XML $txt $using\n";
	    $sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
	    }
	# need to test on pattern to prevent stat warning



( run in 2.548 seconds using v1.01-cache-2.11-cpan-71847e10f99 )