Spreadsheet-Read

 view release on metacpan or  search on metacpan

scripts/xlscat  view on Meta::CPAN

	"       -n [skip]   Number lines (prefix with column number)",
	"                   optionally skip <skip> (header) lines",
	"       -A          Show field attributes in ANSI escapes",
	"       -h[#]       Show # header lines",
	"       -D          Dump each record with Data::Peek or Data::Dumper",
	"        --hash     Like -D but as hash with first row as keys",
	"    Output CSV:",
	"       -c          Output CSV, separator = ','",
	"       -m          Output CSV, separator = ';'",
	$is_grep ? (
	"    Grep options:",
	"       -i          Ignore case",
	"       -w          Match whole words only") : (
	"    Output Index only:",
	"       -i          Show sheet names and size only"),
	"    Output HTML:",
	"       -H          Output HTML",
	"    Selection:",
	"       -S <sheets> Only print sheets <sheets>. 'all' is a valid set",
	"                   Default only prints the first sheet",
	"       -R <rows>   Only print rows    <rows>. Default is 'all'",
	"                   Ranges and lists supported as 2,4-7,8-",
	"                   Trailing - is to end of data",
	"                   Negative rows count from tail -8--2 is allowed",
	"        --head[=n] Alias for -R1..n   where n defaults to 10",
	"        --tail[=n] Alias for -R-n-    where n defaults to 10",
	"        --first=C  Only show rows where column C is a 'new' value",
	"                   (suppress rows where value in column C is seen",
	"                    before in column C). That includes empty fields",
	"       -C <cols>   Only print columns <cols>. Default is 'all'",
	"       -F <flds>   Only fields <flds> e.g. -FA3,B16",
	"    Ordering (column numbers in result set *after* selection):",
	"       --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)",
	"                   +#   - first # lines do not sort (header)",
	"                   #    - order on column # lexical ascending",
	"                   #n   - order on column # numeric ascending",
	"                   #r   - order on column # lexical descending",
	"                   #rn  - order on column # numeric descending",
	"",
	"Examples:",
	"    xlscat   -i foo.xls",
	"    xlscat   --in-sep=: --sort=3n -L /etc/passwd",
	"    xlsgrep  pattern file.ods",
	"";
    @_ and print join "\n", @_, "";
    exit $err;
    } # usage

use Encode     qw( encode decode );
use List::Util qw( first );
use Data::Dumper;
use Spreadsheet::Read;
use Getopt::Long qw(:config bundling noignorecase passthrough);
my $opt_c;		# Generate CSV
my $opt_F = "";		# Fields to print
my $opt_v = 0;		# Verbosity for xlscat
my $opt_d = 0;		# Debug level for Spreadsheet::Read
my $opt_h = 0;		# Number of header lines for grep or -L
my $opt_D = 0;		# Dump: 0 = none, 1 = array, 2 = hash
my $clip  = 1;
my $enc_bom;		# CSV input encoding derived from BOM
my $enc_i;		# Input  encoding
my $enc_o;		# Output encoding
GetOptions (
    "help|?"		=> sub { usage (0); },
    "V|version"		=> sub { print "$CMD [$VERSION] Spreadsheet::Read [$Spreadsheet::Read::VERSION]\n"; exit 0; },
      "list"		=> sub { list_parsers (); },

    # Input CSV
    "c|csv"		=> sub { $opt_c = "," },
    "m|ms"		=> sub { $opt_c = ";" },
    "insepchar".
     "|in-sep".
     "|in-sep-char=s"	=> \my $sep,	# Input field sep for CSV

    # Input XLS
    "dtfmt".
     "|date-format=s"	=> \my $dtfmt,	# Default date-format for Excel
    "f|formulas!"	=> \my $opt_f,	# Show the formula instead of the value
      "password=s"	=> \my $passwd,	# For encrypted spreadsheets

    # Output
    "i|index".
     "|ignore-case!"	=> \my $opt_i,	# Index (cat) | ignore_case (grep)
    "s|separator|sep".
     "|outsepchar".
     "|out-sep".
     "|out-sep-char=s"	=> \my $opt_s,	# Text separator
    "S|sheets=s"	=> \my $opt_S,	# Sheets to print
    "R|rows=s"		=> \my $opt_R,	# Rows to print
      "head:10"		=> \my $opt_head,
      "tail:10"		=> \my $opt_tail,
      "first=s"		=> \my $opt_first,
    "C|columns=s"	=> \my $opt_C,	# Columns to print
    "F|fields=s"	=> \   $opt_F,
    "L|fit|align!"	=> \my $opt_L,	# Auto-size/align columns
    "B|box|frame!"	=> \my $opt_B,	# Align and add outer frame
    "P|pivot!"		=> \my $pivot,
    "n|number:0"	=> \my $opt_n,	# Prefix lines with column number
    "A|ansi|color!"	=> \my $opt_A,	# Show field colors in ANSI escapes
    "u|unformatted!"	=> \my $opt_u,	# Show unformatted values
    "v|verbose:1"	=> \$opt_v,
    "d|debug:1"		=> \$opt_d,
    "D|dump!"		=> \   $opt_D,	# Use Data::Peek or Data::Dumper
      "hash!"		=> sub { $opt_D = 2 },
    "H|html:1"		=> \my $opt_H,	# Output in HTML
      "noclip"		=> sub { $clip = 0 },
      "strip:3"		=> \my $strip,
      "clip=i"		=> \my $clip_len,
      "sort=s"		=> \my $sort_order,
      "no-empty"	=> \my $skip_empty,
    "N|no-nl:s"		=> \my $opt_N,

    # Encoding
    "e|encoding=s"	=> sub { $enc_i = $enc_o = $_[1] },
    "b|encoding-in=s"	=> \$enc_i,
    "a|encoding-out=s"	=> \$enc_o,
    "U|utf-8|utf8"	=> sub { $enc_o = "utf-8" },

    # Grep
    "w|word!"		=> \my $opt_w,	# Grep words

scripts/xlscat  view on Meta::CPAN

$opt_i && $opt_S and usage 1, "Options i and S are mutually exclusive";
$opt_i && $opt_R and usage 1, "Options i and R are mutually exclusive";
$opt_i && $opt_C and usage 1, "Options i and C are mutually exclusive";
$opt_i && $opt_F and usage 1, "Options i and F are mutually exclusive";
$opt_i && $opt_H and usage 1, "Options i and H are mutually exclusive";
}
$opt_c && $opt_H and usage 1, "Options c and H are mutually exclusive";
$opt_s && $opt_H and usage 1, "Options s and H are mutually exclusive";

my %e = (a => "\a", b => "\b", e => "\e", f => "\f", n => "\n", r => "\r", t => "\t");
$opt_s and $opt_s =~ s/\\+([abefnrt])/$e{$1}/g;
defined $opt_S or $opt_S = $opt_i || $is_grep ? "all" : "1";
defined $opt_N && !length $opt_N  and $opt_N = " ";
$opt_i && !$is_grep && $opt_v < 1 and $opt_v = 1;
$opt_f and $opt_A = 1;

if ($opt_c) {
    $opt_L = 0;	# Cannot align CSV
    if ($opt_s) {
	$opt_c = $opt_s;
	$opt_s = undef;
	}
    $opt_c =~ m/^1?$/ and $opt_c = ",";
    $opt_c = Text::CSV_XS->new ({
	binary   => 1,
	sep_char => $opt_c,
	eol      => "\r\n",
	});
    }

# Debugging. Prefer Data::Peek over Data::Dumper if available
{   my $dp = eval { require Data::Peek; 1 };
    sub ddumper {
	$dp ? Data::Peek::DDumper (@_)
	    : print STDERR Dumper (@_);
	} # ddumper
    }

my @RDarg;
for (reverse 0 .. $#ARGV) {
    $ARGV[$_] =~ m/^--([-\w]+)(?:=(.*))?$/ or next;
    push @RDarg, $1, defined $2 ? $2 : 1;
    $RDarg[-2] =~ tr/-/_/;
    splice @ARGV, $_, 1;
    }

my $pattern;
if ($is_grep) {
    $pattern = shift or usage 1;
    $opt_w and $pattern = "\\b$pattern\\b";
    $opt_i and $pattern = "(?i:$pattern)";
    $pattern = qr{$pattern};
    $opt_v > 1 and warn "Matching on $pattern\n";
    }

my $file = shift;
if (defined $file and $file ne "-") {
    $opt_v > 1 and warn "Using $file as input\n";
    -f $file or usage 1, "the file argument is not a regular file";
    -s $file or usage 1, "the file is empty";
    if ($file =~ m/\.csv$/i and open my $fh, "<", $file) {	# Auto-BOM
	my $l = <$fh>;
	close $fh;
	   if ($l =~ s/^\x00\x00\xfe\xff//) { $enc_bom = "utf-32be"   }
	elsif ($l =~ s/^\xff\xfe\x00\x00//) { $enc_bom = "utf-32le"   }
	elsif ($l =~ s/^\xfe\xff//)         { $enc_bom = "utf-16be"   }
	elsif ($l =~ s/^\xff\xfe//)         { $enc_bom = "utf-16le"   }
	elsif ($l =~ s/^\xef\xbb\xbf//)     { $enc_bom = "utf-8"      }
	elsif ($l =~ s/^\xf7\x64\x4c//)     { $enc_bom = "utf-1"      }
	elsif ($l =~ s/^\xdd\x73\x66\x73//) { $enc_bom = "utf-ebcdic" }
	elsif ($l =~ s/^\x0e\xfe\xff//)     { $enc_bom = "scsu"       }
	elsif ($l =~ s/^\xfb\xee\x28//)     { $enc_bom = "bocu-1"     }
	elsif ($l =~ s/^\x84\x31\x95\x33//) { $enc_bom = "gb-18030"   }
	elsif ($l =~ s/^\x{feff}//)         { $enc_bom = ""           }

	if ($enc_bom and open $fh, "<:encoding($enc_bom)", $file) {
	    $opt_v > 1 and warn "Opened $file with encoding $enc_bom after BOM detection\n";
	    read $fh, (my $bom), 1;	# Skip BOM
	    $file = $fh;
	    push @RDarg, parser => "CSV";
	    }
	}
    }
else {
    $opt_v > 1 and warn "Working as a pipe\n";
    $file = *ARGV;
    }

if ($opt_c) {
    Spreadsheet::Read::parses ("csv") or die "No CSV module found\n";
    eval q{use Text::CSV_XS};
    }
if ($opt_H) {
    $enc_o = "utf-8";
    $opt_H = sub { $_[0]; };
    eval q{
	use HTML::Entities;
	$opt_H = sub {
	    encode_entities (Encode::is_utf8 ($_[0]) ? $_[0] :
		decode ("utf-8", $_[0]));
	    };
	};
    }

if ($sep) {
    my %sep = (
	tab		=> "\t",
	pipe		=> "|",
	colon		=> ":",
	semicolon	=> ";",
	comma		=> ",",
	);
    $sep = $sep{lc $sep} || $sep;
    }

push @RDarg, debug => $opt_d, clip => $clip;
$opt_A         and push @RDarg, attr  => 1;
defined $sep   and push @RDarg, sep   => $sep, parser => "csv";
defined $dtfmt and push @RDarg, dtfmt => $dtfmt;
$strip         and push @RDarg, strip => $strip;
$pivot         and push @RDarg, pivot => $pivot;
if ($passwd) {
    if ($passwd eq "-") {
	print STDERR "Password: ";
	eval "use Term::ReadKey;";
	eval { ReadMode ("noecho"); };
	chomp ($passwd = <STDIN>);
	eval { ReadMode ("echo");   };
	}
    push @RDarg, passwd => $passwd;
    }
$opt_v > 4 and warn "ReadData ($file, @RDarg);\n";
my $xls = eval { ReadData ($file, @RDarg) };
$opt_v > 7 and ddumper ($xls);
unless ($xls) {
    warn "cannot read $file\n";
    if ($@) {
	my $e = $@;

scripts/xlscat  view on Meta::CPAN

	$file, $si, $name_len, $sn, $nc, $nr, $active, $hidden, $eolt;
    $opt_i && !$is_grep and next;

    if (@opt_F) {
	foreach my $fld (@opt_F) {
	    $is_grep && defined $s->{$fld} && $s->{$fld} !~ $pattern and next;
	    print "$fld:",$s->{$fld},"\n";
	    }
	next;
	}

    if (my $rows = $opt_R) {
	$rows eq "all" and $rows = "1..$nr";	# all
	$rows =~ s/--(\d+)/-($nr+1-$1)/ge;	# 3--2
	$rows =~ s/-(\d+)(?=-)/($nr+1-$1)/ge;	# -3-
	$rows =~ s/-$/-$nr/;			# 3,6-
	$rows =~ s/-/../g;
	eval "%{\$print{row}} = map { \$_ => 1 } $rows";
	}
    if (my $cols = $opt_C) {
	$cols eq "all" and $cols = "1..$nc";	# all
	if ($cols =~ m/[A-Za-z]/) {		# -C B,D => -C 2,4
	    my %ct = map {
		my ($cc, $rr) = cell2cr (uc "$_".1);
		($_ => $cc)
		} ($cols =~ m/([a-zA-Z]+)/g);
	    $cols =~ s/([A-Za-z]+)/$ct{$1}/g;
	    }
	$cols =~ s/-$/-$nc/;			# 3,6-
	$cols =~ s/-/../g;
	eval "\$print{col} = [ map { \$_ - 1  } $cols ]";
	$nc = @{$print{col}};
	}
    $opt_v >= 8 and ddumper (\%print);

    $opt_H and print qq{<h2>$sn</h2>\n\n<table border="1">\n};
    my $undef = $opt_v > 2 ? "-- undef --" : "";
    my ($h, @w) = (0, (0) x $nc); # data height, -width, and default column widths
    my @align = ("") x $nc;
    foreach my $r ($r[0] .. $r[1]) {
	exists $print{row} && !exists $print{row}{$r} and next;
	my @att;
	my @row = map {
	    my $cell = cr2cell ($_, $r);
	    my ($uval, $fval) = map {
		defined $_ ? $enc_i ? decode ($enc_i, $_) : $_ : $undef
		} $s->{cell}[$_][$r], $s->{$cell};
	    if (defined $opt_N) {
		s/\n/$opt_N/go for grep { defined } $uval, $fval;
		}
	    if ($clip_len) {
		$_ = substr $_, 0, $clip_len - 1 for grep { length > $clip_len } $uval, $fval;
		}
	    $opt_v > 2 and warn "$_:$r '$uval' / '$fval'\n";
	    $opt_A and
		push @att, [ @{$s->{attr}[$_][$r]}{qw( fgcolor bgcolor bold uline halign )} ];
	    $opt_f && $s->{attr}[$_][$r]{formula}
		? "=".$s->{attr}[$_][$r]{formula}
		: defined $s->{cell}[$_][$r] ? $opt_u ? $uval : $fval : "";
	    } $c[0] .. $c[1];
	$r == 1 && $row[0] && defined $enc_bom and $row[0] =~ s/^\N{BOM}//;
	exists $print{col} and @row = @row[grep{$_<@row}@{$print{col}}];
	$is_grep && $r > $opt_h &&
	    ! first { defined $_ && $_ =~ $pattern } @row and next;
	$skip_empty && ! first { length } @row and next;
	if ($opt_first) {
	    @row >= $opt_first && $first{$row[$opt_first - 1]}++ and next;
	    }
	if ($opt_D) {
	    ddumper ($opt_D == 1 ? \@row :
		{ map { $s->{cell}[$_ + 1][1] => $row[$_] } 0 .. $#row });
	    next;
	    }
	if ($opt_H) {	# HTML
	    print "  <tr>";
	    if (defined $opt_n) {
		my $x = $r - $opt_n;
		$x <= 0 and $x = "";
		my $c = $r % 2 ? qq{ class="odd"} : "";
		print qq{<td style="text-align: right" $c>$x</td>};
		}
	    foreach my $c (0 .. $#row) {
		my $css = css_color (@{$att[$c]});
		$r % 2 and $css .= qq{ class="odd"};
		my $td  = $opt_H->($row[$c]);
		print "<td$css>$td</td>";
		}
	    print "</tr>\n";
	    next;
	    }
	if (defined $opt_n) {
	    unshift @row, $r;
	    unshift @att, [ "#ffffff", "#000000", 0, 0 ];
	    }
	if ($opt_L || $sort_order) {	# Autofit / Align / order
	    push @data, [ [ @row ], [ @att ] ];
	    next;
	    }
	if ($opt_c) {	# CSV
	    $opt_c->print (*STDOUT, \@row) or die $opt_c->error_diag;
	    next;
	    }
	if ($opt_A) {
	    foreach my $c (0 .. $#row) {
		$row[$c] =
		    ansi_color (@{$att[$c]}).
		    $row[$c] .
		    "\e[0m";
		}
	    }
	line (0, $opt_s => @row);
	} continue {
	    ++$h % 100 == 0 && $opt_v && $v_fmt and printf STDERR $v_fmt, $nc, $h, "\r";
	    }
    $opt_H and print "  </table>\n\n";
    $v_fmt && $v_fmt and printf STDERR $v_fmt, $nc, $h, "\n";
    if ($sort_order) {
	my @o;
	my @h;
	$sort_order =~ s/\+([0-9]+)\b// and @h = splice @data, 0, $1;
	for ($sort_order =~ m/([0-9]+[rn]*)/g) {



( run in 0.493 second using v1.01-cache-2.11-cpan-39bf76dae61 )