Spreadsheet-Edit
view release on metacpan or search on metacpan
lib/Spreadsheet/Edit/IO.pm view on Meta::CPAN
# confess dvis "($tag) fh is not open \$start_pos" unless $fh;
# my @layers = PerlIO::get_layers($fh);
# my $cur_pos = tell($fh);
# if (! seek($fh, $start_pos//0, SEEK_SET)) {
# btwN 1,dvis "($tag) **NOT SEEKABLE** \$cur_pos \@layers \$start_pos";
# } else {
# if ((my $n = read $fh, my $buffer, 128) != 0) {
# btwN 1,dvis "($tag) \$cur_pos \$start_pos \@layers First $n items are:\n", Hexify($buffer), "\n";
# } else {
# btwN 1,dvis "($tag) \$cur_pos \$start_pos \@layers **APPEARS EMPTY**";
# }
# seek($fh, $cur_pos, SEEK_SET) or die "re-seek to $cur_pos: $!";
# }
# }
my sub set_fh_encoding() { # returns true if set
my $enc = $opts->{input_encoding}; # must already be resolved!
my $bmode = ":raw:encoding($enc):crlf";
binmode $fh, $bmode or die "binmode '$bmode' : $!";
if ($debug) {
my @layers = PerlIO::get_layers($fh);
warn dvis 'set_fh_encoding: $fh @layers\n';
}
}
my sub open_input() {
oops if defined $fh;
if (defined $$ref2octets) {
open $fh, "<:raw", $ref2octets or confess "BUG:in-mem open:$!";
#_dump_fh("open_input TO PREVIOUSLY SLURPED");
} else {
my $path = $opts->{inpath_sans_sheet};
$fh = openhandle($path); # undef unless $path is a file handle
unless ($fh) {
open $fh, "<", $path or die "$path : $!";
}
binmode($fh);
#_dump_fh("AAA $path open_input raw");
}
if (! seek($fh, 0, SEEK_SET)) {
oops if defined $$ref2octets;
_binmode_slurp_and_log($fh, $ref2octets, $debug);
close $fh;
$fh = undef;
open $fh, "<:raw", $ref2octets or confess "BUG:in-mem open:$!";
#_dump_fh("BBB unseekable, slurped");
}
my $bomenc = File::BOM::get_encoding_from_filehandle($fh);
$start_pos = tell($fh);
if ($bomenc) {
btw dvis 'Input has BOM, $bomenc $start_pos' if $debug;
$opts->{input_encoding} = $bomenc;
binmode($fh); # unnecessary???
binmode($fh, ":raw:encoding($bomenc):crlf") or die "binmode: $!";
}
#_dump_fh("CCC final");
}
my sub determine_input_encoding() {
# If one encoding was specified by the user or implied by a BOM, use it;
# otherwise try multiple encodings specified by the user or defaulted
# until one seems to work.
$opts->{input_encoding} //= $default_input_encodings;
my @enclist = split m#,#, $opts->{input_encoding};
return
if @enclist == 1;
_slurp_ifnotslurped($fh, $ref2octets, $debug);
for my $enc (@enclist) {
eval { _decode_slurped_data($enc, $ref2octets, $start_pos) };
if ($@) {
btw "Input encoding '$enc' did not work...($@)\n" if $debug;
next;
}
btw "Input encoding '$enc' seems to work.\n" if $debug;
@enclist = ($enc);
last
}
confess "Could not detect encoding of $opts->{inpath_sans_sheet}\n"
if @enclist > 1;
$opts->{input_encoding} = $enclist[0];
} #determine_input_encoding
my sub readparse_csv(@) {
my %csvopts = (
@sane_CSV_read_options,
defined($opts->{quote_char}) ? (quote_char=>$opts->{quote_char}) : (),
defined($opts->{sep_char}) ? (sep_char=>$opts->{sep_char}) : (),
auto_diag => 2, # throw on error
@_
);
$csvopts{escape_char} = $csvopts{quote_char}; # must always be the same
my $csv = Text::CSV->new (\%csvopts)
or croak "Text::CSV->new: ", Text::CSV->error_diag(),
dvis('\n## %csvopts\n');
seek($fh, $start_pos, SEEK_SET) or die $!; # skip over possible BOM
my $rows;
while (my $F = $csv->getline( $fh )) {
push(@$rows, $F);
}
$rows
}
my sub determine_csv_q_sep($) {
my ($r2rows) = @_;
return
if defined($opts->{quote_char}) && defined($opts->{sep_char});
# Try combinations starting with the most-common '"' and ',' while
# parsing the file for unsafe unquoted values (throws on syntax error).
# The expectation is that the first try usually succeeds
Q:
for my $q (defined($opts->{quote_char})
? ($opts->{quote_char}) : ("\"", "'")) {
my $found_q;
SEP:
for my $sep (defined($opts->{sep_char})
? ($opts->{sep_char}) : (",","\t")) {
btw dvisq '--- TRYING $q $sep ---' if $debug;
# # Preliminary check for an illegal use of the quote char
# if (defined($chars)
# && $chars =~ /[^${q}${sep}\x{0D}\x{0A}]
( run in 1.228 second using v1.01-cache-2.11-cpan-71847e10f99 )