Text-AutoCSV
view release on metacpan or search on metacpan
general as possible)
* Put function remove_accents in @EXPORT_OK
* When a field is unknown and Text::AutoCSV croaks miserably, print out
the list of existing fields, to bring immediate help to caller.
* utils/csvcopy.pl: Add --out_fields option
Add --join (also -j) option
* On UTF-8 inputs: no longer systematically use :via(File::BOM); instead, add
:via(File::BOM) only when a BOM is detected. Detection (for UTF-8) is done
manually, using raw file access.
Reason: :via(File::BOM) would produce warnings when the first line of a UTF-8
encoded file (that has no BOM) contained non-ASCII characters.
1.0.7 2016-09-22 Sébastien Millet <milletseb@laposte.net>
* lib/Text/AutoCSV.pm: ask for DateTime::Format::Strptime to be >= 1.63
(credits: SREZIC@cpan.org)
* lib/Text/AutoCSV.pm: no longer record each input record twice (once as
an array, once as a hash) when keeping it
in-memory. Instead, keep each record only as an
array and convert it to a hash on-the-fly as
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: Text-AutoCSV
requires:
Carp: '0'
Class::Struct: '0'
DateTime: '0'
DateTime::Format::Strptime: '1.71'
Exporter: '0'
Fcntl: '0'
File::BOM: '0'
Getopt::Long: '0'
Hash::Util: '0'
List::MoreUtils: '0'
Memoize: '0'
Params::Validate: '0'
Text::CSV: '0'
Unicode::Normalize: '0'
strict: '0'
warnings: '0'
version: 1.2.0
Makefile.PL view on Meta::CPAN
],
"LICENSE" => "perl",
"NAME" => "Text::AutoCSV",
"PREREQ_PM" => {
"Carp" => 0,
"Class::Struct" => 0,
"DateTime" => 0,
"DateTime::Format::Strptime" => "1.71",
"Exporter" => 0,
"Fcntl" => 0,
"File::BOM" => 0,
"Getopt::Long" => 0,
"Hash::Util" => 0,
"List::MoreUtils" => 0,
"Memoize" => 0,
"Params::Validate" => 0,
"Text::CSV" => 0,
"Unicode::Normalize" => 0,
"strict" => 0,
"warnings" => 0
},
Makefile.PL view on Meta::CPAN
);
my %FallbackPrereqs = (
"Carp" => 0,
"Class::Struct" => 0,
"DateTime" => 0,
"DateTime::Format::Strptime" => "1.71",
"Exporter" => 0,
"Fcntl" => 0,
"File::BOM" => 0,
"Getopt::Long" => 0,
"Hash::Util" => 0,
"List::MoreUtils" => 0,
"Memoize" => 0,
"POSIX" => 0,
"Params::Validate" => 0,
"Text::CSV" => 0,
"Time::Local" => 0,
"Unicode::Normalize" => 0,
"strict" => 0,
lib/Text/AutoCSV.pm view on Meta::CPAN
my $PKG = "Text::AutoCSV";
require Exporter;
our @ISA = 'Exporter';
our @EXPORT_OK = qw(remove_accents);
use Carp;
use Params::Validate qw(validate validate_pos :types);
use List::MoreUtils qw(first_index indexes);
use Fcntl qw(SEEK_SET);
use File::BOM;
use Text::CSV;
use DateTime;
# DateTime::Format::Strptime 1.70 does not work properly with us.
# Actually all versions as of 1.63 are fine, except 1.70.
use DateTime::Format::Strptime 1.71;
use Class::Struct;
use Unicode::Normalize;
# lock_keys is used to prevent accessing non existing keys
lib/Text/AutoCSV.pm view on Meta::CPAN
$self->_register_pass("check $e encoding");
my $utf8_bom = 0;
if ( _is_utf8($e) ) {
if ( open my $fh, '<:raw', $in_file ) {
my $bom;
read $fh, $bom, 3;
if ( length($bom) == 3 and $bom eq "\xef\xbb\xbf" ) {
if ( !defined($via) ) {
$m .= ":via(File::BOM)";
}
}
close $fh;
}
}
my $inh;
if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
$self->_print_error("unable to open file '$in_file': $!");
return ( $encodings[0], $m0 );
lib/Text/AutoCSV.pm view on Meta::CPAN
my $enc = (
defined( $self->{_inh_encoding} )
? $self->{_inh_encoding}
: $DEFAULT_OUT_ENCODING
);
# out_encoding option takes precedence
$enc = $self->{out_encoding} if defined( $self->{out_encoding} );
my $m = ":encoding($enc)";
if ( _is_utf8($enc) and $self->{out_utf8_bom} ) {
$m .= ':via(File::BOM)';
}
if ( $OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214 ) {
# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story)
$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i;
}
binmode $outh, $m;
print( $_debugh "Encoding string used for output: $m\n" ) if $_debug;
lib/Text/AutoCSV.pm view on Meta::CPAN
Value by default: input encoding
Example:
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
out_encoding => 'UTF-16');
=item out_utf8_bom
Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is
not UTF-8, this attribute is ignored.
B<NOTE>
UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice,
UTF8-encoded files rarely have a BOM.
Using this attribute is not recommended. It is provided for the sake of
completeness, and also to produce Unicode files Microsoft EXCEL will be happy to
read.
At first sight it would seem more logical to make EXCEL happy with something
like this:
out_encoding => 'UTF-16'
But... While EXCEL will identify UTF-16 and read it as such, it will not take
into account the BOM found at the beginning. In the end the first cell will have
2 useless characters prepended. The only solution the author knows to work around
this issue is to use UTF-8 as output encoding, and enforce a BOM. That is, use:
..., out_encoding => 'UTF-8', out_utf8_bom => 1, ...
=item out_sep_char
Enforce the output CSV separator character.
Value by default: input separator
Example:
scripts/csvcopy.pl view on Meta::CPAN
[
'out_encoding=s', 'STR',
'out_encoding', '<same as input>',
'output encoding (default: same as input)'
],
[
'out_utf8_bom=i', 'BOOL',
'out_utf8_bom', 'no',
'add BOM on UTF-8 output (default: no)'
],
[
'out_sep_char=s', 'STR', 'out_sep_char',
'<same as input>',
'output CSV separator (default: same as input)'
],
[
'out_quote_char=s', 'STR',
$csv = Text::AutoCSV->new(
in_file => "t/${ww}e3.csv",
croak_if_error => 0,
sep_char => ","
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT03 - t/e2.csv: read CSV UTF8 + BOM chars that are latin1+latin2"
);
$csv = Text::AutoCSV->new(
in_file => "t/${ww}e2.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'UTF-8'
);
$all = [ $csv->get_hr_all() ];
is_deeply(
sep_char => ",",
encoding => 'UTF-8',
via => '',
has_headers => 0,
fields_column_names => ['Z']
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
# BOM appears here as explicit via discards the use of
# :via(File::BOM)
[
{ 'Z' => "\x{feff}u" },
{ 'Z' => "\x{e9}" },
{ 'Z' => "N\x{11b}\x{10d}\x{ed}" }
],
"UT05 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding"
);
$csv = Text::AutoCSV->new(
in_file => "t/${ww}e2.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'UTF-8'
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT06 - t/e2.csv: read CSV UTF8 chars that are latin1+latin2, explicit encoding option"
);
$csv = Text::AutoCSV->new(
in_file => "t/${ww}e3.csv",
croak_if_error => 0,
sep_char => ",",
encoding => 'UTF-8',
via => ':via(File::BOM)'
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
# This time BOM is specified in the encoding parameter => no mess
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT07 - t/e3.csv: read CSV UTF8 + BOM chars that are latin1+latin2, explicit encoding with opts"
);
# R/W
my $tmpf = &get_non_existent_temp_file_name();
my $csvtmp = Text::AutoCSV->new(
in_file => "t/${ww}e3.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf
$csv = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
fields_hr => { 'Y' => 'U' }
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT08 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2"
);
$csvtmp = Text::AutoCSV->new(
in_file => "t/${ww}e3.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
out_encoding => 'latin2'
)->write();
is( $csvtmp->get_in_encoding(),
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => 'latin2',
fields_hr => { 'Y' => 'U' }
);
$all = [ $csv->get_hr_all() ];
is_deeply(
$all,
[ { 'Y' => "\x{e9}" }, { 'Y' => "N\x{11b}\x{10d}\x{ed}" } ],
"UT10 - t/e3.csv: r/w: CSV UTF8 + BOM chars that are latin1+latin2, output latin2"
);
unlink $tmpf;
}
# * ************ *
# * latin* files *
# * ************ *
{
);
$c3 = Text::AutoCSV->new(
in_file => "t/${ww}e7.csv",
croak_if_error => 0,
sep_char => ",",
out_file => $tmpf,
encoding => "UTF-16LE, UTF-8, latin1"
);
is( $c3->get_in_encoding(),
'UTF-16LE', "EN19 - t/e7.csv: detect UTF-16LE with opts (BOM)" );
$all = [ $c3->get_hr_all() ];
is_deeply(
$all,
[ { 'U' => "\x{e9}" }, { 'U' => "N\x{11b}\x{10d}\x{ed}" } ],
"EN20 - t/e6.csv: detect UTF-16LE with opts (2) (BOM)"
);
$c1->write();
my $c1r = Text::AutoCSV->new(
in_file => $tmpf,
croak_if_error => 0,
sep_char => ",",
encoding => "UTF-8, latin1"
);
is( $c1r->get_in_encoding(),
t/13-accents.t view on Meta::CPAN
$csv = Text::AutoCSV->new( in_file => "t/${ww}acct-ub.csv" );
is( $csv->get_in_encoding(), 'UTF-8', "AC04: UTF-8: check encoding detection" );
$f = [ $csv->get_fields_names() ];
is_deeply(
$f,
[
'U', 'E', 'NECI', 'ELEMENTAIRE',
'ETREOUNEPASETRE', 'CHATEAU', 'HOPITAL', 'AMBIGUE',
'METRE'
],
"AC05: UTF-8 (BOM): check field names when input has accents"
);
$csv = Text::AutoCSV->new( in_file => "t/${ww}acct2.csv" );
is( $csv->get_in_encoding(),
'UTF-8', "AC06: UTF-8: check encoding detection (2)" );
$f = [ $csv->get_fields_names() ];
is_deeply(
$f,
[ 'A', 'CÅURETRE', 'N' ],
"AC07: UTF-8 (BOM): field names with special character (non us-ascci) not an accent"
);
$csv = Text::AutoCSV->new( in_file => "t/${ww}accx.csv" );
is( $csv->get_in_encoding(),
'UTF-8', "AC08 - t/accx.csv: check input encoding detection" );
# latin1 char
my $v = $csv->vlookup( 'A', 'etre', 'C' );
is( $v, '10', "AC09 - t/accx.csv: vlookup with accent" );
$v = $csv->vlookup( 'A', 'etre', 'C', { ignore_accents => 0 } );
tkcsvcopy.pl view on Meta::CPAN
tkcsvcopy.pl [OPTIONS...]
Perl/Tk GUI to detect settings of, and copy, CSV files.
-h, --help Display this help screen.
--db DB Enforce location of DB file to save options.
By default, store in ~/$DEFAULT_DB_FILE.
--nodb Don't use a db for options (options are not persistent).
--read F Upon start, display input information of file F.
--excel Tune output settings to make MS EXCEL happy.
Have , as separator, " as quote, " as escape,
UTF-8 encoding with BOM, and ymd/24h for datetime.
--excelfr Same as --excel, but ; as separator.
EOF
return;
}
if ( grep { /^--?h(elp)?$/i } @ARGV ) {
usage();
exit 0;
}
tkcsvcopy.pl view on Meta::CPAN
],
%chkopts
)->pack( -side => 'left', %stdpad );
my $frtopr_bom =
$frame_top_R->Frame(%fropts)->pack( -side => 'top', -fill => 'x' );
my $ctrl_out_utf8_bom = $frtopr_bom->Checkbutton(
-variable => \$db{out_utf8_bom},
%chkopts
)->pack( -side => 'right' );
$frtopr_bom->Label( -text => 'If UTF-8, write UTF-8 BOM ', %labopts )
->pack( -side => 'right', %stdpad );
my $frtopr_iaq =
$frame_top_R->Frame(%fropts)->pack( -side => 'top', -fill => 'x' );
my $ctrl_out_always_quote = $frtopr_iaq->BrowseEntry(
-variable => \$db{out_always_quote},
-width => 6,
-background => 'white',
%entopts
)->pack( -side => 'right', %stdpad );
( run in 0.562 second using v1.01-cache-2.11-cpan-131fc08a04b )