App-CSVUtils
view release on metacpan or search on metacpan
lib/App/CSVUtils.pm view on Meta::CPAN
defined($str) && length($str) or die [400, "Please specify code ($label)"];
$str = "package main; no strict; no warnings; sub { $str }";
log_trace "[csvutil] Compiling Perl code: $str";
my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
die [400, "Can't compile code ($label) '$str': $@"] if $@;
$code;
}
# Invoke a compiled code reference with the current row context exposed
# through package variables in main::.
#
# Arguments:
#   $code            - coderef to call; it receives the topic value as its
#                      only argument
#   $r               - hashref; $r itself is visible to the code as $main::r,
#                      and its input_row, input_rownum, input_data_rownum,
#                      input_parser and input_fields_idx entries as
#                      $main::row, $main::rownum, $main::data_rownum,
#                      $main::csv and $main::fields_idx respectively
#   $value_for_topic - value placed in a localized $_ for the call
#   $return_topic    - when true, return $_ as the code left it; otherwise
#                      return whatever the code itself returns
#
# All package variables (and $_) are localized, so they revert to their
# previous values once this sub returns.
sub eval_code {
    my ($code, $r, $value_for_topic, $return_topic) = @_;

    # The $main:: variables below may be mentioned nowhere else.
    no warnings 'once';

    local $_                 = $value_for_topic;
    local $main::r           = $r;
    local $main::row         = $r->{input_row};
    local $main::rownum      = $r->{input_rownum};
    local $main::data_rownum = $r->{input_data_rownum};
    local $main::csv         = $r->{input_parser};
    local $main::fields_idx  = $r->{input_fields_idx};

    # Caller wants the code's own result (in the caller's context).
    return $code->($_) unless $return_topic;

    # Caller wants the (possibly modified) topic; the code's result is ignored.
    $code->($_);
    return $_;
}
# Look up the index of a field name.
#
# Arguments:
#   $field      - field name to look up
#   $field_idxs - hashref mapping field name => index
#
# Returns the index stored for $field. Dies with a plain-string message when
# $field is undef/empty, or when it has no defined entry in $field_idxs (the
# message then lists the known fields ordered by their index).
sub _get_field_idx {
    my ($field, $field_idxs) = @_;

    die "Please specify at least a field\n"
        unless defined($field) && length($field);

    my $idx = $field_idxs->{$field};
    return $idx if defined $idx;

    my @known_by_position =
        sort { $field_idxs->{$a} <=> $field_idxs->{$b} } keys %$field_idxs;
    my $known_list = join(", ", map { "'$_'" } @known_by_position);

    die "Unknown field '$field' (known fields include: $known_list)\n";
}
# Serialize one row into a CSV line (with trailing newline).
#
# Arguments:
#   $csv            - object providing combine(), string() and error_input()
#                     (a Text::CSV_XS-style emitter)
#   $row            - arrayref of field values
#   $i              - row number; row 1 is treated as the header row
#   $outputs_header - when false, row 1 is suppressed
#
# Returns the formatted line, or the empty string when the header row is being
# suppressed. Dies if the emitter cannot combine the row.
sub _get_csv_row {
    my ($csv, $row, $i, $outputs_header) = @_;

    my $is_suppressed_header = ($i == 1 && !$outputs_header);
    return "" if $is_suppressed_header;

    unless ($csv->combine(@$row)) {
        die "Error in line $i: ".$csv->error_input."\n";
    }

    return $csv->string . "\n";
}
# Build a Text::CSV_XS object with only the `binary` option turned on, i.e.
# ignoring any user-supplied separator/quote/escape settings.
# Text::CSV_XS is loaded lazily, on first use.
sub _instantiate_parser_default {
    require Text::CSV_XS;

    my %default_opts = (binary => 1);
    return Text::CSV_XS->new(\%default_opts);
}
# Build a Text::CSV_XS object from utility arguments.
#
# Arguments:
#   $args   - hashref of arguments
#   $prefix - string prepended to every option name looked up in $args
#             (e.g. 'input_' or 'output_'); defaults to the empty string
#
# Option handling:
#   - `binary` is always enabled.
#   - If any of <prefix>sep_char / <prefix>quote_char / <prefix>escape_char is
#     defined, the defined ones are passed through and <prefix>tsv is ignored.
#   - Otherwise, a true <prefix>tsv selects tab as separator and disables
#     quoting and escaping (both set to undef).
#   - True <prefix>always_quote / <prefix>quote_empty switch on the options of
#     the same name.
sub _instantiate_parser {
    require Text::CSV_XS;

    my ($args, $prefix) = @_;
    $prefix //= '';

    my @char_opts = qw(sep_char quote_char escape_char);
    my %tcsv_opts = (binary => 1);

    my @explicit = grep { defined $args->{"${prefix}$_"} } @char_opts;
    if (@explicit) {
        # Explicit character settings take precedence over the TSV shortcut.
        $tcsv_opts{$_} = $args->{"${prefix}$_"} for @explicit;
    }
    elsif ($args->{"${prefix}tsv"}) {
        @tcsv_opts{@char_opts} = ("\t", undef, undef);
    }

    for my $flag (qw(always_quote quote_empty)) {
        $tcsv_opts{$flag} = 1 if $args->{"${prefix}${flag}"};
    }

    return Text::CSV_XS->new(\%tcsv_opts);
}
# Build the Text::CSV_XS object used for writing: same as
# _instantiate_parser(), but reading the `output_`-prefixed options of $args.
sub _instantiate_emitter {
    my ($args) = @_;
    return _instantiate_parser($args, 'output_');
}
sub _complete_field_or_field_list {
# return list of known fields of a CSV
my $which = shift;
my %args = @_;
my $word = $args{word} // '';
my $cmdline = $args{cmdline};
my $r = $args{r};
# we are not called from cmdline, bail
return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef
# let's parse argv first
my $args;
{
# this is not activated yet
$r->{read_config} = 1;
my $res = $cmdline->parse_argv($r);
#return undef unless $res->[0] == 200;
$cmdline->_read_config($r) unless $r->{config};
$args = $res->[2];
}
# user hasn't specified -f, bail
return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};
# user wants to read CSV from stdin, bail
return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';
# can the file be opened?
my $csv_parser = _instantiate_parser(\%args, 'input_');
open my($fh), "<encoding(utf8)", $args->{input_filename} or do {
#warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
return [];
};
# can the header row be read?
my $row = $csv_parser->getline($fh) or return [];
if (defined $args->{input_header} && !$args->{input_header}) {
$row = [map {"field$_"} 1 .. @$row];
}
if ($which =~ /sort/) {
$row = [map {($_,"-$_","+$_","~$_")} @$row];
lib/App/CSVUtils.pm view on Meta::CPAN
push @selected_field_idxs_array, $field_idxs->{$_} for @selected_fields;
[100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
}
# Completion spec attached (as 'x.completion') to filename arguments:
# `filename` completion, with an extension filter that accepts `csv` and `tsv`
# in any letter case.
our $xcomp_csvfiles = [
    filename => {
        file_ext_filter => qr/^[tc]sv$/i,
    },
];
# Argument specifications for options that control how input CSV is read.
# Every entry is tagged 'category:input'.
#
# The sep/quote/escape character options are only passed to Text::CSV_XS when
# the user specifies them (see _instantiate_parser), so the defaults described
# below are Text::CSV_XS's own defaults.
our %argspecs_csv_input = (
    input_header => {
        summary => 'Specify whether input CSV has a header row',
        'summary.alt.bool.not' => 'Specify that input CSV does not have a header row',
        schema => 'bool*',
        default => 1,
        description => <<'_',
By default, the first row of the input CSV will be assumed to contain field
names (and the second row contains the first data row). When you declare that
input CSV does not have header row (`--no-input-header`), the first row of the
CSV is assumed to contain the first data row. Fields will be named `field1`,
`field2`, and so on.
_
        cmdline_aliases => {
        },
        tags => ['category:input'],
    },
    input_skip_num_lines => {
        summary => 'Number of lines to skip before header row',
        schema => 'posint*',
        description => <<'MARKDOWN',
This can be useful if you have CSV files (usually some generated reports,
sometimes converted from spreadsheet) that have additional header lines or info
before the CSV header row.
See also the alternative option: `--input-skip-until-pattern`.
MARKDOWN
        tags => ['category:input'],
    },
    input_skip_until_pattern => {
        summary => 'Skip rows until the first header row matches a regex pattern',
        schema => 're_from_str*',
        description => <<'MARKDOWN',
This is an alternative to the `--input-skip-num-lines` and can be useful if you
have CSV files (usually some generated reports, sometimes converted from
spreadsheet) that have additional header lines or info before the CSV header
row.
With `--input-skip-num-lines`, you skip a fixed number of lines. With this
option, rows will be skipped until the first field matches the specified regex
pattern.
MARKDOWN
        tags => ['category:input'],
    },
    input_tsv => {
        summary => "Inform that input file is in TSV (tab-separated) format instead of CSV",
        schema => 'true*',
        description => <<'_',
Overridden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char`
options. If one of those options is specified, then `--input-tsv` will be
ignored.
_
        tags => ['category:input'],
    },
    input_sep_char => {
        summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
Defaults to `,` (comma). Overrides `--input-tsv` option.
_
        tags => ['category:input'],
    },
    input_quote_char => {
        summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
Defaults to `"` (double quote). Overrides `--input-tsv` option.
_
        tags => ['category:input'],
    },
    input_escape_char => {
        summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
Defaults to `"` (double quote), which is the Text::CSV_XS default. Overrides
`--input-tsv` option.
_
        tags => ['category:input'],
    },
);
# Argument specifications for options that control how output CSV is written.
# Every entry is tagged 'category:output'.
#
# The sep/quote/escape character options are only passed to Text::CSV_XS when
# the user specifies them (see _instantiate_parser, called with the 'output_'
# prefix by _instantiate_emitter), so the defaults described below are
# Text::CSV_XS's own defaults.
our %argspecs_csv_output = (
    output_header => {
        summary => 'Whether output CSV should have a header row',
        schema => 'bool*',
        description => <<'_',
By default, a header row will be output *if* input CSV has header row. Under
`--output-header`, a header row will be output even if input CSV does not have
header row (value will be something like "field1,field2,..."). Under
`--no-output-header`, header row will *not* be printed even if input CSV has
header row. So this option can be used to unconditionally add or remove header
row.
_
        tags => ['category:output'],
    },
    output_tsv => {
        summary => "Inform that output file is TSV (tab-separated) format instead of CSV",
        schema => 'bool*',
        description => <<'_',
This is like `--input-tsv` option but for output instead of input.
Overridden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char`
options. If one of those options is specified, then `--output-tsv` will be
ignored.
_
        tags => ['category:output'],
    },
    output_sep_char => {
        summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
This is like `--input-sep-char` option but for output instead of input.
Defaults to `,` (comma). Overrides `--output-tsv` option.
_
        tags => ['category:output'],
    },
    output_quote_char => {
        summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
This is like `--input-quote-char` option but for output instead of input.
Defaults to `"` (double quote). Overrides `--output-tsv` option.
_
        tags => ['category:output'],
    },
    output_escape_char => {
        summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS',
        schema => ['str*', len=>1],
        description => <<'_',
This is like `--input-escape-char` option but for output instead of input.
Defaults to `"` (double quote), which is the Text::CSV_XS default. Overrides
`--output-tsv` option.
_
        tags => ['category:output'],
    },
    output_always_quote => {
        summary => 'Whether to always quote values',
        schema => 'bool*',
        default => 0,
        description => <<'_',
When set to false (the default), values are quoted only when necessary:
field1,field2,"field three contains comma (,)",field4
When set to true, then all values will be quoted:
"field1","field2","field three contains comma (,)","field4"
_
        tags => ['category:output'],
    },
    output_quote_empty => {
        summary => 'Whether to quote empty values',
        schema => 'bool*',
        default => 0,
        description => <<'_',
When set to false (the default), empty values are not quoted:
field1,field2,,field4
When set to true, then empty values will be quoted:
field1,field2,"",field4
_
        tags => ['category:output'],
    },
);
# Argument specification for a single, optional input file. When the argument
# is not given it defaults to `-`; completion uses $xcomp_csvfiles.
our %argspecopt_input_filename = (
    input_filename => {
        summary        => 'Input CSV file',
        schema         => 'filename*',
        default        => '-',
        tags           => ['category:input'],
        'x.completion' => $xcomp_csvfiles,
        description    => <<'MARKDOWN',
Use `-` to read from stdin.
Encoding of input file is assumed to be UTF-8.
MARKDOWN
    },
);
our %argspecopt_input_filenames = (
( run in 0.669 second using v1.01-cache-2.11-cpan-39bf76dae61 )