App-CSVUtils
view release on metacpan or search on metacpan
lib/App/CSVUtils/csv_freqtable.pm view on Meta::CPAN
description => <<'_',
_
add_args => {
%App::CSVUtils::argspecopt_field_1,
ignore_case => {
summary => 'Ignore case',
schema => 'true*',
cmdline_aliases => {i=>{}},
},
key => {
summary => 'Generate computed field with this Perl code',
description => <<'_',
If specified, then will compute field using Perl code.
The code will receive the row (arrayref, or if -H is specified, hashref) as the
argument. It should return the computed field (str).
_
schema => $App::CSVUtils::sch_req_str_or_code,
cmdline_aliases => {k=>{}},
},
%App::CSVUtils::argspecopt_hash,
%App::CSVUtils::argspecopt_with_data_rows,
},
add_args_rels => {
'req_one&' => [ ['field', 'key'] ],
},
tags => ['category:summarizing', 'outputs-data-structure', 'accepts-code'],
examples => [
{
summary => 'Show the age distribution of people',
argv => ['people.csv', 'age'],
test => 0,
'x.doc.show_result' => 0,
},
{
summary => 'Show the frequency of wins by a user, ignore case differences in user',
argv => ['winner.csv', 'user', '-i'],
test => 0,
'x.doc.show_result' => 0,
},
{
summary => 'Show the frequency of events by period (YYYY-MM)',
argv => ['events.csv', '-H', '--key', 'sprintf("%04d-%02d", $_->{year}, $_->{month})'],
test => 0,
'x.doc.show_result' => 0,
},
],
on_input_header_row => sub {
my $r = shift;
# check arguments
my $field_idx;
if (defined $r->{util_args}{field}) {
$field_idx = $r->{input_fields_idx}{ $r->{util_args}{field} };
die [404, "Field '$r->{util_args}{field}' not found in CSV"]
unless defined $field_idx;
}
$r->{wants_input_row_as_hashref} = 1 if $r->{util_args}{hash};
# this is a key we add to the stash
$r->{freqtable} //= {};
$r->{field_idx} = $field_idx;
$r->{code} = undef;
$r->{has_added_field} = 0;
$r->{freq_field} = undef;
$r->{input_rows} = [];
},
on_input_data_row => sub {
my $r = shift;
# add freq field
if ($r->{util_args}{with_data_rows} && !$r->{has_added_field}++) {
my $i = 1;
while (1) {
my $field = "freq" . ($i>1 ? $i : "");
unless (defined $r->{input_fields_idx}{$field}) {
$r->{input_fields_idx}{$field} = @{ $r->{input_fields} };
push @{ $r->{input_fields} }, $field;
$r->{freq_field} = $field;
push @{ $r->{input_row} }, undef;
last;
}
$i++;
}
}
my $field_val;
if ($r->{util_args}{key}) {
unless ($r->{code}) {
$r->{code} = compile_eval_code($r->{util_args}{key}, 'key');
}
$field_val = eval_code($r->{code}, $r, $r->{wants_input_row_as_hashref} ? $r->{input_row_as_hashref} : $r->{input_row}) // '';
} else {
$field_val = $r->{input_row}[ $r->{field_idx} ];
}
if ($r->{util_args}{ignore_case}) {
$field_val = lc $field_val;
}
$r->{freqtable}{$field_val}++;
if ($r->{util_args}{with_data_rows}) {
# we first put the field val, later we will fill the freq
if ($r->{wants_input_row_as_hashref}) {
$r->{input_row}{ $r->{freq_field} } = $field_val;
} else {
$r->{input_row}[-1] = $field_val;
}
push @{ $r->{input_rows} }, $r->{input_row};
}
},
( run in 0.847 second using v1.01-cache-2.11-cpan-39bf76dae61 )