App-CSVUtils
view release on metacpan or search on metacpan
lib/App/CSVUtils/csv_concat.pm view on Meta::CPAN
field. See the utility's example for an illustration.
MARKDOWN
},
},
tags => ['category:combining', 'join', 'merge'],
reads_multiple_csv => 1,
before_open_input_files => sub {
    my ($r) = @_;

    # Seed the stash with two empty lists. The header-row hook appends to
    # them once per input file: one entry of field names and one filehandle
    # per file, kept in the same order.
    $r->{all_input_fields} = [];
    $r->{all_input_fh}     = [];
},
on_input_header_row => sub {
    my ($r) = @_;

    # Called once the header row of an input file has been read. Nothing is
    # printed yet: the complete set of fields across all files must be known
    # first. So we only remember this file's handle and its field list (same
    # position in both lists), to resume reading its data rows later.
    push @{ $r->{all_input_fh} },     $r->{input_fh};
    push @{ $r->{all_input_fields} }, $r->{input_fields};

    # Flag that the rest of this file should be skipped for now.
    $r->{wants_skip_file}++;
},
after_close_input_files => sub {
    my $r = shift;

    # As described in the on_input_header_row hook, we have saved the input
    # fields as well as the filehandles. So even though we are called in the
    # after_close_input_files hook, where the main routine supposedly has
    # closed all input files, we actually have saved the filehandles and can
    # resume reading from them.
    #
    # Stash keys read:    all_input_fields, all_input_fh, input_parser,
    #                     util_args{overlay}, util_args{overwrite_fields},
    #                     code_print_row
    # Stash keys written: output_fields, output_fields_idx

    # Collect all output fields: the union of every file's fields, in order
    # of first appearance. output_fields_idx maps field name => position in
    # output_fields and is filled in lockstep with it, so it doubles as the
    # "already collected?" lookup (no need to rescan the list per field).
    $r->{output_fields}     = [];
    $r->{output_fields_idx} = {};
    for my $input_fields (@{ $r->{all_input_fields} }) {
        for my $field (@{ $input_fields }) {
            next if exists $r->{output_fields_idx}{$field};
            push @{ $r->{output_fields} }, $field;
            $r->{output_fields_idx}{$field} = $#{ $r->{output_fields} };
        }
    }

    my $csv = $r->{input_parser};

    if ($r->{util_args}{overlay}) {

        # Overlay mode: row N of the output is built from row N of every
        # input file that still has data, laid on top of each other.
        my $overwrite_fields  = $r->{util_args}{overwrite_fields};
        my $output_fields_idx = $r->{output_fields_idx};

        while (1) {
            my $has_not_eof;
            my $combined_row = [("") x @{ $r->{output_fields} }];
            my %seen_fields;

            for my $i (0 .. $#{ $r->{all_input_fh} }) {
                my $fh = $r->{all_input_fh}[$i];

                # An exhausted file contributes nothing to this row.
                next if eof($fh);
                $has_not_eof++;

                # NOTE(review): the result of getline() is not checked here;
                # what happens on a row the parser rejects should be
                # confirmed against the parser's documentation.
                my $row = $csv->getline($fh);

                my $input_fields = $r->{all_input_fields}[$i];
                for my $j (0 .. $#{ $input_fields }) {
                    my $field = $input_fields->[$j];

                    # The first file that supplies a field wins, unless
                    # overwrite_fields is set, in which case the last one
                    # wins.
                    if (!($seen_fields{$field}++) || $overwrite_fields) {
                        $combined_row->[ $output_fields_idx->{$field} ] = $row->[$j];
                    }
                }
            } # for all_input_fh

            # Every file was at EOF: nothing was read, so we are done.
            last unless $has_not_eof;

            $r->{code_print_row}->($combined_row);
        } # while 1

    } else {

        # Plain concatenation: print all the data rows, file after file.
        # Fields that a file does not have stay as empty strings.
        for my $i (0 .. $#{ $r->{all_input_fh} }) {
            log_trace "[%d/%d] Adding rows from file #%d ...",
                $i+1, scalar(@{$r->{all_input_fh}}), $i+1;

            my $fh           = $r->{all_input_fh}[$i];
            my $input_fields = $r->{all_input_fields}[$i];

            while (my $row = $csv->getline($fh)) {
                my $combined_row = [("") x @{ $r->{output_fields} }];
                for my $j (0 .. $#{ $input_fields }) {
                    my $field = $input_fields->[$j];
                    $combined_row->[ $r->{output_fields_idx}{$field} ] = $row->[$j];
                }
                $r->{code_print_row}->($combined_row);
            }
        } # for all input fh
    }
},
);
1;
# ABSTRACT: Concatenate several CSV files together, collecting all the fields
__END__
=pod
=encoding UTF-8
=head1 NAME
App::CSVUtils::csv_concat - Concatenate several CSV files together, collecting all the fields
=head1 VERSION
This document describes version 1.036 of App::CSVUtils::csv_concat (from Perl distribution App-CSVUtils), released on 2025-02-04.
=head1 FUNCTIONS
=head2 csv_concat
Usage:
csv_concat(%args) -> [$status_code, $reason, $payload, \%result_meta]
Concatenate several CSV files together, collecting all the fields.
Example, concatenating this CSV:
col1,col2
1,2
3,4
and:
( run in 0.605 second using v1.01-cache-2.11-cpan-df04353d9ac )