App-Fasops
view release on metacpan or search on metacpan
lib/App/Fasops/Command/mergecsv.pm view on Meta::CPAN
package App::Fasops::Command::mergecsv;
use strict;
use warnings;
use autodie;
use App::Fasops -command;
use App::Fasops::Common;
# One-line summary of this command, shown in the command listing.
# '@fields' is a literal (single-quoted), not an interpolated array.
sub abstract {
    my $summary = q{merge csv files based on @fields};
    return $summary;
}
# Option specification for this command.
#
# Returns a list of option descriptors ([ spec, description, \%extras ])
# followed by a trailing hash ref carrying the show_defaults setting.
sub opt_spec {
    my $outfile = [
        "outfile|o=s",
        "Output filename. [stdout] for screen",
        { default => "stdout" },
    ];
    my $fields = [
        'fields|f=i@',
        'fields as identifies, 0 as first column',
        { default => [0] },
    ];
    my $concat = [
        'concat|c',
        'do concat other than merge. Keep first ID fields',
    ];

    return ( $outfile, $fields, $concat, { show_defaults => 1 } );
}
# Usage line shown at the top of the help output.
sub usage_desc {
    my $usage = q{fasops mergecsv [options] <infile> [more files]};
    return $usage;
}
# Long help text for this command: the capitalised abstract followed by
# usage notes and example invocations.
#
# The examples name the `fasops` executable so that they agree with
# usage_desc(); they previously named a different program (`egaz`).
sub description {

    # Explicit call parentheses so the call does not depend on abstract()
    # having been compiled earlier in the file.
    my $desc = ucfirst( abstract() ) . ".\n";

    # Non-interpolating heredoc: the text is emitted verbatim.
    $desc .= <<'MARKDOWN';
* Accept one or more csv files
* infile == stdin means reading from STDIN
cat 1.csv 2.csv | fasops mergecsv -f 0 -f 1
fasops mergecsv -f 0 -f 1 1.csv 2.csv
MARKDOWN

    return $desc;
}
# Validate positional arguments and normalise the --fields option.
#
# Parameters:
#   $self - the command object; usage_error() is invoked on it when the
#           arguments are not acceptable
#   $opt  - parsed options; $opt->{fields} is replaced with a normalised list
#   $args - array ref of input names; "stdin" (case-insensitive) is accepted
#           without a filesystem check, anything else must be an existing file
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    if ( @{$args} < 1 ) {
        my $message = "This command need one or more input files.\n\tIt found";
        $message .= sprintf " [%s]", $_ for @{$args};
        $message .= ".\n";
        $self->usage_error($message);
    }

    for my $infile ( @{$args} ) {
        next if lc $infile eq "stdin";
        if ( !Path::Tiny::path($infile)->is_file ) {
            $self->usage_error("The input file [$infile] doesn't exist.");
        }
    }

    # make array splicing happier
    # Field indices are integers, so they must be ordered numerically: the
    # default string sort puts 10 before 2, and execute() splices columns out
    # in reverse of this order, which is only safe when it is truly
    # descending. Repeated indices are dropped because splicing the same
    # position twice would remove an extra, unrelated column.
    my %seen;
    $opt->{fields} = [
        sort { $a <=> $b }
        grep { !$seen{$_}++ } @{ $opt->{fields} }
    ];

    return;
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# read
#----------------------------#
my $index_of = {}; # index of ids in @lines
my @lines;
my ( $count_all, $index ) = ( 0, 0 );
for my $infile ( @{$args} ) {
#@type IO::Handle
my $in_fh;
if ( lc $infile eq "stdin" ) {
$in_fh = *STDIN{IO};
}
else {
$in_fh = IO::Zlib->new( $infile, "rb" );
}
while ( !$in_fh->eof ) {
my $line = $in_fh->getline;
chomp $line;
next unless $line;
$count_all++;
my $id = join( "_", ( split ",", $line )[ @{ $opt->{fields} } ] );
if ( exists $index_of->{$id} ) {
if ( $opt->{concat} ) {
my $ori_index = $index_of->{$id};
my $ori_line = $lines[$ori_index];
my @fs = split ",", $line;
for my $f_idx ( reverse @{ $opt->{fields} } ) {
splice @fs, $f_idx, 1;
}
$lines[$ori_index] = join ",", $ori_line, @fs;
}
}
else {
( run in 2.039 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )