App-CSVUtils

 view release on metacpan or  search on metacpan

lib/App/CSVUtils/csv_concat.pm  view on Meta::CPAN

field. See the utility's example for an illustration.

MARKDOWN
        },
    },
    tags => ['category:combining', 'join', 'merge'],

    reads_multiple_csv => 1,

    before_open_input_files => sub {
        my ($r) = @_;

        # Initialize the two keys this utility adds to the stash. Both start
        # out as (distinct) empty arrays.
        $r->{$_} = [] for qw(all_input_fields all_input_fh);
    },

    on_input_header_row => sub {
        my ($r) = @_;

        # Called once the header row of an input file has been read. Append
        # that file's field list and its filehandle to the per-file lists in
        # the stash: the complete set of fields from all files must be known
        # before any row is printed, and the saved filehandles let us pick up
        # reading the data rows afterwards.
        for my $pair (
            [all_input_fields => 'input_fields'],
            [all_input_fh     => 'input_fh'],
        ) {
            my ($list_key, $value_key) = @$pair;
            push @{ $r->{$list_key} }, $r->{$value_key};
        }

        # NOTE(review): presumably this asks the main routine to skip the
        # rest of the current file for now -- confirm against the caller.
        ++$r->{wants_skip_file};
    },

    after_close_input_files => sub {
        my $r = shift;

        # Print the combined data rows.
        #
        # As described in the on_input_header_row hook, the field list and the
        # filehandle of every input file have been saved in the stash. So even
        # though this is the after_close_input_files hook, where the main
        # routine supposedly has closed all input files, we still hold the
        # filehandles and can resume reading the data rows from them.
        #
        # Stash keys read: all_input_fields, all_input_fh, input_parser,
        # util_args (overlay, overwrite_fields), code_print_row.
        # Stash keys written: output_fields, output_fields_idx.

        my $all_input_fields = $r->{all_input_fields};
        my $all_input_fh     = $r->{all_input_fh};

        # Collect all output fields: the union of the fields of all files, in
        # order of first appearance. output_fields_idx maps a field name to
        # its position in output_fields, and doubles as the "already seen"
        # lookup so we do not have to rescan output_fields for every field.
        my $output_fields     = $r->{output_fields}     = [];
        my $output_fields_idx = $r->{output_fields_idx} = {};
        for my $i (0 .. $#{ $all_input_fields }) {
            my $input_fields = $all_input_fields->[$i];
            for my $j (0 .. $#{ $input_fields }) {
                my $field = $input_fields->[$j];
                next if exists $output_fields_idx->{$field};
                push @{ $output_fields }, $field;
                $output_fields_idx->{$field} = $#{ $output_fields };
            }
        }

        my $csv = $r->{input_parser};

        # every output row has one (initially empty-string) slot per output
        # field
        my $num_output_fields = @{ $output_fields };

        if ($r->{util_args}{overlay}) {

            # Overlay mode: output row N is assembled from row N of every
            # file that still has rows left. Stop once all files are at EOF.
            my $overwrite_fields = $r->{util_args}{overwrite_fields};
            while (1) {
                my $has_not_eof;
                my $combined_row = [("") x $num_output_fields];
                my %seen_fields;
                for my $i (0 .. $#{ $all_input_fh }) {
                    my $fh = $all_input_fh->[$i];

                    # a file that has run out of rows contributes nothing
                    next if eof($fh);
                    $has_not_eof++;
                    my $row = $csv->getline($fh);
                    my $input_fields = $all_input_fields->[$i];
                    for my $j (0 .. $#{ $input_fields }) {
                        my $field = $input_fields->[$j];
                        # the first file supplying a field wins, unless
                        # overwrite_fields is set, in which case every later
                        # file overwrites the value
                        if (!($seen_fields{$field}++) || $overwrite_fields) {
                            $combined_row->[ $output_fields_idx->{$field} ] = $row->[$j];
                        }
                    }
                } # for all_input_fh
                last unless $has_not_eof;
                $r->{code_print_row}->($combined_row);
            } # while 1

        } else {

            # Concatenation mode: print all the data rows of the first file,
            # then of the second, and so on. Fields a file does not have are
            # left as empty strings.
            for my $i (0 .. $#{ $all_input_fh }) {
                log_trace "[%d/%d] Adding rows from file #%d ...",
                    $i+1, scalar(@{ $all_input_fh }), $i+1;
                my $fh = $all_input_fh->[$i];
                my $input_fields = $all_input_fields->[$i];
                while (my $row = $csv->getline($fh)) {
                    my $combined_row = [("") x $num_output_fields];
                    for my $j (0 .. $#{ $input_fields }) {
                        my $field = $input_fields->[$j];
                        $combined_row->[ $output_fields_idx->{$field} ] = $row->[$j];
                    }
                    $r->{code_print_row}->($combined_row);
                }
            } # for all input fh

        }
    },
);

1;
# ABSTRACT: Concatenate several CSV files together, collecting all the fields

__END__

=pod

=encoding UTF-8

=head1 NAME

App::CSVUtils::csv_concat - Concatenate several CSV files together, collecting all the fields

=head1 VERSION

This document describes version 1.036 of App::CSVUtils::csv_concat (from Perl distribution App-CSVUtils), released on 2025-02-04.

=head1 FUNCTIONS


=head2 csv_concat

Usage:

 csv_concat(%args) -> [$status_code, $reason, $payload, \%result_meta]

Concatenate several CSV files together, collecting all the fields.

For example, concatenating this CSV:

 col1,col2
 1,2
 3,4

and:



( run in 0.605 second using v1.01-cache-2.11-cpan-df04353d9ac )