App-CSVUtils

 view release on metacpan or  search on metacpan

lib/App/CSVUtils/csv_setop.pm  view on Meta::CPAN

        $r->{all_input_data_rows} [ $r->{input_filenum}-1 ] = [];
    },

    # Hook: called with the state hash for each input data row. Appends the
    # current row to the list kept for the file it came from, so that
    # after_close_input_files can compare rows across all files.
    on_input_data_row => sub {
        my ($r) = @_;

        # input_filenum minus one is the slot in all_input_data_rows
        # (the slots are indexed from 0 by file_idx in after_close_input_files).
        my $file_idx = $r->{input_filenum} - 1;

        # The row is stored as-is (same reference, not a copy).
        push @{ $r->{all_input_data_rows}[$file_idx] }, $r->{input_row};
    },

    after_close_input_files => sub {
        require Tie::IxHash;

        my $r = shift;

        my $op = $r->{util_args}{op};
        my $ci = $r->{util_args}{ignore_case};
        my $num_files = @{ $r->{util_args}{input_filenames} };

        my @compare_fields; # elem = [fieldname-for-file1, fieldname-for-file2, ...]
        if (defined $r->{util_args}{compare_fields}) {
            my @ff = ref($r->{util_args}{compare_fields}) eq 'ARRAY' ?
                @{$r->{util_args}{compare_fields}} : split(/,/, $r->{util_args}{compare_fields});
            for my $field_idx (0..$#ff) {
                my @ff2 = split /:/, $ff[$field_idx];
                for (@ff2+1 .. $num_files) {
                    push @ff2, $ff2[0];
                }
                $compare_fields[$field_idx] = \@ff2;
            }
            # XXX check that specified fields exist
        } else {
            for my $field_idx (0..$#{ $r->{all_input_fields}[0] }) {
                $compare_fields[$field_idx] = [
                    map { $r->{all_input_fields}[0][$field_idx] } 0..$num_files-1];
            }
        }

        my @result_fields; # elem = fieldname, ...
        if (defined $r->{util_args}{result_fields}) {
            @result_fields = ref($r->{util_args}{result_fields}) eq 'ARRAY' ?
                @{$r->{util_args}{result_fields}} : split(/,/, $r->{util_args}{result_fields});
            # XXX check that specified fields exist
        } else {
            @result_fields = @{ $r->{all_input_fields}[0] };
        }
        $r->{output_fields} = \@result_fields;

        tie my(%res), 'Tie::IxHash';

        my $code_get_compare_key = sub {
            my ($file_idx, $row_idx) = @_;
            my $row   = $r->{all_input_data_rows}[$file_idx][$row_idx];
            my $key = join "|", map {
                my $field = $compare_fields[$_][$file_idx];
                my $field_idx = $r->{all_input_fields_idx}[$file_idx]{$field};
                my $val = defined $field_idx ? $row->[$field_idx] : "";
                $val = uc $val if $ci;
                $val;
            } 0..$#compare_fields;
            #say "D:compare_key($file_idx, $row_idx)=<$key>";
            $key;
        };

        my $code_print_result_row = sub {
            my ($file_idx, $row) = @_;
            my @res_row = map {
                my $field = $result_fields[$_];
                my $field_idx = $r->{all_input_fields_idx}[$file_idx]{$field};
                defined $field_idx ? $row->[$field_idx] : "";
            } 0..$#result_fields;
            $r->{code_print_row}->(\@res_row);
        };

        if ($op eq 'intersect') {
            for my $file_idx (0..$num_files-1) {
                if ($file_idx == 0) {
                    for my $row_idx (0..$#{ $r->{all_input_data_rows}[$file_idx] }) {
                        my $key = $code_get_compare_key->($file_idx, $row_idx);
                        $res{$key} //= [1, $row_idx]; # [num_of_occurrence, row_idx]
                    }
                } else {
                    for my $row_idx (0..$#{ $r->{all_input_data_rows}[$file_idx] }) {
                        my $key = $code_get_compare_key->($file_idx, $row_idx);
                        if ($res{$key} && $res{$key}[0] == $file_idx) {
                            $res{$key}[0]++;
                        }
                    }
                }

                # print result
                if ($file_idx == $num_files-1) {
                    for my $key (keys %res) {
                        $code_print_result_row->(
                            0, $r->{all_input_data_rows}[0][$res{$key}[1]])
                            if $res{$key}[0] == $num_files;
                    }
                }
            } # for file_idx

        } elsif ($op eq 'union') {

            for my $file_idx (0..$num_files-1) {
                for my $row_idx (0..$#{ $r->{all_input_data_rows}[$file_idx] }) {
                    my $key = $code_get_compare_key->($file_idx, $row_idx);
                    next if $res{$key}++;
                    my $row = $r->{all_input_data_rows}[$file_idx][$row_idx];
                    $code_print_result_row->($file_idx, $row);
                }
            } # for file_idx

        } elsif ($op eq 'diff') {

            for my $file_idx (0..$num_files-1) {
                if ($file_idx == 0) {
                    for my $row_idx (0..$#{ $r->{all_input_data_rows}[$file_idx] }) {
                        my $key = $code_get_compare_key->($file_idx, $row_idx);
                        $res{$key} //= [$file_idx, $row_idx];
                    }
                } else {
                    for my $row_idx (0..$#{ $r->{all_input_data_rows}[$file_idx] }) {



( run in 0.328 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )