App-ElasticSearch-Utilities

 view release on metacpan or  search on metacpan

lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm  view on Meta::CPAN

                    return [{condition => $make{$matcher}->($term,$qs) }];
                }
            }
        }
    }
    return;
}

# Collect the distinct values found in one column of a CSV file.
#
# Parameters:
#   $file - path of the CSV file; opened for reading with a UTF-8 layer
#   $col  - index of the wanted column; used directly as a Perl array
#           subscript on each parsed row, so a negative index counts from
#           the end of the row
#
# Returns a hash reference whose keys are the distinct defined values seen
# in that column (every value is 1).
#
# Dies if the file cannot be opened.  A row that Text::CSV_XS cannot parse
# ends the read early; this is reported on STDERR and the values gathered up
# to that point are still returned.
sub _parse_csv {
    my ($file,$col) = @_;
    my $csv = Text::CSV_XS->new({binary=>1,empty_is_undef=>1});
    open my $fh, "<:encoding(utf8)", $file or die "Unable to read $file: $!";
    my %uniq = ();
    while( my $row = $csv->getline($fh) ) {
        # Rows shorter than the requested column and empty cells
        # (empty_is_undef) yield undef here and are skipped.
        my $val = $row->[$col];
        next unless defined $val;
        $uniq{$val} = 1;
    }
    # getline() returns false for both end-of-file and a parse failure;
    # eof() tells the two apart, so a malformed row is reported instead of
    # silently truncating the value list.
    unless( $csv->eof ) {
        my ($code,$msg) = $csv->error_diag;
        output({stderr=>1,color=>'yellow'}, sprintf "Invalid CSV in %s, values after the failing record were skipped: %s (%s)",
            $file,
            $msg,
            $code,
        );
    }
    close $fh;
    return \%uniq;
}

# Gather the distinct values from one column of a delimited text file.
#
# Parameters:
#   $file - path handed to read_lines()
#   $col  - index of the wanted field; used directly as a Perl array
#           subscript on the split fields
#
# Returns a hash reference keyed by every distinct defined value found in
# that field (every value is 1).
sub _parse_txt {
    my ($file,$col) = @_;

    # Keep only usable lines: defined, non-empty and not starting with '#'
    my @rows = grep { defined($_) && length($_) && $_ !~ /^#/ } read_lines($file);
    debug({color=>'magenta'}, @rows);

    my %seen;
    foreach my $line (@rows) {
        chomp $line;
        # Fields are separated by a tab or a NUL character
        my @fields = split /[\t\0]/, $line;
        my $value  = $fields[$col];
        # Lines without the requested field contribute nothing
        next unless defined $value;
        $seen{$value} = 1;
    }
    return \%seen;
}

# Collect the distinct scalar values found at a key path in a
# newline-delimited JSON file.
#
# Parameters:
#   $file  - path handed to read_lines(); each line is decoded as one JSON
#            document
#   $field - dotted key path ("key.subkey.subsubkey") looked up in every
#            document; an undefined value or "-1" is treated as "no key
#            given"
#
# Returns a hash reference keyed by every distinct value found (every value
# is 1).  When the path ends at an array, each non-reference element is
# collected; nested structures and JSON nulls are ignored.
#
# Lines that are not valid JSON are reported on STDERR and skipped.
# Dies when no key path was given, or when no line yielded a value.
sub _parse_json {
    my ($file,$field) = @_;

    die "For new line delimited JSON, please specify the key, ie <field>:$file\[key.path.i.want\]"
        if !defined $field || $field eq "-1";

    my %uniq = ();
    my $line = 0;
    my @path = split /\./, $field;      # Supports key.subkey.subsubkey format
    JSON_LINE: foreach my $json ( read_lines($file) ) {
        $line++;
        # Blank lines hold no document; skip them without an error message,
        # but keep counting so reported line numbers match the file.
        next JSON_LINE unless defined $json && $json =~ /\S/;
        my $data;
        eval {
            $data = decode_json($json);
            1;
        } or do {
            my $err = $@;
            output({stderr=>1,color=>'yellow'}, sprintf "Invalid JSON in %s, line %d: %s",
                $file,
                $line,
                $err,
            );
            verbose({stderr=>1,color=>'magenta',indent=>1}, $json);
            next JSON_LINE;
        };
        # Walk the path
        foreach my $k (@path) {
            # Only descend into hashes: an array or scalar on the way down
            # means this document lacks the path, and dereferencing it as a
            # hash would abort the whole parse.
            next JSON_LINE unless ref($data) eq 'HASH' && exists $data->{$k};
            $data = $data->{$k};
        }
        # At this point $data should contain our values
        if( is_arrayref($data) ) {
            # Skip nulls so they do not turn into an empty-string key
            $uniq{$_} = 1 for grep { defined($_) && !is_ref($_) } @{ $data };
        }
        elsif( defined $data && !is_ref($data) ) {
            $uniq{$data} = 1;
        }
    }

    die "Expected newline-delimited JSON in $file, but it was empty or didn't contain '$field'"
        unless keys %uniq;

    return \%uniq;
}

1;

__END__

=pod

=head1 NAME

App::ElasticSearch::Utilities::QueryString::FileExpansion - Build a terms query from unique values in a column of a file

=head1 VERSION

version 8.8

=head1 SYNOPSIS

=head2 App::ElasticSearch::Utilities::QueryString::FileExpansion

If the match ends in .dat, .txt, .csv, or .json, then we attempt to read a file with that name and build an OR condition from the unique values it contains:

    $ cat test.dat
    50  1.2.3.4
    40  1.2.3.5
    30  1.2.3.6
    20  1.2.3.7

Or

    $ cat test.csv
    50,1.2.3.4
    40,1.2.3.5
    30,1.2.3.6
    20,1.2.3.7

Or

    $ cat test.txt
    1.2.3.4



( run in 1.103 second using v1.01-cache-2.11-cpan-39bf76dae61 )