App-ElasticSearch-Utilities
view release on metacpan or search on metacpan
lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm view on Meta::CPAN
return [{condition => $make{$matcher}->($term,$qs) }];
}
}
}
}
return;
}
# Collect the unique values found in one column of a CSV file.
#
# Parameters:
#   $file - path of the CSV file; it is opened for reading as UTF-8
#   $col  - index of the column to pull out of every row
#
# Returns a hash reference whose keys are the distinct defined values seen in
# that column (each mapped to 1).  Rows where the column is undefined are
# skipped.
#
# Dies when the file cannot be opened.  A CSV parse failure is not fatal: it
# is reported on STDERR and the values collected so far are returned.
sub _parse_csv {
    my ($file,$col) = @_;

    my $csv = Text::CSV_XS->new({binary=>1,empty_is_undef=>1});
    open my $fh, "<:encoding(utf8)", $file or die "Unable to read $file: $!";

    my %uniq = ();
    while( my $row = $csv->getline($fh) ) {
        my $val;
        # NOTE(review): presumably guards against a fatal error on an odd
        # column index -- kept as in the original, confirm if still needed.
        eval {
            $val = $row->[$col];
        };
        next unless defined $val;
        $uniq{$val} = 1;
    }

    # getline() returns false both at end-of-file and on a parse error, so
    # check which one stopped the loop instead of silently truncating input.
    unless( $csv->eof ) {
        # Scalar context: stringifies to the diagnostic message
        my $err = $csv->error_diag;
        output({stderr=>1,color=>'yellow'},
            sprintf("Invalid CSV in %s: %s", $file, $err)
        );
    }
    close $fh;

    return \%uniq;
}
# Gather the distinct values of one column from a delimited text file.
#
# Parameters:
#   $file - path handed to read_lines()
#   $col  - index of the column to extract after splitting each line
#
# Lines that are undefined, empty, or start with '#' are ignored.  The
# remaining lines are passed to debug(), chomped and split on tab or NUL
# characters.
#
# Returns a hash reference keyed by every defined value found in the
# requested column (each mapped to 1).
sub _parse_txt {
    my ($file,$col) = @_;

    my @rows = grep { defined $_ and length $_ and $_ !~ /^#/ } read_lines($file);
    debug({color=>'magenta'}, @rows);

    my %seen;
    foreach my $line (@rows) {
        chomp $line;
        # Split on tabs or nulls
        my @fields = split /[\t\0]/, $line;
        my $value  = $fields[$col];
        next unless defined $value;
        $seen{$value} = 1;
    }

    return \%seen;
}
# Collect the unique scalar values found under a key path in a file of
# newline-delimited JSON.
#
# Parameters:
#   $file  - path handed to read_lines(); each line is decoded on its own
#   $field - dotted key path (key.subkey.subsubkey) to follow in each document
#
# Returns a hash reference keyed by every defined, non-reference value found
# at the end of the path (each mapped to 1).  When the path ends at an array,
# its defined non-reference elements are collected.
#
# Dies when $field is "-1" (no key path was given) and when no value at all
# was collected from the file.  Lines that fail to decode are reported via
# output() on STDERR and skipped; lines where the path cannot be followed are
# skipped silently.
sub _parse_json {
    my ($file,$field) = @_;

    die "For new line delimited JSON, please specify the key, ie <field>:$file\[key.path.i.want\]"
        if $field eq "-1";

    my %uniq = ();
    my $line = 0;
    my @path = split /\./, $field; # Supports key.subkey.subsubkey format

    JSON_LINE: foreach my $json ( read_lines($file) ) {
        $line++;
        my $data;
        eval {
            $data = decode_json($json);
            1;
        } or do {
            my $err = $@;
            output({stderr=>1,color=>'yellow'}, sprintf "Invalid JSON in %s, line %d: %s",
                $file,
                $line,
                $err,
            );
            verbose({stderr=>1,color=>'magenta',indent=>1}, $json);
            next JSON_LINE;
        };

        # Walk the path.  Only descend through hashes: a line holding an
        # array or scalar, or a path running into one, is skipped rather
        # than dying on an invalid hash dereference.
        foreach my $k (@path) {
            next JSON_LINE unless ref($data) eq 'HASH' && exists $data->{$k};
            $data = $data->{$k};
        }

        # At this point $data should contain our values.  JSON nulls decode
        # to undef and are ignored, so they never become an empty-string key.
        if( is_arrayref($data) ) {
            $uniq{$_} = 1 for grep { defined $_ && !is_ref($_) } @{ $data };
        }
        elsif( defined $data && !is_ref($data) ) {
            $uniq{$data} = 1;
        }
    }

    die "Expected newline-delimited JSON in $file, but it was empty or didn't contain '$field'"
        unless keys %uniq;

    return \%uniq;
}
1;
__END__
=pod
=head1 NAME
App::ElasticSearch::Utilities::QueryString::FileExpansion - Build a terms query from unique values in a column of a file
=head1 VERSION
version 8.8
=head1 SYNOPSIS
=head2 App::ElasticSearch::Utilities::QueryString::FileExpansion
If the match ends in .dat, .txt, .csv, or .json then we attempt to read a file with that name and OR the condition:
$ cat test.dat
50 1.2.3.4
40 1.2.3.5
30 1.2.3.6
20 1.2.3.7
Or
$ cat test.csv
50,1.2.3.4
40,1.2.3.5
30,1.2.3.6
20,1.2.3.7
Or
$ cat test.txt
1.2.3.4
( run in 1.103 second using v1.01-cache-2.11-cpan-39bf76dae61 )