App-ElasticSearch-Utilities

 view release on metacpan or  search on metacpan

lib/App/ElasticSearch/Utilities/QueryString/FileExpansion.pm  view on Meta::CPAN

package App::ElasticSearch::Utilities::QueryString::FileExpansion;
# ABSTRACT: Build a terms query from unique values in a column of a file

use v5.16;
use warnings;

our $VERSION = '8.8'; # VERSION

use CLI::Helpers qw(:output);
use File::Slurp::Tiny qw(read_lines);
use JSON::MaybeXS;
use Ref::Util qw(is_ref is_arrayref is_hashref);
use Text::CSV_XS;
use namespace::autoclean;

use Moo;
with 'App::ElasticSearch::Utilities::QueryString::Plugin';

# Builder that yields this plugin's priority: always the constant 10.
# NOTE(review): presumably feeds a `priority` attribute declared by the
# App::ElasticSearch::Utilities::QueryString::Plugin role -- confirm there.
sub _build_priority {
    return 10;
}

# Dispatch table: file extension => code reference of the parser for it.
# handle_token() lower-cases the extension before the lookup and invokes the
# selected parser as $parser->($file, $col).
# The plain-text extensions (txt, dat) deliberately share one parser.
my %parsers = (
    ( map { $_ => \&_parse_txt } qw(txt dat) ),
    csv  => \&_parse_csv,
    json => \&_parse_json,
);


sub handle_token {
    my($self,$token) = @_;

    my $makeMatcher = sub {
        my ($matcher,$field,$patterns)  = @_;
        my @tests;
        foreach my $pattern (@{ $patterns }) {
            push @tests, { $matcher => { $field => { value => $pattern } } };
        }
        return {
            bool => {
                should => \@tests,
                minimum_should_match => 1,
            }
        }
    };
    my %make = (
        terms => sub {
            my ($field, $uniq) = @_;
            return { terms => { $field => $uniq } };
        },
        regexp   => sub { $makeMatcher->(regexp   => @_) },
        wildcard => sub { $makeMatcher->(wildcard => @_) },
    );
    if( my ($term,$match) = split /\:/, $token, 2 ) {
        if( defined $match && $match =~ /(.*\.(\w{3,4}))(?:\[([^\]]+)\])?$/) {
            my($file,$type,$col) = ($1,$2,$3);
            # Support Wildcards
            my $matcher = $file =~ s/^\~// ? 'regexp'
                        : $file =~ s/^\*// ? 'wildcard'
                        : 'terms';
            $col //= -1;
            $type = lc $type;
            verbose({level=>2,color=>'magenta'}, sprintf "# %s attempt of %s type, %s[%s] %s",
                $self->name, $type, $file, $col, -f $file ? 'exists' : 'does not exist'
            );
            if( exists $parsers{$type} && -f $file ) {
                my $uniq = $parsers{$type}->($file,$col);
                if (defined $uniq && is_hashref($uniq) && scalar(keys %$uniq)) {
                    verbose({color=>'cyan'},
                        sprintf "# FILE:%s[%s] contained %d unique elements.",
                        $file,
                        $col,
                        scalar(keys %$uniq),



( run in 0.774 second using v1.01-cache-2.11-cpan-39bf76dae61 )