App-ElasticSearch-Utilities
view release on metacpan or search on metacpan
lib/App/ElasticSearch/Utilities/QueryString.pm view on Meta::CPAN
package App::ElasticSearch::Utilities::QueryString;
# ABSTRACT: CLI query string fixer
use v5.16;
use warnings;
our $VERSION = '8.9'; # VERSION
use App::ElasticSearch::Utilities qw(:config);
use App::ElasticSearch::Utilities::Query;
use CLI::Helpers qw(:output);
use Module::Pluggable::Object;
use Moo;
use Ref::Util qw(is_arrayref);
use Types::Standard qw(ArrayRef Enum HashRef);
use namespace::autoclean;
my %JOINING = map { $_ => 1 } qw( AND OR );
my %TRAILING = map { $_ => 1 } qw( AND OR NOT );
has 'context' => (
is => 'rw',
isa => Enum[qw(query filter)],
lazy => 1,
default => sub { 'query' },
);
has search_path => (
is => 'rw',
isa => ArrayRef,
default => sub {[]},
);
has default_join => (
is => 'rw',
isa => Enum[qw(AND OR)],
default => sub { 'AND' },
);
has plugins => (
is => 'ro',
isa => ArrayRef,
builder => '_build_plugins',
lazy => 1,
);
has fields_meta => (
is => 'rw',
isa => HashRef,
default => sub { {} },
);
sub expand_query_string {
my $self = shift;
my $query = App::ElasticSearch::Utilities::Query->new(
fields_meta => $self->fields_meta,
);
my @processed = ();
TOKEN: foreach my $token (@_) {
foreach my $p (@{ $self->plugins }) {
my $res = $p->handle_token($token);
if( defined $res ) {
push @processed, is_arrayref($res) ? @{$res} : $res;
next TOKEN;
}
}
push @processed, { query_string => $token };
}
debug({color=>"magenta"}, "Processed parts");
debug_var({color=>"magenta"},\@processed);
my $context = $self->context eq 'query' ? 'must' : 'filter';
my $invert=0;
my @dangling=();
my @qs=();
foreach my $part (@processed) {
if( exists $part->{dangles} ) {
push @dangling, $part->{query_string};
}
elsif( exists $part->{query_string} ) {
push @qs, @dangling, $part->{query_string};
@dangling=(),
}
elsif( exists $part->{condition} ) {
my $target = $invert ? 'must_not' : $context;
$query->add_bool( $target => $part->{condition} );
@dangling=();
}
elsif( exists $part->{nested} ) {
$query->nested($part->{nested}{query});
$query->nested_path($part->{nested}{path});
@dangling=();
}
# Carry over the Inversion for instance where we jump out of the QS
$invert = exists $part->{invert} && $part->{invert};
}
if(@qs) {
pop @qs while @qs && exists $TRAILING{$qs[-1]};
shift @qs while @qs && exists $JOINING{$qs[0]};
# Ensure there's a joining token, otherwise use our default
if( @qs > 1 ) {
my $prev_query = 0;
my @joined = ();
foreach my $part ( @qs ) {
if( $prev_query ) {
push @joined, $self->default_join() unless exists $JOINING{$part};
}
push @joined, $part;
# Here we include AND, NOT, OR
$prev_query = exists $TRAILING{$part} ? 0 : 1;
}
@qs = @joined;
}
}
$query->add_bool($context => { query_string => { query => join(' ', @qs) } }) if @qs;
return $query;
}
# Builder Routines for QS Objects
sub _build_plugins {
my $self = shift;
my $globals = es_globals('plugins');
my $finder = Module::Pluggable::Object->new(
search_path => ['App::ElasticSearch::Utilities::QueryString',@{ $self->search_path }],
except => [qw(
App::ElasticSearch::Utilities::QueryString::AutoEscape
App::ElasticSearch::Utilities::QueryString::Plugin
)],
instantiate => 'new',
);
my @plugins;
foreach my $p ( sort { $a->priority <=> $b->priority || $a->name cmp $b->name }
$finder->plugins(
fields_meta => $self->fields_meta,
options => defined $globals ? $globals : {},
)
) {
debug(sprintf "Loaded %s with priority:%d", $p->name, $p->priority);
push @plugins, $p;
}
return \@plugins;
}
# Return true
1;
__END__
=pod
=head1 NAME
App::ElasticSearch::Utilities::QueryString - CLI query string fixer
=head1 VERSION
version 8.9
=head1 SYNOPSIS
This class provides a pluggable architecture to expand query strings on the
command-line into complex Elasticsearch queries.
=head1 ATTRIBUTES
=head2 context
Defaults to 'query', but can also be set to 'filter' so the elements will be
added to the 'must' or 'filter' parameter.
=head2 search_path
An array reference of additional namespaces to search for loading the query string
processing plugins. Example:
$qs->search_path([qw(My::Company::QueryString)]);
This will search:
App::ElasticSearch::Utilities::QueryString::*
My::Company::QueryString::*
For query processing plugins.
=head2 default_join
When fixing up the query string, if two tokens are found next to eachother
missing a joining token, join using this token. Can be either C<AND> or C<OR>,
and defaults to C<AND>.
=head2 plugins
Array reference of ordered query string processing plugins, lazily assembled.
=head2 fields_meta
A hash reference with the field data from L<App::ElasticSearch::Utilities::es_index_fields>.
=head1 METHODS
=head2 expand_query_string(@tokens)
This function takes a list of tokens, often from the command line via @ARGV. Uses
a plugin infrastructure to allow customization.
Returns: L<App::ElasticSearch::Utilities::Query> object
=head1 TOKENS
The token expansion plugins can return undefined, which is basically a noop on the token.
The plugin can return a hash reference, which marks that token as handled and no other plugins
receive that token. The hash reference may contain:
=over 2
=item query_string
This is the rewritten bits that will be reassembled in to the final query string.
=item condition
This is usually a hash reference representing the condition going into the bool query. For instance:
{ terms => { field => [qw(alice bob charlie)] } }
Or
{ prefix => { user_agent => 'Go ' } }
These conditions will wind up in the B<must> or B<must_not> section of the B<bool> query depending on the
state of the the invert flag.
=item invert
This is used by the bareword "not" to track whether the token invoked a flip from the B<must> to the B<must_not>
state. After each token is processed, if it didn't set this flag, the flag is reset.
=item dangles
This is used for bare words like "not", "or", and "and" to denote that these terms cannot dangle from the
beginning or end of the query_string. This allows the final pass of the query_string builder to strip these
words to prevent syntax errors.
=back
=head1 Extended Syntax
The search string is pre-analyzed before being sent to ElasticSearch. The following plugins
work to manipulate the query string and provide richer, more complete syntax for CLI applications.
=head2 App::ElasticSearch::Utilities::QueryString::Barewords
The following barewords are transformed:
or => OR
and => AND
not => NOT
=head2 App::ElasticSearch::Utilities::QueryString::Text
Provides field prefixes to manipulate the text search capabilities.
lib/App/ElasticSearch/Utilities/QueryString.pm view on Meta::CPAN
=head3 Wildcard Query via '*'
Provide an '*' prefix to a query string parameter to promote that parameter to a C<wildcard> filter.
This uses the wild card match for text fields to making matching more intuitive.
E.g.:
*user_agent:"Mozilla*"
Is translated into:
{ wildcard => { user_agent => "Mozilla* } }
=head3 Regexp Query via '/'
Provide an '/' prefix to a query string parameter to promote that parameter to a C<regexp> filter.
If you want to use regexp matching for finding data, you can use:
/message:'\\bden(ial|ied|y)'
Is translated into:
{ regexp => { message => "\\bden(ial|ied|y)" } }
=head3 Fuzzy Matching via '~'
Provide an '~' prefix to a query string parameter to promote that parameter to a C<fuzzy> filter.
~message:deny
Is translated into:
{ fuzzy => { message => "deny" } }
=head3 Phrase Matching via '+'
Provide an '+' prefix to a query string parameter to promote that parameter to a C<match_phrase> filter.
+message:"login denied"
Is translated into:
{ match_phrase => { message => "login denied" } }
=head3 Automatic Match Queries for Text Fields
If the field meta data is provided and the field is a C<text> type, the query
will automatically be mapped to a C<match> query.
# message field is text
message:"foo"
Is translated into:
{ match => { message => "foo" } }
=head2 App::ElasticSearch::Utilities::QueryString::IP
If a field is an IP address uses CIDR Notation, it's expanded to a range query.
src_ip:10.0/8 => src_ip:[10.0.0.0 TO 10.255.255.255]
=head2 App::ElasticSearch::Utilities::QueryString::Ranges
This plugin translates some special comparison operators so you don't need to
remember them anymore.
Example:
price:<100
Will translate into a:
{ range: { price: { lt: 100 } } }
And:
price:>50,<100
Will translate to:
{ range: { price: { gt: 50, lt: 100 } } }
=head3 Supported Operators
B<gt> via E<gt>, B<gte> via E<gt>=, B<lt> via E<lt>, B<lte> via E<lt>=
=head2 App::ElasticSearch::Utilities::QueryString::Underscored
This plugin translates some special underscore surrounded tokens into
the Elasticsearch Query DSL.
Implemented:
=head3 _prefix_
Example query string:
_prefix_:useragent:'Go '
Translates into:
{ prefix => { useragent => 'Go ' } }
=head2 App::ElasticSearch::Utilities::QueryString::FileExpansion
If the match ends in .dat, .txt, .csv, or .json then we attempt to read a file with that name and OR the condition:
$ cat test.dat
50 1.2.3.4
40 1.2.3.5
30 1.2.3.6
20 1.2.3.7
Or
$ cat test.csv
50,1.2.3.4
40,1.2.3.5
30,1.2.3.6
20,1.2.3.7
Or
$ cat test.txt
1.2.3.4
1.2.3.5
1.2.3.6
1.2.3.7
Or
$ cat test.json
{ "ip": "1.2.3.4" }
{ "ip": "1.2.3.5" }
{ "ip": "1.2.3.6" }
{ "ip": "1.2.3.7" }
We can source that file:
src_ip:test.dat => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
src_ip:test.json[ip] => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
This make it simple to use the --data-file output options and build queries
based off previous queries. For .txt and .dat file, the delimiter for columns
in the file must be either a tab or a null. For files ending in
.csv, Text::CSV_XS is used to accurate parsing of the file format. Files
ending in .json are considered to be newline-delimited JSON.
You can also specify the column of the data file to use, the default being the last column or (-1). Columns are
B<zero-based> indexing. This means the first column is index 0, second is 1, .. The previous example can be rewritten
as:
src_ip:test.dat[1]
or:
src_ip:test.dat[-1]
For newline delimited JSON files, you need to specify the key path you want to extract from the file. If we have a
JSON source file with:
{ "first": { "second": { "third": [ "bob", "alice" ] } } }
{ "first": { "second": { "third": "ginger" } } }
{ "first": { "second": { "nope": "fred" } } }
We could search using:
actor:test.json[first.second.third]
Which would expand to:
{ "terms": { "actor": [ "alice", "bob", "ginger" ] } }
This option will iterate through the whole file and unique the elements of the list. They will then be transformed into
an appropriate L<terms query|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html>.
=head3 Wildcards
We can also have a group of wildcard or regexp in a file:
$ cat wildcards.dat
*@gmail.com
*@yahoo.com
To enable wildcard parsing, prefix the filename with a C<*>.
es-search.pl to_address:*wildcards.dat
Which expands the query to:
{
"bool": {
"minimum_should_match":1,
"should": [
{"wildcard":{"to_outbound":{"value":"*@gmail.com"}}},
{"wildcard":{"to_outbound":{"value":"*@yahoo.com"}}}
]
}
}
No attempt is made to verify or validate the wildcard patterns.
=head3 Regular Expressions
If you'd like to specify a file full of regexp, you can do that as well:
$ cat regexp.dat
.*google\.com$
.*yahoo\.com$
To enable regexp parsing, prefix the filename with a C<~>.
es-search.pl to_address:~regexp.dat
Which expands the query to:
{
"bool": {
"minimum_should_match":1,
"should": [
{"regexp":{"to_outbound":{"value":".*google\\.com$"}}},
{"regexp":{"to_outbound":{"value":".*yahoo\\.com$"}}}
]
}
}
No attempt is made to verify or validate the regexp expressions.
=head2 App::ElasticSearch::Utilities::QueryString::Nested
Implement the proposed nested query syntax early. Example:
nested_path:"field:match AND string"
=head1 AUTHOR
Brad Lhotsky <brad@divisionbyzero.net>
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2026 by Brad Lhotsky.
This is free software, licensed under:
The (three-clause) BSD License
=cut
( run in 0.478 second using v1.01-cache-2.11-cpan-5623c5533a1 )