App-ElasticSearch-Utilities
view release on metacpan or search on metacpan
scripts/es-copy-index.pl view on Meta::CPAN
use strict;
use warnings;
use App::ElasticSearch::Utilities qw(:default :index);
use App::ElasticSearch::Utilities::Query;
use App::ElasticSearch::Utilities::QueryString;
use CLI::Helpers qw(:all);
use File::Basename;
use File::Slurp::Tiny qw(read_lines);
use Getopt::Long qw(:config posix_default no_ignore_case no_ignore_case_always);
use Hash::Merge::Simple qw(clone_merge);
use JSON::MaybeXS;
use Pod::Usage;
use Ref::Util qw(is_hashref);
use Time::HiRes qw(time);
#------------------------------------------------------------------------#
# Argument Parsing
# Parsed command-line options land in %OPT, keyed by each option's primary name.
my %OPT;
GetOptions(\%OPT,
    'from=s',           # host handed to es_connect() for the source side
    'to=s',             # host for the destination side (falls back to --from)
    'source=s',         # name of the index to copy from
    'destination=s',    # name of the index to copy to (falls back to --source)
    'append|A',         # skip the "destination must not exist" setup branch
    'block=i',          # batch size passed to the query's set_size()
    'mapping=s',        # path to a JSON file with mappings
    'settings=s',       # path to a JSON file with index settings
    'help|h',           # short usage
    'manual|m',         # full manual
);
#------------------------------------------------------------------------#
# Documentation
# --help: print the usage text and exit with status 1.
if( $OPT{help} ) {
    pod2usage(1);
}
# --manual: render the complete POD (verbose level 2) and exit with status 0.
if( $OPT{manual} ) {
    pod2usage( -exitstatus => 0, -verbose => 2 );
}
# Hand the parsed options to debug_var() for diagnostic output.
debug_var( \%OPT );
#------------------------------------------------------------------------#
# Copy To/From
# Index names for both sides of the copy, plus the batch size.
my %INDEX = (
    from  => $OPT{source},
    # Without --destination the copy keeps the source index name.
    to    => exists $OPT{destination} ? $OPT{destination} : $OPT{source},
    # Handed to the query's set_size(); 1000 unless --block overrides it.
    block => exists $OPT{block} ? $OPT{block} : 1000,
);
# Hosts for both sides of the copy; --to falls back to --from.
# NOTE(review): these are passed to es_connect() as-is and are undef when
# --from/--to are omitted -- confirm whether --from should be mandatory too.
my %HOST = (
    from => $OPT{from},
    to   => exists $OPT{to} ? $OPT{to} : $OPT{from},
);

# Without a source index name there is nothing to copy. Stop here instead of
# letting an undefined index name reach the existence check and its
# "Invalid index:" message.
unless( defined $INDEX{from} && length $INDEX{from} ) {
    output({color=>'red',IMPORTANT=>1},
        "FATAL: Missing required option --source, the name of the index to copy from!"
    );
    exit 1;
}

# Refuse to copy an index onto itself. Undefined hosts are compared as empty
# strings so that omitting --from/--to does not trigger "uninitialized value"
# warnings; the outcome of the comparison is the same as before.
{
    my ($src_host, $dst_host) = map { defined $_ ? $_ : '' } @HOST{qw(from to)};
    if( $dst_host eq $src_host && $INDEX{to} eq $INDEX{from} ) {
        output({color=>'red',IMPORTANT=>1},
            "FATAL: Cannot copy from the same host to the same index name!"
        );
        exit 1;
    }
}
#------------------------------------------------------------------------#
# Build the Query
# JSON codec shared by the rest of the script (used to decode the
# --settings / --mapping files).
my $JSON = JSON->new->canonical->pretty;
my $qs   = App::ElasticSearch::Utilities::QueryString->new();

# Any arguments left on the command line are expanded as a query string;
# with none, fall back to a match_all query.
my $q;
if( @ARGV ) {
    $q = $qs->expand_query_string(@ARGV);
}
else {
    $q = App::ElasticSearch::Utilities::Query->new(must => {match_all=>{}});
}

# Scroll setting of '1m' and a page size of $INDEX{block}.
$q->set_scan_scroll('1m');
$q->set_size( $INDEX{block} );
# Connect to ElasticSearch
# One connection per side of the copy, keyed 'from' and 'to'.
my %ES;
$ES{from} = es_connect( $HOST{from} );
$ES{to}   = es_connect( $HOST{to} );

# The source index has to exist; the destination's existence is only recorded.
$ES{from}->exists( index => $INDEX{from} ) or die "Invalid index: $INDEX{from}";
my $TO_EXISTS = $ES{to}->exists( index => $INDEX{to} );

# Counters, a high-resolution start timestamp and scratch variables that the
# remainder of the script works with.
my ($RECORDS, $TOTAL) = (0, 0);
my $LAST = time();
my $status;
my $res;
# Mappings/Settings for Non-existent index.
unless( exists $OPT{append} ) {
die "Index $INDEX{to} already exists in $HOST{to}" if $TO_EXISTS;
$res = es_request($ES{from}, '_settings', {index => $INDEX{from}} );
debug_var($res);
my $from_settings = $res->{$INDEX{from}}{settings};
my @settings = ({
index => {
number_of_shards => $from_settings->{index}{number_of_shards},
number_of_replicas => $from_settings->{index}{number_of_replicas},
}
});
if( exists $OPT{settings} && -f $OPT{settings} ) {
my $content = join '', read_lines($OPT{settings});
eval {
push @settings, $JSON->decode($content);
1;
} or do {
debug($content);
die "Parsing JSON from $OPT{settings} failed: $@";
};
}
my $to_settings = clone_merge(@settings);
# Determine if we get mappings from a file or from the index.
my $mappings;
if( exists $OPT{mapping} && -f $OPT{mapping} ) {
my $content = join '', read_lines($OPT{mapping});
eval {
$mappings = $JSON->decode($content);
1;
} or do {
debug($content);
die "Parsing JSON from $OPT{mapping} failed: $@";
};
}
else {
$mappings = $res->{$INDEX{from}}{mappings};
}
$res = es_request($ES{to}, '/',
scripts/es-copy-index.pl view on Meta::CPAN
=head3 Wildcard Query via '*'
Provide an '*' prefix to a query string parameter to promote that parameter to a C<wildcard> filter.
This uses wildcard matching for text fields to make matching more intuitive.
E.g.:
*user_agent:"Mozilla*"
Is translated into:
{ wildcard => { user_agent => "Mozilla*" } }
=head3 Regexp Query via '/'
Provide a '/' prefix to a query string parameter to promote that parameter to a C<regexp> filter.
If you want to use regexp matching for finding data, you can use:
/message:'\\bden(ial|ied|y)'
Is translated into:
{ regexp => { message => "\\bden(ial|ied|y)" } }
=head3 Fuzzy Matching via '~'
Provide a '~' prefix to a query string parameter to promote that parameter to a C<fuzzy> filter.
~message:deny
Is translated into:
{ fuzzy => { message => "deny" } }
=head3 Phrase Matching via '+'
Provide a '+' prefix to a query string parameter to promote that parameter to a C<match_phrase> filter.
+message:"login denied"
Is translated into:
{ match_phrase => { message => "login denied" } }
=head3 Automatic Match Queries for Text Fields
If the field meta data is provided and the field is a C<text> type, the query
will automatically be mapped to a C<match> query.
# message field is text
message:"foo"
Is translated into:
{ match => { message => "foo" } }
=head2 App::ElasticSearch::Utilities::QueryString::IP
If a field is an IP address and its value uses CIDR notation, it's expanded to a range query.
src_ip:10.0/8 => src_ip:[10.0.0.0 TO 10.255.255.255]
=head2 App::ElasticSearch::Utilities::QueryString::Ranges
This plugin translates some special comparison operators so you don't need to
remember them anymore.
Example:
price:<100
Will translate into:
{ range: { price: { lt: 100 } } }
And:
price:>50,<100
Will translate to:
{ range: { price: { gt: 50, lt: 100 } } }
=head3 Supported Operators
B<gt> via E<gt>, B<gte> via E<gt>=, B<lt> via E<lt>, B<lte> via E<lt>=
=head2 App::ElasticSearch::Utilities::QueryString::Underscored
This plugin translates some special underscore surrounded tokens into
the Elasticsearch Query DSL.
Implemented:
=head3 _prefix_
Example query string:
_prefix_:useragent:'Go '
Translates into:
{ prefix => { useragent => 'Go ' } }
=head2 App::ElasticSearch::Utilities::QueryString::FileExpansion
If the match ends in .dat, .txt, .csv, or .json then we attempt to read a file with that name and OR the condition:
$ cat test.dat
50 1.2.3.4
40 1.2.3.5
30 1.2.3.6
20 1.2.3.7
Or
$ cat test.csv
50,1.2.3.4
40,1.2.3.5
30,1.2.3.6
20,1.2.3.7
Or
$ cat test.txt
1.2.3.4
1.2.3.5
1.2.3.6
1.2.3.7
Or
$ cat test.json
{ "ip": "1.2.3.4" }
{ "ip": "1.2.3.5" }
{ "ip": "1.2.3.6" }
{ "ip": "1.2.3.7" }
We can source that file:
src_ip:test.dat => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
src_ip:test.json[ip] => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
This makes it simple to use the --data-file output options and build queries
based off previous queries. For .txt and .dat files, the delimiter for columns
in the file must be either a tab or a null. For files ending in
.csv, Text::CSV_XS is used for accurate parsing of the file format. Files
ending in .json are considered to be newline-delimited JSON.
You can also specify the column of the data file to use, the default being the last column or (-1). Columns use
B<zero-based> indexing. This means the first column is index 0, the second is 1, and so on. The previous example can be rewritten
as:
src_ip:test.dat[1]
or:
src_ip:test.dat[-1]
For newline delimited JSON files, you need to specify the key path you want to extract from the file. If we have a
JSON source file with:
{ "first": { "second": { "third": [ "bob", "alice" ] } } }
{ "first": { "second": { "third": "ginger" } } }
{ "first": { "second": { "nope": "fred" } } }
We could search using:
actor:test.json[first.second.third]
Which would expand to:
{ "terms": { "actor": [ "alice", "bob", "ginger" ] } }
This option will iterate through the whole file and unique the elements of the list. They will then be transformed into
an appropriate L<terms query|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html>.
=head3 Wildcards
We can also have a group of wildcards or regexps in a file:
$ cat wildcards.dat
*@gmail.com
*@yahoo.com
To enable wildcard parsing, prefix the filename with a C<*>.
es-search.pl to_address:*wildcards.dat
Which expands the query to:
{
"bool": {
"minimum_should_match":1,
"should": [
{"wildcard":{"to_address":{"value":"*@gmail.com"}}},
{"wildcard":{"to_address":{"value":"*@yahoo.com"}}}
]
}
}
No attempt is made to verify or validate the wildcard patterns.
=head3 Regular Expressions
If you'd like to specify a file full of regular expressions, you can do that as well:
$ cat regexp.dat
.*google\.com$
.*yahoo\.com$
To enable regexp parsing, prefix the filename with a C<~>.
es-search.pl to_address:~regexp.dat
Which expands the query to:
{
"bool": {
"minimum_should_match":1,
"should": [
{"regexp":{"to_address":{"value":".*google\\.com$"}}},
{"regexp":{"to_address":{"value":".*yahoo\\.com$"}}}
]
}
}
No attempt is made to verify or validate the regular expressions.
=head2 App::ElasticSearch::Utilities::QueryString::Nested
Implements the proposed nested query syntax early. Example:
nested_path:"field:match AND string"
=head1 AUTHOR
Brad Lhotsky <brad@divisionbyzero.net>
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2026 by Brad Lhotsky.
This is free software, licensed under:
The (three-clause) BSD License
=cut
( run in 0.893 second using v1.01-cache-2.11-cpan-5623c5533a1 )