App-ElasticSearch-Utilities
view release on metacpan or search on metacpan
scripts/es-search.pl view on Meta::CPAN
# --by: register a metric sub-aggregation under the key 'by'; the option
# value is split on ':' into an aggregation type and a field name.
if( $OPT{by} ) {
    my ($type, $by_field) = split /\:/, $OPT{by};
    if( !exists $SUPPORTED_AGGREGATIONS{$type} ) {
        # Unknown aggregation types are reported and otherwise ignored
        output({color=>'red'}, "Aggregation '$type' is not currently supported, ignoring.");
    }
    else {
        my %metric = ( field => $by_field );
        $sub_agg{by} = { $type => \%metric };
    }
}
# --with: build one sub-aggregation per specification and store it in %sub_agg.
#
# Each specification is split on ':' into at most three parts and read from
# right to left as "[type:]field[:arg]":
#   arg   - optional; taken when three parts are present or when the last part
#           starts with a digit. It is interpreted as the bucket size, the
#           percentile list, or the histogram interval depending on its shape
#           and on the aggregation type.
#   field - must be a key of %FIELDS, otherwise the specification is skipped.
#   type  - aggregation type, defaults to 'terms'.
# The %sub_agg key is the bare field name for a terms-style aggregation when
# that key is still unused, and "type-field" otherwise.
if( $OPT{with} ) {
    my @with = is_arrayref($OPT{with}) ? @{ $OPT{with} } : ( $OPT{with} );
    foreach my $with ( @with ) {
        my @attrs = split /:/, $with, 3;
        # Nothing to parse; skip before indexing into an empty list
        next unless @attrs;
        # Process Args from Right to Left
        my $arg = @attrs == 3          ? pop @attrs
                : $attrs[-1] =~ /^\d/  ? pop @attrs
                :                        '';
        my $pcts = $arg =~ /^\d{1,2}(?:\.\d+)?(?:,\d{1,2}(?:\.\d+)?)*$/ ? $arg : '25,50,75,90,95,99';
        my $size = $arg =~ /^\d+$/ ? $arg : 3;
        my $hi   = $arg || 0.1;
        # A purely numeric specification (e.g. "5") leaves @attrs empty once the
        # argument has been popped, so guard the look-up against undef
        my $field = @attrs && exists $FIELDS{$attrs[-1]} ? pop @attrs : undef;
        my $type  = @attrs ? pop @attrs : 'terms';
        # Skip invalid elements
        next unless defined $field and defined $size and $size > 0;

        my %params = ();
        my $id = "$type-$field";
        if( $type =~ /terms$/ ) {
            # If we haven't used this field name as a key yet, simplify the id
            $id = $field unless $sub_agg{$field};
            # The requested size and the MISSING bucket apply to every
            # terms-style aggregation, not only to the first one on a field
            $params{size}    = $size;
            $params{missing} = 'MISSING' if $OPT{'with-missing'};
        }
        if( $type =~ /histogram|stats|percentiles/ && !$OPT{'no-implications'} ) {
            output({color=>'magenta',sticky=>1}, "* Using a statistical aggregation implies an exists filter on $field, use --no-implications to disable this");
            $q->add_bool( must => { exists => { field => $field } } );
        }
        $sub_agg{$id} = {
            $type => {
                field => $field,
                $type eq 'percentiles' ? ( percents => [split /,/, $pcts] ) : (),
                $type eq 'histogram'   ? ( interval => $hi ) : (),
                %params,
            }
        };
    }
}
# Assemble the top-level aggregation on the first entry of @agg_fields and
# register it, together with a cardinality count on the same field, on $q.
my %params = ();
# The MISSING bucket is only requested when the top aggregation is 'terms'
$params{missing} = 'MISSING' if $OPT{'with-missing'} and $top_agg eq 'terms';
my $field = shift @agg_fields;
# Tab separated column header for the aggregation output
$agg_header = "count\tpct\t" . $field;
$agg{$top_agg} = { field => $field, %params };
# --bg-filter is expanded into a query and attached as the background_filter,
# but only for significant_terms
if( $OPT{'bg-filter'} && $top_agg eq 'significant_terms' ) {
my $bgf = App::ElasticSearch::Utilities::QueryString->new();
my $bgq = $bgf->expand_query_string($OPT{'bg-filter'});
$agg{$top_agg}->{background_filter} = $bgq->query;
}
# When a 'by' sub-aggregation exists, prefix the header with the --by value
# and order buckets by it in $ORDER direction, then by descending _count
if( exists $sub_agg{by} ) {
$agg_header = "$OPT{by}\t" . $agg_header;
$agg{$top_agg}->{order} = [ { by => $ORDER }, { "_count" => "desc" } ];
}
# Only attach sub-aggregations when at least one was configured
$agg{aggregations} = \%sub_agg if keys %sub_agg;
# --all raises the bucket count to a fixed 1,000,000; otherwise $CONFIG{size} is used
if( exists $OPT{all} ) {
verbose({color=>'cyan'}, "# Aggregations with --all are limited to returning 1,000,000 results.");
$agg{$top_agg}->{size} = 1_000_000;
}
else {
$agg{$top_agg}->{size} = $CONFIG{size};
}
$q->add_aggregations( top => \%agg );
# 'out_of' is a cardinality aggregation on the same field as 'top'
$q->add_aggregations( out_of => { cardinality => { field => $field } } );
# --interval wraps the aggregations in a date_histogram named 'step' on the
# configured timestamp field
if( $OPT{interval} ) {
$q->wrap_aggregations( step => {
date_histogram => {
field => $CONFIG{timestamp},
interval => $OPT{interval},
}
});
}
}
# --tail: request the maximum batch size and keep only the last entry of @AGES
elsif(exists $OPT{tail}) {
$q->set_size($CONFIG{'max-batch-size'});
@AGES = ($AGES[-1]);
}
# --all: request the maximum batch size
elsif( $OPT{all} ) {
$q->set_size( $CONFIG{'max-batch-size'} );
}
# Default: request the smaller of the configured size and the maximum batch size
else {
$q->set_size( $CONFIG{size} < $CONFIG{'max-batch-size'} ? $CONFIG{size} : $CONFIG{'max-batch-size'} );
}
# Bookkeeping initialised before the AGES loop starts: hashes begin empty,
# counters begin at zero, and the remaining scalars begin undefined.
my (%displayed_indices, %last_batch_id, %AGGS_TOTALS, %AGES_SEEN);
my ($TOTAL_HITS, $OUT_OF, $duration, $displayed, $header) = (0, 0, 0, 0, 0);
my ($last_hit_ts, $age);

# Handle CTRL+C During the Loop: the handler only raises a flag, which the
# loop condition checks
my $DONE = 0;
local $SIG{INT} = sub { $DONE = 1 };

verbose({color=>'green',level=>1}, "= Query setup complete, beginning request.");
AGES: while( !$DONE && @AGES ) {
# With --tail, we don't want to deplete @AGES
$age = $OPT{tail} ? $AGES[0] : shift @AGES;
# Pause for 200ms if we're tailing
scripts/es-search.pl view on Meta::CPAN
my $v = es_format_numeric($agg->{$k}{$stat}, $CONFIG{precision});
push @stats, $alias{$stat} || $stat => $v;
}
$subaggs{$k} = [ \@stats ] if @stats;
}
}
}
if( keys %subaggs ) {
foreach my $subagg (sort keys %subaggs) {
foreach my $extra ( @{ $subaggs{$subagg} } ) {
output({indent=>$indent,data=>1},
join "\t", @out, $subagg, @{ $extra }
);
}
}
}
else {
# Simple output
output({indent=>$indent,data=>!$CONFIG{summary}}, join("\t",@out));
}
}
}
elsif(exists $aggregations->{top}) {
output({indent=>1,color=>'red'}, "= No results.");
}
}
}
# Sort comparator: numerically compares the %indices values of $a and $b.
# The operands are swapped when $ORDER is 'asc'.
# NOTE(review): %indices presumably maps index name to its age; confirm
# against where it is populated.
sub by_index_age {
    my ($left, $right) = $ORDER eq 'asc' ? ($b, $a) : ($a, $b);
    return $indices{$left} <=> $indices{$right};
}
__END__
=pod
=head1 NAME
es-search.pl - Provides a CLI for quick searches of data in ElasticSearch daily indexes
=head1 VERSION
version 8.9
=head1 SYNOPSIS
es-search.pl [search string]
Options:
--help print help
--manual print full manual
--filter Force filter context for all query elements
--show Comma separated list of fields to display, default is ALL, switches to tab output
--tail Continue the query until CTRL+C is sent
--top Perform an aggregation on the fields, by a comma separated list of up to 2 items
--by Perform an aggregation using the result of this, example: --by cardinality:src_ip
--with Perform a sub aggregation on the query
--bg-filter Only used if --top aggregation is significant_terms, applies a background filter
--match-all Enables the ElasticSearch match_all operator
--interval When running aggregations, wrap the aggregation in a date_histogram with this interval
--prefix Takes "field:string" and enables the Lucene prefix query for that field
--exists Field which must be present in the document
--missing Field which must not be present in the document
--size Result size, default is 20, aliased to -n and --limit
--max-batch-size When making requests to ES, retrieve this many docs in a single request, defaults to 50
--all Don't consider result size, just give me *everything*
--asc Sort by ascending timestamp
--desc Sort by descending timestamp (Default)
--sort List of fields for custom sorting
--format When --show isn't used, use this method for outputting the record, supported: json, jsonpretty, yaml
json assumes --no-decorators as we assume you're piping through jq
--pretty Where possible, use JSON->pretty
--precision For floating point values, use this many digits of precision, defaults to 3
--no-decorators Do not show the header with field names in the query results
--no-header Same as above
--no-implications Don't attempt to imply filters from statistical aggregations
--fields Display the field list for this index!
--bases Display the index base list for this cluster.
--timestamp Field to use as the date object, default: @timestamp
From App::ElasticSearch::Utilities:
--local Use localhost as the elasticsearch host
--host ElasticSearch host to connect to
--port HTTP port for your cluster
--proto Defaults to 'http', can also be 'https'
--http-username HTTP Basic Auth username
--password-exec Script to run to get the users password
--insecure Don't verify TLS certificates
--cacert Specify the TLS CA file
--capath Specify the directory with TLS CAs
--cert Specify the path to the client certificate
--key Specify the path to the client private key file
--noop Any operations other than GET are disabled, can be negated with --no-noop
--timeout Timeout to ElasticSearch, default 10
--keep-proxy Do not remove any proxy settings from %ENV
--index Index to run commands against
--base For daily indexes, reference only those starting with "logstash"
(same as --pattern logstash-* or logstash-DATE)
--pattern Use a pattern to operate on the indexes
--days If using a pattern or base, how many days back to go, default: 1
See also the "CONNECTION ARGUMENTS" and "INDEX SELECTION ARGUMENTS" sections from App::ElasticSearch::Utilities.
From CLI::Helpers:
--data-file Path to a file to write lines tagged with 'data => 1'
--tags A comma separated list of tags to display
--color Boolean, enable/disable color, default use git settings
--verbose Incremental, increase verbosity (Alias is -v)
--debug Show developer output
--debug-class Show debug messages originating from a specific package, default: main
--quiet Show no output (for cron)
--syslog Generate messages to syslog as well
--syslog-facility Default "local0"
--syslog-tag The program name, default is the script name
--syslog-debug Enable debug messages to syslog if in use, default false
--nopaste Use App::Nopaste to paste output to configured paste service
scripts/es-search.pl view on Meta::CPAN
This will show the top 2 programs with log messages containing the word error, ordered by the
cardinality (distinct count) of hosts, and for each program the top 5 hosts.
Without the --with, the results might look like this:
112314 0.151 sshd
21224 0.016 ntp
The B<--with> option would expand that output to look like this:
112314 0.151 host bastion-804 12431 0.111 sshd
112314 0.151 host bastion-803 10009 0.089 sshd
112314 0.151 host bastion-805 9768 0.087 sshd
112314 0.151 host bastion-801 8789 0.078 sshd
112314 0.151 host bastion-802 4121 0.037 sshd
21224 0.016 host webapp-324 21223 0.999 ntp
21224 0.016 host mail-42 1 0.000 ntp
This may be specified multiple times, the result is more I<rows>, not more I<columns>, e.g.
$ es-search.pl --base logstash error --top program --size 2 --by cardinality:host --with host:5 --with dc:2
Produces:
112314 0.151 dc arlington 112314 1.000 sshd
112314 0.151 host bastion-804 12431 0.111 sshd
112314 0.151 host bastion-803 10009 0.089 sshd
112314 0.151 host bastion-805 9768 0.087 sshd
112314 0.151 host bastion-801 8789 0.078 sshd
112314 0.151 host bastion-802 4121 0.037 sshd
21224 0.016 dc amsterdam 21223 0.999 ntp
21224 0.016 dc la 1 0.000 ntp
21224 0.016 host webapp-324 21223 0.999 ntp
21224 0.016 host mail-42 1 0.000 ntp
You may sub aggregate using any L<bucket aggregation|https://www.elastic.co/guide/en/elasticsearch/reference/master/search-aggregations-bucket.html>
as long as the aggregation provides a B<key> element. Additionally, doc_count, score, and bg_count will be reported in the output.
Other examples:
--with significant_terms:crime
--with cardinality:accts
--with min:out_bytes
--with max:out_bytes
--with avg:out_bytes
--with sum:out_bytes
--with stats:out_bytes
--with extended_stats:out_bytes
--with percentiles:out_bytes
--with percentiles:out_bytes:50,95,99
--with histogram:out_bytes:1024
=item B<with-missing>
For terms aggregations, adds a C<MISSING> bucket.
=item B<bg-filter>
Only used if the C<--top> aggregation is C<significant_terms>. Sets the
background filter for the C<significant_terms> aggregation.
es-search.pl --top significant_terms:src_ip method:POST file:\/get\/sensitive_data --bg-filter method:POST
=item B<interval>
When performing aggregations, wrap those aggregations in a date_histogram of this interval. This
helps flush out "what changed in the last hour."
=item B<match-all>
Apply the ElasticSearch "match_all" search operator to query on all documents
in the index. This is the default with no search parameters.
=item B<prefix>
Takes a "field:string" combination. May be specified multiple times; multiple --prefix options are "AND"'d together.
Example:
--prefix useragent:'Go '
Will search for documents where the useragent field matches a prefix search on the string 'Go '
JSON Equivalent is:
{ "prefix": { "useragent": "Go " } }
=item B<exists>
Filter results to those containing a valid, not null field
--exists referer
Only show records with a referer field in the document.
=item B<missing>
Filter results to those not containing a valid, not null field
--missing referer
Only show records without a referer field in the document.
=item B<bases>
Display a list of bases that can be used with the --base option.
Use with --verbose to show age information on the indexes in each base.
=item B<fields>
Display a list of searchable fields
=item B<index>
Search only this index for data, may also be a comma separated list
=item B<days>
The number of days back to search, the default is 5
( run in 1.885 second using v1.01-cache-2.11-cpan-d8267643d1d )