App-ElasticSearch-Utilities

 view release on metacpan or  search on metacpan

scripts/es-search.pl  view on Meta::CPAN

    # --by TYPE:FIELD registers a sub-aggregation under the key "by" in
    # %sub_agg.  Unsupported types and specifications without a field are
    # reported and ignored, leaving %sub_agg untouched.
    if( $OPT{by}) {
        # Limit the split to two parts so a field name containing ':' is not
        # truncated at its first colon.
        my ($type,$field) = split /:/, $OPT{by}, 2;
        if( !exists $SUPPORTED_AGGREGATIONS{$type} ) {
            output({color=>'red'}, "Aggregation '$type' is not currently supported, ignoring.");
        }
        elsif( !defined $field || !length $field ) {
            # Without a field the aggregation body would carry an undefined
            # (or empty) field name, so reject it here instead.
            output({color=>'red'}, "Aggregation '$type' requires a field, expected --by ${type}:FIELD, ignoring.");
        }
        else {
            $sub_agg{by} = { $type => {field => $field} };
        }
    }
    # Build one sub-aggregation per --with value.  Each value is split on ':'
    # into at most three parts, interpreted below as [type:]field[:arg], and the
    # result is stored in %sub_agg.
    if( $OPT{with} ) {
        # --with may be a single value or an array reference of values
        my @with = is_arrayref($OPT{with}) ? @{ $OPT{with} } : ( $OPT{with} );
        foreach my $with ( @with )  {
            my @attrs = split /:/, $with, 3;
            # Process Args from Right to Left
            # The trailing argument is taken when three parts were given, or when
            # the last part starts with a digit; otherwise it is the empty string.
            my $arg   = @attrs == 3 ? pop @attrs
                      : $attrs[-1] =~ /^\d/ ? pop @attrs
                      : '';
            # The same argument is read three ways; which one is used depends on
            # the aggregation type when the aggregation is assembled below.
            #   $pcts: comma separated percentiles, else the default list
            #   $size: bucket count when purely numeric, else 3
            #   $hi:   histogram interval, 0.1 when the argument is false
            my $pcts  = $arg =~ /^\d{1,2}(?:\.\d+)?(?:,\d{1,2}(?:\.\d+)?)*$/ ? $arg : '25,50,75,90,95,99';
            my $size  = $arg =~ /^\d+$/ ? $arg : 3;
            my $hi    = $arg || 0.1;
            # The next part from the right must be a key of %FIELDS to be
            # accepted as the field; anything left over is the aggregation type.
            my $field = exists $FIELDS{$attrs[-1]} ? pop @attrs : undef;
            my $type  = @attrs ? pop @attrs : 'terms';
            # Skip invalid elements
            next unless defined $field and defined $size and $size > 0;

            my %params = ();
            # Default identifier is "type-field"
            my $id = "$type-$field";
            # If a term agg and we haven't used this field name, simplify it
            # Note: size and missing are only set inside this branch.
            if( $type =~ /terms$/ && !$sub_agg{$field} ) {
                $id = $field;
                $params{size} = $size;
                $params{missing} = 'MISSING' if $OPT{'with-missing'};
            }

            # The pattern is unanchored, so types such as extended_stats also
            # match.  Unless --no-implications is set, documents without the
            # field are excluded from the query.
            if( $type =~ /histogram|stats|percentiles/ && !$OPT{'no-implications'} ) {
                output({color=>'magenta',sticky=>1}, "* Using a statistical aggregation implies an exists filter on $field, use --no-implications to disable this");
                $q->add_bool( must => { exists => { field => $field } } );
            }

            # Assemble the aggregation body; percents and interval are only
            # added for their respective aggregation types.
            $sub_agg{$id} = {
                $type => {
                    field => $field,
                    $type eq 'percentiles' ? ( percents => [split /,/, $pcts] ) : (),
                    $type eq 'histogram'   ? ( interval => $hi ) :  (),
                    %params,
                }
            };
        }
    }

    # Assemble the top-level aggregation.  --with-missing adds a MISSING bucket
    # only when the top aggregation is 'terms'.
    my %params = ();
    $params{missing} = 'MISSING' if $OPT{'with-missing'} and $top_agg eq 'terms';

    # The first remaining aggregation field is the field being aggregated and
    # the last column of the output header.
    my $field = shift @agg_fields;
    $agg_header = "count\tpct\t" . $field;
    $agg{$top_agg} = { field => $field, %params };

    # For significant_terms, expand the --bg-filter query string and attach the
    # resulting query as the background filter.
    if( $OPT{'bg-filter'} && $top_agg eq 'significant_terms' ) {
        my $bgf = App::ElasticSearch::Utilities::QueryString->new();
        my $bgq = $bgf->expand_query_string($OPT{'bg-filter'});
        $agg{$top_agg}->{background_filter} = $bgq->query;

    }

    # When a "by" sub-aggregation was registered, prefix the header with the
    # --by specification and order buckets by it, then by descending _count.
    if( exists $sub_agg{by} ) {
        $agg_header = "$OPT{by}\t" . $agg_header;
        $agg{$top_agg}->{order} = [ { by => $ORDER }, { "_count" => "desc" } ];
    }
    # Attach the collected sub-aggregations, if there are any
    $agg{aggregations} = \%sub_agg if keys %sub_agg;

    # --all requests 1,000,000 buckets, otherwise the configured size is used
    if( exists $OPT{all} ) {
        verbose({color=>'cyan'}, "# Aggregations with --all are limited to returning 1,000,000 results.");
        $agg{$top_agg}->{size} = 1_000_000;
    }
    else {
        $agg{$top_agg}->{size} = $CONFIG{size};
    }
    # Register the aggregation as "top", plus an "out_of" cardinality
    # aggregation on the same field.
    $q->add_aggregations( top => \%agg );
    $q->add_aggregations( out_of => { cardinality => { field => $field  } } );

    # With --interval, wrap the aggregations in a date_histogram named "step"
    # over the configured timestamp field.
    if( $OPT{interval} ) {
        $q->wrap_aggregations( step => {
            date_histogram => {
                field    => $CONFIG{timestamp},
                interval => $OPT{interval},
            }
        });
    }
}
elsif(exists $OPT{tail}) {
    $q->set_size($CONFIG{'max-batch-size'});
    @AGES = ($AGES[-1]);
}
elsif( $OPT{all} ) {
    $q->set_size( $CONFIG{'max-batch-size'} );
}
else {
    $q->set_size( $CONFIG{size} < $CONFIG{'max-batch-size'} ? $CONFIG{size} : $CONFIG{'max-batch-size'} );
}

# File-scoped counters and state, initialised before the AGES loop starts
my %displayed_indices = ();
my $TOTAL_HITS        = 0;
my $OUT_OF            = 0;
my $last_hit_ts       = undef;
my $duration          = 0;
my $displayed         = 0;
my $header            = 0;
my $age               = undef;
my %last_batch_id     = ();
my %AGGS_TOTALS       = ();
my %AGES_SEEN         = ();
# Handle CTRL+C During the Loop
# The handler only raises a flag; the AGES loop tests !$DONE in its condition,
# so no further iteration is started once the signal has been received.
my $DONE              = 0;
local $SIG{INT}       = sub { $DONE=1 };

verbose({color=>'green',level=>1}, "= Query setup complete, beginning request.");
AGES: while( !$DONE && @AGES ) {
    # With --tail, we don't want to deplete @AGES
    $age = $OPT{tail} ? $AGES[0] : shift @AGES;

    # Pause for 200ms if we're tailing

scripts/es-search.pl  view on Meta::CPAN

                                my $v = es_format_numeric($agg->{$k}{$stat}, $CONFIG{precision});
                                push @stats, $alias{$stat} || $stat => $v;
                            }
                            $subaggs{$k} = [ \@stats ] if @stats;
                        }
                    }
                }
                if( keys %subaggs ) {
                    foreach my $subagg (sort keys %subaggs) {
                        foreach my $extra ( @{ $subaggs{$subagg} } ) {
                            output({indent=>$indent,data=>1},
                                join "\t", @out, $subagg, @{ $extra }
                            );
                        }
                    }
                }
                else {
                    # Simple output
                    output({indent=>$indent,data=>!$CONFIG{summary}}, join("\t",@out));
                }
            }
        }
        elsif(exists $aggregations->{top}) {
            output({indent=>1,color=>'red'}, "= No results.");
        }
    }
}

# Sort comparator (reads the sort variables $a and $b) that compares two keys
# by their numeric value in %indices.  When $ORDER is 'asc' the larger value
# sorts first; for any other $ORDER the smaller value sorts first.
# NOTE(review): %indices presumably maps index names to their age; confirm
# against where it is populated.
sub by_index_age {
    # Swapping the operands reverses the direction of the comparison
    my ($first, $second) = $ORDER eq 'asc' ? ($b, $a) : ($a, $b);
    return $indices{$first} <=> $indices{$second};
}

__END__

=pod

=head1 NAME

es-search.pl - Provides a CLI for quick searches of data in ElasticSearch daily indexes

=head1 VERSION

version 8.9

=head1 SYNOPSIS

es-search.pl [search string]

Options:

    --help              print help
    --manual            print full manual
    --filter            Force filter context for all query elements
    --show              Comma separated list of fields to display, default is ALL, switches to tab output
    --tail              Continue the query until CTRL+C is sent
    --top               Perform an aggregation on the fields, by a comma separated list of up to 2 items
    --by                Perform an aggregation using the result of this, example: --by cardinality:src_ip
    --with              Perform a sub aggregation on the query
    --bg-filter         Only used if --top aggregation is significant_terms, applies a background filter
    --match-all         Enables the ElasticSearch match_all operator
    --interval          When running aggregations, wrap the aggregation in a date_histogram with this interval
    --prefix            Takes "field:string" and enables the Lucene prefix query for that field
    --exists            Field which must be present in the document
    --missing           Field which must not be present in the document
    --size              Result size, default is 20, aliased to -n and --limit
    --max-batch-size    When making requests to ES, retrieve this many docs in a single request, defaults to 50
    --all               Don't consider result size, just give me *everything*
    --asc               Sort by ascending timestamp
    --desc              Sort by descending timestamp (Default)
    --sort              List of fields for custom sorting
    --format            When --show isn't used, use this method for outputting the record, supported: json, jsonpretty, yaml
                        json assumes --no-decorators as we assume you're piping through jq
    --pretty            Where possible, use JSON->pretty
    --precision         For floating point values, use this many digits of precision, defaults to 3
    --no-decorators     Do not show the header with field names in the query results
    --no-header         Same as above
    --no-implications   Don't attempt to imply filters from statistical aggregations
    --fields            Display the field list for this index!
    --bases             Display the index base list for this cluster.
    --timestamp         Field to use as the date object, default: @timestamp

From App::ElasticSearch::Utilities:

    --local         Use localhost as the elasticsearch host
    --host          ElasticSearch host to connect to
    --port          HTTP port for your cluster
    --proto         Defaults to 'http', can also be 'https'
    --http-username HTTP Basic Auth username
    --password-exec Script to run to get the users password
    --insecure      Don't verify TLS certificates
    --cacert        Specify the TLS CA file
    --capath        Specify the directory with TLS CAs
    --cert          Specify the path to the client certificate
    --key           Specify the path to the client private key file
    --noop          Any operations other than GET are disabled, can be negated with --no-noop
    --timeout       Timeout to ElasticSearch, default 10
    --keep-proxy    Do not remove any proxy settings from %ENV
    --index         Index to run commands against
    --base          For daily indexes, reference only those starting with "logstash"
                     (same as --pattern logstash-* or logstash-DATE)
    --pattern       Use a pattern to operate on the indexes
    --days          If using a pattern or base, how many days back to go, default: 1

See also the "CONNECTION ARGUMENTS" and "INDEX SELECTION ARGUMENTS" sections from App::ElasticSearch::Utilities.

From CLI::Helpers:

    --data-file         Path to a file to write lines tagged with 'data => 1'
    --tags              A comma separated list of tags to display
    --color             Boolean, enable/disable color, default use git settings
    --verbose           Incremental, increase verbosity (Alias is -v)
    --debug             Show developer output
    --debug-class       Show debug messages originating from a specific package, default: main
    --quiet             Show no output (for cron)
    --syslog            Generate messages to syslog as well
    --syslog-facility   Default "local0"
    --syslog-tag        The program name, default is the script name
    --syslog-debug      Enable debug messages to syslog if in use, default false
    --nopaste           Use App::Nopaste to paste output to configured paste service

scripts/es-search.pl  view on Meta::CPAN


This will show the top 2 programs with log messages containing the word error, ordered by the
cardinality (distinct count) of hosts, and for each program the top 5 hosts.

Without the --with, the results might look like this:

    112314 0.151 sshd
    21224  0.016 ntp

The B<--with> option would expand that output to look like this:

    112314   0.151 host   bastion-804   12431  0.111 sshd
    112314   0.151 host   bastion-803   10009  0.089 sshd
    112314   0.151 host   bastion-805   9768   0.087 sshd
    112314   0.151 host   bastion-801   8789   0.078 sshd
    112314   0.151 host   bastion-802   4121   0.037 sshd
    21224    0.016 host   webapp-324    21223  0.999 ntp
    21224    0.016 host   mail-42       1      0.000 ntp

This may be specified multiple times, the result is more I<rows>, not more I<columns>, e.g.

    $ es-search.pl --base logstash error --top program --size 2 --by cardinality:host --with host:5 --with dc:2

Produces:

    112314 0.151  dc     arlington     112314 1.000 sshd
    112314 0.151  host   bastion-804   12431  0.111 sshd
    112314 0.151  host   bastion-803   10009  0.089 sshd
    112314 0.151  host   bastion-805   9768   0.087 sshd
    112314 0.151  host   bastion-801   8789   0.078 sshd
    112314 0.151  host   bastion-802   4121   0.037 sshd
    21224  0.016  dc     amsterdam     21223  0.999 ntp
    21224  0.016  dc     la            1      0.000 ntp
    21224  0.016  host   webapp-324    21223  0.999 ntp
    21224  0.016  host   mail-42       1      0.000 ntp

You may sub aggregate using any L<bucket aggregation|https://www.elastic.co/guide/en/elasticsearch/reference/master/search-aggregations-bucket.html>
as long as the aggregation provides a B<key> element.  Additionally, doc_count, score, and bg_count will be reported in the output.

Other examples:

    --with significant_terms:crime
    --with cardinality:accts
    --with min:out_bytes
    --with max:out_bytes
    --with avg:out_bytes
    --with sum:out_bytes
    --with stats:out_bytes
    --with extended_stats:out_bytes
    --with percentiles:out_bytes
    --with percentiles:out_bytes:50,95,99
    --with histogram:out_bytes:1024

=item B<with-missing>

For terms aggregations, adds a C<MISSING> bucket.

=item B<bg-filter>

Only used if the C<--top> aggregation is C<significant_terms>.  Sets the
background filter for the C<significant_terms> aggregation.

    es-search.pl --top significant_terms:src_ip method:POST file:\/get\/sensitive_data --bg-filter method:POST

=item B<interval>

When performing aggregations, wrap those aggregations in a date_histogram of this interval.  This
helps flush out "what changed in the last hour."

=item B<match-all>

Apply the ElasticSearch "match_all" search operator to query on all documents
in the index.  This is the default with no search parameters.

=item B<prefix>

Takes a "field:string" combination.  Multiple --prefix options may be specified and will be "AND"'d together.

Example:

    --prefix useragent:'Go '

Will search for documents where the useragent field matches a prefix search on the string 'Go '

JSON Equivalent is:

    { "prefix": { "useragent": "Go " } }

=item B<exists>

Filter results to those containing a valid, not null field

    --exists referer

Only show records with a referer field in the document.

=item B<missing>

Filter results to those not containing a valid, not null field

    --missing referer

Only show records without a referer field in the document.

=item B<bases>

Display a list of bases that can be used with the --base option.

Use with --verbose to show age information on the indexes in each base.

=item B<fields>

Display a list of searchable fields

=item B<index>

Search only this index for data, may also be a comma separated list

=item B<days>

The number of days back to search, the default is 5



( run in 1.885 second using v1.01-cache-2.11-cpan-d8267643d1d )