App-ElasticSearch-Utilities
view release on metacpan or search on metacpan
lib/App/ElasticSearch/Utilities/Aggregations.pm view on Meta::CPAN
package App::ElasticSearch::Utilities::Aggregations;
# ABSTRACT: Code to simplify creating and working with Elasticsearch aggregations
use v5.16;
use warnings;
use App::ElasticSearch::Utilities qw(es_format_numeric);
use Storable qw(dclone);
use Sub::Exporter -setup => {
exports => [ qw(
expand_aggregate_string
es_flatten_aggregations es_flatten_aggs
is_single_stat
)],
groups => {
default => [qw(
expand_aggregate_string
es_flatten_aggregations es_flatten_aggs
is_single_stat
)],
},
};
# Registry of the aggregations this module knows about, keyed by aggregation
# name.  Keys used by the entries below:
#   params      - code ref; receives the optional positional parameter and
#                 returns a hash ref of aggregation parameters
#   type        - 'bucket' or 'metric'
#   composite   - flag set on some bucket aggregations
#                 NOTE(review): presumably "usable as a composite source" - confirm
#   single_stat - flag read by is_single_stat()
my %Aggregations;

# terms: positional parameter is the size; a missing, false ("0"/"") or
# non-integer value yields no extra parameters.
$Aggregations{terms} = {
    params    => sub { $_[0] && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
    type      => 'bucket',
    composite => 1,
};

# significant_terms: positional parameter is the size.  The defined() guard
# keeps a missing parameter from raising an "uninitialized value" warning.
$Aggregations{significant_terms} = {
    params => sub {
        defined $_[0] && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {};
    },
    type => 'bucket',
};

# rare_terms: positional parameter is the max_doc_count.
$Aggregations{rare_terms} = {
    params => sub {
        defined $_[0] && $_[0] =~ /^\d+$/ ? { max_doc_count => $_[0] } : {};
    },
    type => 'bucket',
};

# histogram: positional parameter is the interval.
# NOTE(review): unlike the other callbacks this returns nothing (not an empty
# hash ref) when the interval is missing or not greater than zero; kept as-is
# because the caller is not visible from here.
$Aggregations{histogram} = {
    params => sub {
        return unless defined $_[0];
        return unless $_[0] > 0;
        return { interval => $_[0] };
    },
    type      => 'bucket',
    composite => 1,
};

# date_histogram: positional parameter is the calendar_interval, default '1h'.
$Aggregations{date_histogram} = {
    params    => sub { { calendar_interval => $_[0] || '1h' } },
    type      => 'bucket',
    composite => 1,
};

# geohash_grid: positional parameter is the precision.
$Aggregations{geohash_grid} = {
    params => sub {
        defined $_[0] && $_[0] =~ /^\d+$/ ? { precision => $_[0] } : {};
    },
    type      => 'bucket',
    composite => 1,
};

# Bucket aggregation without positional parameters.
$Aggregations{missing} = { type => 'bucket' };

# Metric aggregations flagged as single_stat.
$Aggregations{avg}         = { single_stat => 1, type => 'metric' };
$Aggregations{max}         = { single_stat => 1, type => 'metric' };
$Aggregations{min}         = { single_stat => 1, type => 'metric' };
$Aggregations{sum}         = { single_stat => 1, type => 'metric' };
$Aggregations{cardinality} = { single_stat => 1, type => 'metric' };

# Metric aggregations without the single_stat flag.
$Aggregations{stats}          = { type => 'metric' };
$Aggregations{extended_stats} = { type => 'metric' };

# percentiles: positional parameter is a comma separated list of percents,
# defaulting to 25,50,75,90.  Tagged as a metric like its siblings.
$Aggregations{percentiles} = {
    params => sub {
        my @pcts = $_[0] ? split /,/, $_[0] : qw(25 50 75 90);
        return { percents => \@pcts };
    },
    type => 'metric',
};
$Aggregations{geo_centroid} = { type => 'metric' };
# Look up the single_stat flag for an aggregation name.
#
# Returns the stored single_stat value when the name is registered in
# %Aggregations and carries that key; otherwise returns nothing (empty list /
# undef).  A false or missing name also returns nothing.
sub is_single_stat {
    my ($name) = @_;

    # exists() is checked before fetching so unknown names are never
    # autovivified into the registry.
    if ( $name && exists $Aggregations{$name} ) {
        my $spec = $Aggregations{$name};
        return $spec->{single_stat} if exists $spec->{single_stat};
    }
    return;
}
sub expand_aggregate_string {
my ($token) = @_;
my %aggs = ();
foreach my $def ( split /\+/, $token ) {
my $alias = $def =~ s/^(\w+)=// ? $1 : undef;
my @parts = split /:/, $def, 3;
if( @parts == 1 ) {
lib/App/ElasticSearch/Utilities/Aggregations.pm view on Meta::CPAN
"terms": {
"field": "field_name",
"size": 20
}
}
}
=item B<significant_terms>
Same as C<terms>.
significant_terms:field_name:10
Results in:
{
"significant_terms.field_name": {
"significant_terms": {
"field": "field_name",
"size": 10
}
}
}
=item B<rare_terms>
Same as C<terms> but the positional parameter is the C<max_doc_count>.
rare_terms:field_name:10
Results in:
{
"rare_terms.field_name": {
"rare_terms": {
"field": "field_name",
"max_doc_count": 10
}
}
}
=item B<histogram>
Creates a histogram for numeric fields. Positional parameter is the interval.
histogram:field_name:10
Results in:
{
"histogram.field_name": {
"histogram": {
"field": "field_name",
"interval": 10
}
}
}
=item B<date_histogram>
Creates a histogram for date fields. Positional parameter is the calendar_interval.
date_histogram:field_name:1h
Results in:
{
"date_histogram.field_name": {
"date_histogram": {
"field": "field_name",
"calendar_interval": "1h"
}
}
}
=item B<geohash_grid>
Creates a geohash grid bucket aggregation. Positional parameter is the precision.
geohash_grid:field_name:6
Results in:
{
"geohash_grid.field_name": {
"geohash_grid": {
"field": "field_name",
"precision": 6
}
}
}
=item B<missing>
Creates a bucket for documents missing the field. No positional parameters.
missing:field_name
Results in:
{
"missing.field_name": {
"missing": {
"field": "field_name"
}
}
}
=back
=head2 Metric Aggregations
Aggregations that generate metrics from enclosing buckets.
=over 2
=item B<avg>, B<max>, B<min>, B<sum>
Single stat metric aggregations to generate the various single statistics over the enclosing bucket.
sum:field_name
Results in:
{
"sum.field_name": {
"sum": {
"field": "field_name"
}
}
}
( run in 1.611 second using v1.01-cache-2.11-cpan-d8267643d1d )