App-ElasticSearch-Utilities

 view release on metacpan or  search on metacpan

lib/App/ElasticSearch/Utilities/Aggregations.pm  view on Meta::CPAN

package App::ElasticSearch::Utilities::Aggregations;
# ABSTRACT: Code to simplify creating and working with Elasticsearch aggregations

use v5.16;
use warnings;

use App::ElasticSearch::Utilities qw(es_format_numeric);

use Storable qw(dclone);
use Sub::Exporter -setup => {
    exports => [ qw(
        expand_aggregate_string
        es_flatten_aggregations es_flatten_aggs
        is_single_stat
    )],
    groups => {
        default => [qw(
            expand_aggregate_string
            es_flatten_aggregations es_flatten_aggs
            is_single_stat
        )],
    },
};

my %Aggregations;


$Aggregations{terms} = {
    params    => sub { $_[0] && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
    type      => 'bucket',
    composite => 1,
};


$Aggregations{significant_terms} = {
    params => sub { $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
    type   => 'bucket',
};


$Aggregations{rare_terms} = {
    params => sub { $_[0] =~ /^\d+$/ ? { max_doc_count => $_[0] } : {} },
    type   => 'bucket',
};


$Aggregations{histogram} = {
    params => sub {
        return unless defined $_[0];
        return unless $_[0] > 0;
        return { interval => $_[0] };
    },
    type      => 'bucket',
    composite => 1,
};


$Aggregations{date_histogram} = {
    params    => sub { { calendar_interval => $_[0] || '1h' } },
    type      => 'bucket',
    composite => 1,
};


$Aggregations{geohash_grid} = {
    params    => sub { $_[0] =~ /^\d+$/ ? { precision => $_[0] } : {} },
    type      => 'bucket',
    composite => 1,
};


$Aggregations{missing} = { type => 'bucket' };


$Aggregations{avg} = { single_stat => 1, type => 'metric' };
$Aggregations{max} = { single_stat => 1, type => 'metric' };
$Aggregations{min} = { single_stat => 1, type => 'metric' };
$Aggregations{sum} = { single_stat => 1, type => 'metric' };


$Aggregations{cardinality} = { single_stat => 1, type => 'metric' };


$Aggregations{stats} = { type => 'metric' };


$Aggregations{extended_stats} = { type => 'metric' };


$Aggregations{percentiles} = {
    params => sub {
        my @pcts = $_[0] ? split /,/, $_[0] : qw(25 50 75 90);
        return { percents => \@pcts };
    },
};


$Aggregations{geo_centroid} = { type => 'metric' };




sub is_single_stat {
    my ($agg) = @_;
    return unless $agg;
    return unless exists $Aggregations{$agg};
    return unless exists $Aggregations{$agg}->{single_stat};
    return $Aggregations{$agg}->{single_stat};
}


sub expand_aggregate_string {
    my ($token) = @_;

    my %aggs = ();
    foreach my $def ( split /\+/, $token ) {
        my $alias = $def =~ s/^(\w+)=// ? $1 : undef;
        my @parts = split /:/, $def, 3;
        if( @parts == 1 ) {

lib/App/ElasticSearch/Utilities/Aggregations.pm  view on Meta::CPAN

            "terms": {
                "field": "field_name",
                "size": 20
            }
        }
    }

=item B<significant_terms>

Same as C<terms>.

    significant_terms:field_name:10

Results in:

    {
        "rare_terms.field_name": {
            "terms": {
                "field": "field_name",
                "size": 10
            }
        }
    }

=item B<rare_terms>

Same as C<terms> but the positional parameter is the C<max_doc_count>.

    rare_terms:field_name:10

Results in:

    {
        "rare_terms.field_name": {
            "terms": {
                "field": "field_name",
                "max_doc_count": 10
            }
        }
    }

=item B<histogram>

Creates a histogram for numeric fields.  Positional parameter is the interval.

    histogram:field_name:10

Results in:

    {
        "histogram.field_name": {
            "histogram": {
                "field": "field_name",
                "interval": 10
            }
        }
    }

=item B<date_histogram>

Creates a histogram for date fields.  Positional parameter is the calendar_interval.

    date_histogram:field_name:1h

Results in:

    {
        "histogram.field_name": {
            "histogram": {
                "field": "field_name",
                "calendar_interval": "1h"
            }
        }
    }

=item B<geohash_grid>

Creates a geohash grid bucket aggregation.  Positional parameter is the precision.

    geohash_grid:field_name:6

Results in:

    {
        "geohash_grid.field_name": {
            "geohash_grid": {
                "field": "field_name",
                "precision": 6
            }
        }
    }

=item B<missing>

Creates a bucket for documents missing the field.  No positional parameters.

    missing:field_name

Results in:

    {
        "missing.field_name": {
            "missing": {
                "field": "field_name"
            }
        }
    }

=back

=head2 Metric Aggregations

Aggregations that generate metrics from enclosing buckets.

=over 2

=item B<avg>, B<max>, B<min>, B<sum>

Single stat metric aggregations to generate the various single statistics over the enclosing bucket.

    sum:field_name

Results in

    {
        "sum.field_names": {
            "sum": {
                "field": "field_name"
            }
        }
    }



( run in 1.611 second using v1.01-cache-2.11-cpan-d8267643d1d )