App-ElasticSearch-Utilities

 view release on metacpan or  search on metacpan

lib/App/ElasticSearch/Utilities/Aggregations.pm  view on Meta::CPAN

package App::ElasticSearch::Utilities::Aggregations;
# ABSTRACT: Code to simplify creating and working with Elasticsearch aggregations

use v5.16;
use warnings;

use App::ElasticSearch::Utilities qw(es_format_numeric);

use Storable qw(dclone);
use Sub::Exporter -setup => {
    exports => [ qw(
        expand_aggregate_string
        es_flatten_aggregations es_flatten_aggs
        is_single_stat
    )],
    groups => {
        default => [qw(
            expand_aggregate_string
            es_flatten_aggregations es_flatten_aggs
            is_single_stat
        )],
    },
};

# Registry of the aggregations this module knows about, keyed by the
# Elasticsearch aggregation name.  An entry may carry:
#
#   type        - 'bucket' or 'metric'
#   params      - code ref that receives the positional parameter string
#                 (undef when none was supplied) and returns a hash ref of
#                 aggregation parameters
#   single_stat - true when the aggregation yields a single value, see
#                 is_single_stat()
#   composite   - flag set on some bucket aggregations
#                 NOTE(review): presumably marks aggregations usable as
#                 composite sources - confirm against the callers
#
# Every params callback must tolerate an undefined argument, because
# expand_aggregate_string() invokes it even when no parameters were given.
my %Aggregations;


# terms:field[:size] - a size of 0 or a non-numeric value is ignored
$Aggregations{terms} = {
    params    => sub { $_[0] && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
    type      => 'bucket',
    composite => 1,
};


# significant_terms:field[:size]
$Aggregations{significant_terms} = {
    params => sub { defined($_[0]) && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
    type   => 'bucket',
};


# rare_terms:field[:max_doc_count]
$Aggregations{rare_terms} = {
    params => sub { defined($_[0]) && $_[0] =~ /^\d+$/ ? { max_doc_count => $_[0] } : {} },
    type   => 'bucket',
};


# histogram:field:interval - no parameters unless the interval is positive
$Aggregations{histogram} = {
    params => sub {
        return unless defined $_[0];
        return unless $_[0] > 0;
        return { interval => $_[0] };
    },
    type      => 'bucket',
    composite => 1,
};


# date_histogram:field[:calendar_interval] - the interval defaults to '1h'
$Aggregations{date_histogram} = {
    params    => sub { { calendar_interval => $_[0] || '1h' } },
    type      => 'bucket',
    composite => 1,
};


# geohash_grid:field[:precision]
$Aggregations{geohash_grid} = {
    params    => sub { defined($_[0]) && $_[0] =~ /^\d+$/ ? { precision => $_[0] } : {} },
    type      => 'bucket',
    composite => 1,
};


# missing:field
$Aggregations{missing} = { type => 'bucket' };


# Metrics that produce a single value
$Aggregations{avg} = { single_stat => 1, type => 'metric' };
$Aggregations{max} = { single_stat => 1, type => 'metric' };
$Aggregations{min} = { single_stat => 1, type => 'metric' };
$Aggregations{sum} = { single_stat => 1, type => 'metric' };


$Aggregations{cardinality} = { single_stat => 1, type => 'metric' };


# Metrics without the single_stat flag
$Aggregations{stats} = { type => 'metric' };


$Aggregations{extended_stats} = { type => 'metric' };


# percentiles:field[:p1,p2,...] - defaults to the 25th, 50th, 75th and 90th
# NOTE(review): this entry declares no 'type', unlike the other metric
# entries - confirm whether that is intentional.
$Aggregations{percentiles} = {
    params => sub {
        my @pcts = $_[0] ? split /,/, $_[0] : qw(25 50 75 90);
        return { percents => \@pcts };
    },
};


$Aggregations{geo_centroid} = { type => 'metric' };




# Report whether the named aggregation is flagged as producing a single value.
#
# Takes the aggregation name.  Returns the stored single_stat flag when the
# aggregation is registered and carries one; otherwise returns nothing (undef
# in scalar context, the empty list in list context).
sub is_single_stat {
    my ($agg) = @_;

    if(    $agg
        && exists $Aggregations{$agg}
        && exists $Aggregations{$agg}->{single_stat} )
    {
        return $Aggregations{$agg}->{single_stat};
    }

    return;
}


# Expand the simplified aggregation grammar into an aggregation hash.
#
# The token is a '+' separated list of definitions, each shaped like
#
#     [alias=][aggregation:]field[:parameters]
#
# A bare field becomes a terms aggregation with a size of 20.  When the first
# colon-separated part is not a registered aggregation, 'terms' is assumed.
# Parameters are either "key=value" pairs separated by commas (a value that
# itself contains commas becomes an array ref) or a single positional value
# handed to the aggregation's params callback.
#
# Returns a hash ref mapping each alias to its aggregation definition.  When
# no alias is given, the field name is used for terms aggregations and
# "aggregation.field" for everything else.  An undefined token yields an
# empty hash ref, and definitions without a field name are skipped.
sub expand_aggregate_string {
    my ($token) = @_;

    my %aggs = ();
    # Nothing to expand; also keeps split from warning on undef
    return \%aggs unless defined $token;

    foreach my $def ( split /\+/, $token ) {
        my $alias = $def =~ s/^(\w+)=// ? $1 : undef;
        # Empty definitions ("a++b", "alias=") carry no field to aggregate on
        next unless length $def;

        my @parts = split /:/, $def, 3;
        if( @parts == 1 ) {
            $alias ||= $def;
            $aggs{$alias} = { terms => { field => $def, size => 20 } };
            next;
        }
        my ($agg, $field);
        if( exists $Aggregations{$parts[0]} ) {
            $agg     = shift @parts;
            $field   = shift @parts;
        }
        else {
            $agg = 'terms';
            $field = shift @parts;
        }
        # A definition such as "avg:" or ":10" names no field
        next unless defined $field and length $field;

        my $params  = {};
        my $paramStr = shift @parts;

        if( $paramStr && $paramStr =~ /\w+=/ ) {
            # split on commas using a positive lookahead for a "word="
            foreach my $pair (split /,(?=\w+=)/, $paramStr) {
                my ($k,$v) = split /=/, $pair, 2;
                # Test for presence rather than truth, so that zero values
                # such as min_doc_count=0 are kept
                next unless defined $k and length $k;
                next unless defined $v and length $v;
                $params->{$k} = $v =~ /,/ ? [ split /,/, $v ] : $v;
            }
        }
        elsif( exists $Aggregations{$agg}->{params} ) {
            # Process positional parameters; the callback may return nothing
            $params = $Aggregations{$agg}->{params}->($paramStr);
        }
        $alias ||= join ".", $agg eq 'terms' ? ($field) : ($agg, $field);
        $aggs{$alias} = { $agg => { field => $field, %{ $params || {} } } };
    }
    return \%aggs;
}


sub es_flatten_aggregations {
    my ($result,$field,$parent) = @_;

    $parent ||= [];
    my @rows = ();

    my @remove = qw(
        doc_count_error_upper_bound
        sum_other_doc_count
    );

    my $row = dclone($parent);
    my $extract = sub {
        my ($key, $hash) = @_;

        if( $hash->{value_as_string} ) {
            push @{ $row }, $key, $hash->{value_as_string};
        }

lib/App/ElasticSearch/Utilities/Aggregations.pm  view on Meta::CPAN

        }
        if( keys %buckets ) {
            foreach my $k ( sort keys %buckets ) {
                if( @{ $buckets{$k} } ) {
                    foreach my $bucket ( @{ $buckets{$k} } ) {
                        push @rows, @{ es_flatten_aggregations($bucket, $k, $row) };
                    }
                }
                else {
                    push @rows, $row;
                }
            }
        }
        else {
            push @rows, $row;
        }
    }
    else {
        foreach my $k ( sort keys %{ $result } ) {
            delete $result->{$k}{$_} for @remove;
            $extract->($k, $result->{$k});
            my $buckets = delete $result->{$k}{buckets};
            if( $buckets and @{ $buckets } ) {
                foreach my $bucket ( @{ $buckets } ) {
                    push @rows, @{ es_flatten_aggregations($bucket,$k,$row) };
                }
            }
            else {
                push @rows, $row;
            }
        }
    }

    return \@rows;
}

# Setup Aliases
# es_flatten_aggs is a shorter name bound to the same code as
# es_flatten_aggregations; both names appear in the export list.
*es_flatten_aggs = \&es_flatten_aggregations;


1;

__END__

=pod

=head1 NAME

App::ElasticSearch::Utilities::Aggregations - Code to simplify creating and working with Elasticsearch aggregations

=head1 VERSION

version 8.9

=head1 FUNCTIONS

=head2 is_single_stat()

Returns true if an aggregation returns a single value.

=head2 expand_aggregate_string( token )

Takes a simplified aggregation grammar and expands it into the full aggregation hash.

Simple Terms:

    field_name

To

    {
        field_name => {
            terms => {
                field => 'field_name',
                size  => 20,
            }
        }
    }

Alias expansion:

    alias=field_name

To

    {
        alias => {
            terms => {
                field => 'field_name',
                size  => 20,
            }
        }
    }

Parameters:

    alias=field_name:10

To

    {
        alias => {
            terms => {
                field => 'field_name',
                size  => 10,
            }
        }
    }

Parameters, k/v:

    alias=field_name:size=13

To

    {
        alias => {
            terms => {
                field => 'field_name',
                size  => 13,
            }
        }
    }



( run in 0.513 second using v1.01-cache-2.11-cpan-5623c5533a1 )