App-RecordStream

 view release on metacpan or  search on metacpan

doc/recs-collate.pod  view on Meta::CPAN

 cov, covar, covariance: find covariance of provided fields
 dcount, dct, distinctcount, distinctct: count unique values from provided field
 first: first value for a field
 firstrec, firstrecord: first record
 last: last value for a field
 lastrec, lastrecord: last record seen
 linearregression, linreg: perform a linear regression of provided fields, dumping various statistics
 max, maximum: maximum value for a field
 min, minimum: minimum value for a field
 mode: most common value for a field
 perc, percentile: value of pXX for field
 percentilemap, percmap: map of percentile values for field
 recformax, recformaximum, recordformax, recordformaximum: returns the record corresponding to the maximum value for a field
 recformin, recforminimum, recordformin, recordforminimum: returns the record corresponding to the minimum value for a field
 records, recs: returns an arrayref of all records
 stddev: find standard deviation of provided field
 sum: sums provided field
 uarray: collect unique values from provided field into an array
 uconcat, uconcatenate: concatenate unique values from provided field
 valuestokeys, vk: use one key-value as a key for a different value in the record
 var, variance: find variance of provided field
 

lib/App/RecordStream/Aggregator/Percentile.pm  view on Meta::CPAN

use warnings;

use App::RecordStream::Aggregator::InjectInto::Field;
use App::RecordStream::DomainLanguage::Registry;

use base qw(App::RecordStream::Aggregator::InjectInto::Field);

# Constructor: build the field-based aggregator via the parent class, then
# remember which percentile this instance should report.
#
#   $percentile - percentile to compute (divided by 100 when used in squish)
#   $field      - field specification handed to the parent constructor
sub new
{
  my ($class, $percentile, $field) = @_;

  my $this = $class->SUPER::new($field);
  $this->{'percentile'} = $percentile;

  return $this;
}

# Alternate constructor: same as new(), except the parent object is built
# from a valuation instead of a field specification.
#
#   $percentile - percentile to compute
#   $valuation  - passed through to the parent's new_from_valuation
sub new_from_valuation
{
  my ($class, $percentile, $valuation) = @_;

  my $this = $class->SUPER::new_from_valuation($valuation);
  $this->{'percentile'} = $percentile;

  return $this;
}

# Starting cookie for an aggregation: a fresh, empty array reference that
# will accumulate the values seen.
sub initial
{
  my @values;
  return \@values;
}

sub combine_field
{

lib/App/RecordStream/Aggregator/Percentile.pm  view on Meta::CPAN


  push @$cookie, $value;
  return $cookie;
}

# Reduce the accumulated values to the requested percentile.
#
# The values are sorted numerically and the element at
# int(count * percentile / 100) is returned.  When that position lands
# exactly one past the end (e.g. the 100th percentile) it is stepped back
# to the last element.
sub squish
{
  my ($this, $cookie) = @_;

  my @ordered = sort { $a <=> $b } @$cookie;
  my $count   = scalar @ordered;

  my $position = int($count * ($this->{'percentile'} / 100));
  $position-- if $position == $count;

  return $ordered[$position];
}

# One-line description of this aggregator.
sub short_usage
{
  my $summary = "value of pXX for field";
  return $summary;
}

# Print the detailed usage text for this aggregator to the currently
# selected output handle.
#
# The usage line names the aggregator as "perc", which is one of the two
# names this module registers ("perc" and "percentile"); the previous text
# said "per", which is not a registered name.
sub long_usage
{
  print <<EOF;
Usage: perc,<percentile>,<field>
   Finds the field value which <percentile> percent of values are less than.

   This is computed by creating an array of all values, sorting, and indexing into it at the
   floor((percentile / 100) * length) point
EOF
}

# Number of arguments this aggregator takes; the usage text lists them as
# <percentile> and <field>.
sub argct
{
  my $argument_count = 2;
  return $argument_count;
}

# Make this class available as an aggregator under both its long name
# ("percentile") and its short alias ("perc").
App::RecordStream::Aggregator->register_implementation('percentile', __PACKAGE__);
App::RecordStream::Aggregator->register_implementation('perc', __PACKAGE__);

# Expose new_from_valuation to the domain language under the same two names.
# NOTE(review): 'SCALAR' and 'VALUATION' presumably describe the two
# arguments of new_from_valuation (percentile, valuation) -- confirm against
# Registry::register_vfn.
App::RecordStream::DomainLanguage::Registry::register_vfn(__PACKAGE__, 'new_from_valuation', 'percentile', 'SCALAR', 'VALUATION');
App::RecordStream::DomainLanguage::Registry::register_vfn(__PACKAGE__, 'new_from_valuation', 'perc', 'SCALAR', 'VALUATION');

1;

lib/App/RecordStream/Aggregator/PercentileMap.pm  view on Meta::CPAN

our $VERSION = "4.0.25";

use strict;
use warnings;

use App::RecordStream::Aggregator::InjectInto::Field;
use App::RecordStream::DomainLanguage::Registry;

use base qw(App::RecordStream::Aggregator::InjectInto::Field);

# Normalize a percentile specification into an array reference.
#
# An array reference is returned as-is (the same reference, not a copy).
# Anything else is treated as a whitespace-separated string.
sub _make_percentiles
{
  my ($spec) = @_;

  return $spec if ref($spec) eq "ARRAY";

  # split(' ', ...) is the special awk-style split: it skips leading
  # whitespace and splits on runs of whitespace, not on a single space.
  my @pieces = split(' ', $spec);
  return \@pieces;
}

# Constructor: build the field-based aggregator via the parent class, then
# store the normalized list of percentiles to report.
#
#   $percentiles - array reference or whitespace-separated string
#   $field       - field specification handed to the parent constructor
sub new
{
  my ($class, $percentiles, $field) = @_;

  my $this = $class->SUPER::new($field);
  $this->{'percentiles'} = _make_percentiles($percentiles);

  return $this;
}

# Alternate constructor: same as new(), except the parent object is built
# from a valuation instead of a field specification.
#
#   $percentiles - array reference or whitespace-separated string
#   $valuation   - passed through to the parent's new_from_valuation
sub new_from_valuation
{
  my ($class, $percentiles, $valuation) = @_;

  my $this = $class->SUPER::new_from_valuation($valuation);
  $this->{'percentiles'} = _make_percentiles($percentiles);

  return $this;
}

# Starting cookie for an aggregation: a fresh, empty array reference that
# will accumulate the values seen.
sub initial
{
  my @values;
  return \@values;
}

sub combine_field
{

lib/App/RecordStream/Aggregator/PercentileMap.pm  view on Meta::CPAN


# Reduce the accumulated values to a hash mapping each configured
# percentile to its value.
#
# The values are sorted numerically once; for every percentile the element
# at int(count * percentile / 100) is selected, stepping back to the last
# element when the position lands exactly one past the end.
sub squish
{
  my ($this, $cookie) = @_;

  my @ordered = sort { $a <=> $b } @$cookie;
  my $count   = scalar @ordered;

  my %value_for;

  foreach my $percentile (@{$this->{'percentiles'}})
  {
    my $position = int($count * ($percentile / 100));
    $position-- if $position == $count;

    $value_for{$percentile} = $ordered[$position];
  }

  return \%value_for;
}

# One-line description of this aggregator.
sub short_usage
{
  my $summary = "map of percentile values for field";
  return $summary;
}

# Print the detailed usage text for this aggregator to the currently
# selected output handle.  The sub's return value is whatever print returns.
sub long_usage
{
  my $usage_text = <<'USAGE';
Usage: percmap,<percentiles>,<field>
   Finds the field values which <percentiles> percent of values are less than.

   This is computed by creating an array of all values, sorting, and indexing
   into it at the floor((percentile / 100) * length) point

   <percentiles> will be perl split to determine percentiles to compute.

   Output is a hash whose keys are percentiles and whose values are
   corresponding field values.
USAGE

  print $usage_text;
}

# Number of arguments this aggregator takes; the usage text lists them as
# <percentiles> and <field>.
sub argct
{
  my $argument_count = 2;
  return $argument_count;
}

# Make this class available as an aggregator under both its long name
# ("percentilemap") and its short alias ("percmap").
App::RecordStream::Aggregator->register_implementation('percentilemap', __PACKAGE__);
App::RecordStream::Aggregator->register_implementation('percmap', __PACKAGE__);

# Expose new_from_valuation to the domain language under the same two names.
# NOTE(review): 'SCALAR' and 'VALUATION' presumably describe the two
# arguments of new_from_valuation (percentiles, valuation) -- confirm against
# Registry::register_vfn.
App::RecordStream::DomainLanguage::Registry::register_vfn(__PACKAGE__, 'new_from_valuation', 'percentilemap', 'SCALAR', 'VALUATION');
App::RecordStream::DomainLanguage::Registry::register_vfn(__PACKAGE__, 'new_from_valuation', 'percmap', 'SCALAR', 'VALUATION');

1;

lib/App/RecordStream/Manual/Story.pm  view on Meta::CPAN

avg,latency" requests that the average aggregator be used on the latency field.
"-a count" requests that the count aggregator be used.

Aggregators are one of the most powerful features of recs. As of writing there
are 21 distinct aggregators ready for use. Some of the most powerful are:

  average: averages provided field
  count: counts (non-unique) records
  distinctcount: count unique values from provided field
  maximum: maximum value for a field
  percentile: value of pXX for field
  sum: sums provided field

You can find out what all of them are with `recs-collate --list-aggregators`.

Here are a few sample records from after the collate step:

  {"count":11,"url":"/dbfiles/list.cgi","avg_latency":21.0909090909091}
  {"count":2,"url":"/linkGenerator/Host.cgi","avg_latency":0.5}
  {"count":3,"url":"/view_image.cgi","avg_latency":0.333333333333333}
  {"count":21,"url":"/dbfiles/check.cgi","avg_latency":0.476190476190476}

src/fast-recs-collate/aggregators.c  view on Meta::CPAN

    struct sum_data *d = _d;
    printf("%g", d->sum);
}

/*
 * Perc
 */

/* Configuration for one percentile aggregator, filled in by perc_parse_args. */
struct perc_config_data
{
    double percentile; /* value parsed with strtod from the config string; perc_dump divides it by 100 */
};

/* Accumulated state for one percentile aggregation. */
struct perc_data
{
    int values_len;  /* number of entries in values; perc_dump sorts and indexes this many */
    int values_size; /* presumably the allocated capacity of values -- TODO confirm against perc_add */
    double *values;  /* collected samples; sorted in place by perc_dump */
};

static bool perc_parse_args(void **config_data, char *config_str, int *num_fields, char **fields)
{
    if(*config_str)
    {
        char *comma = strchr(config_str, ',');
        if(!comma) return false;
        *comma++ = '\0';

        struct perc_config_data *c = *config_data = malloc(sizeof(struct perc_config_data));
        char *endp;
        c->percentile = strtod(config_str, &endp);
        if(endp == config_str) return false;  /* failed to parse into number */

        fields[0] = comma;
        *num_fields = 1;

        return true;
    }
    else
    {
        return false;

src/fast-recs-collate/aggregators.c  view on Meta::CPAN

    if(d1 < d2) return -1;
    else if(d1 > d2) return 1;
    else return 0;
}

/*
 * Print the configured percentile of the collected values.
 *
 * _c: struct perc_config_data holding the requested percentile.
 * _d: struct perc_data holding the collected values (sorted in place here).
 *
 * The value at floor((percentile / 100) * values_len) of the sorted array is
 * printed with "%g".  The position is clamped to [0, values_len - 1]: an
 * exact 100th percentile would otherwise index one past the end, and a
 * negative, oversized or NaN percentile would index outside the array.
 */
static void perc_dump(void *_c, void *_d)
{
    struct perc_config_data *c = _c;
    struct perc_data *d = _d;

    if(d->values_len <= 0)
    {
        /*
         * Nothing was collected, so there is no element to read.
         * NOTE(review): "null" assumes the output is JSON and mirrors the
         * Perl aggregator, which yields undef for an empty set -- confirm.
         */
        printf("null");
        return;
    }

    qsort(d->values, d->values_len, sizeof(*d->values), cmp_dbl);

    /* Clamp while still a double so the cast to int can never overflow. */
    double pos = floor((c->percentile / 100) * d->values_len);
    int index;
    if(!(pos >= 0)) index = 0;  /* negative or NaN */
    else if(pos >= d->values_len) index = d->values_len - 1;
    else index = (int)pos;

    printf("%g", d->values[index]);
}

static void perc_free(void *_c, void *_d)
{
    struct concat_data *d = _d;
    free(d->concat_buf);
}

/*

src/fast-recs-collate/aggregators.c  view on Meta::CPAN

    {"correlation", "corr", sizeof(struct corr_data),
      corr_parse_args, corr_init, corr_add, corr_dump, NULL},
    {"covariance", "cov", sizeof(struct cov_data),
      cov_parse_args, cov_init, cov_add, cov_dump, NULL},
    {"maximum", "max", sizeof(struct max_data),
      max_parse_args, max_init, max_add, max_dump, NULL},
    {"minimum", "min", sizeof(struct min_data),
      min_parse_args, min_init, min_add, min_dump, NULL},
    {"mode", "mode", sizeof(struct mode_data),
      mode_parse_args, mode_init, mode_add, mode_dump, mode_free},
    {"percentile", "perc", sizeof(struct perc_data),
      perc_parse_args, perc_init, perc_add, perc_dump, perc_free},
    {"sum", "sum", sizeof(struct sum_data),
      sum_parse_args, sum_init, sum_add, sum_dump, NULL},
    {"variance", "var", sizeof(struct var_data),
      var_parse_args, var_init, var_add, var_dump, NULL},
    {NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL}
};

tests/RecordStream/Aggregator/Percentile.t  view on Meta::CPAN

use strict;
use warnings;

use Test::More 'no_plan';
use Data::Dumper;
use App::RecordStream::Record;

# Confirm the aggregator module loads before exercising it.
BEGIN { use_ok("App::RecordStream::Aggregator::Percentile"); }

# For the values 1..100, int(100 * 90 / 100) = 90, and index 90 of the
# sorted values is 91.
ok(my $aggr = App::RecordStream::Aggregator::Percentile->new(90, "x"), "Initialize");
is(percentile_100_values($aggr), 91, "90th percentile of 1-100");

# int(100 * 100 / 100) = 100 is one past the last index, so squish steps
# back to the final element, 100.
ok($aggr = App::RecordStream::Aggregator::Percentile->new(100, "x"), "Initialize");
is(percentile_100_values($aggr), 100, "100th percentile of 1-100");

sub percentile_100_values {
  my $aggr = shift;

  my $cookie = $aggr->initial();

  foreach my $n (1..100)
  {
    $cookie = $aggr->combine($cookie, App::RecordStream::Record->new("x" => $n));
  }

  return $aggr->squish($cookie);



( run in 0.456 second using v1.01-cache-2.11-cpan-05162d3a2b1 )