Algorithm-LossyCount

 view release on metacpan or  search on metacpan

lib/Algorithm/LossyCount.pm  view on Meta::CPAN

package Algorithm::LossyCount;

# ABSTRACT: Memory-efficient approximate frequency count.

use v5.10;
use Algorithm::LossyCount::Entry;
use Carp;
use POSIX qw//;

our $VERSION = 0.03;

sub new {
  my ($class, %params) = @_;

  my $max_error_ratio = delete $params{max_error_ratio}
    // Carp::croak('Missing mandatory parameter: "max_error_ratio"');
  if (%params) {
    Carp::croak(
      'Unknown parameter(s): ',
      join ', ', map { qq/"$_"/ } sort keys %params,
    )
  }

  Carp::croak('max_error_ratio must be positive.') if $max_error_ratio <= 0;

  my $self = bless +{
    bucket_size => POSIX::ceil(1 / $max_error_ratio),
    current_bucket => 1,
    entries => +{},
    max_error_ratio => $max_error_ratio,
    num_samples => 0,
    num_samples_in_current_bucket => 0,
  } => $class;

  return $self;
}

sub add_sample {
  my ($self, $sample) = @_;

  Carp::croak('add_sample() requires 1 parameter.') unless defined $sample;

  if (defined (my $entry = $self->entries->{$sample})) {
    $entry->increment_frequency;
    $entry->num_allowed_errors($self->current_bucket - 1);
  } else {
    $self->entries->{$sample} = Algorithm::LossyCount::Entry->new(
      num_allowed_errors => $self->current_bucket - 1,
    );
  }

  ++$self->{num_samples};
  ++$self->{num_samples_in_current_bucket};
  $self->clear_bucket if $self->bucket_is_full;
}

sub bucket_is_full {
  my ($self) = @_;

  $self->num_samples_in_current_bucket >= $self->bucket_size;
}

sub bucket_size { $_[0]->{bucket_size} }

sub clear_bucket {
  my ($self) = @_;

  for my $sample (keys %{ $self->entries }) {
    my $entry = $self->entries->{$sample};
    unless ($entry->survive_in_bucket($self->current_bucket)) {
      delete $self->entries->{$sample};
    }
  }
  ++$self->{current_bucket};
  $self->{num_samples_in_current_bucket} = 0;
}

sub current_bucket { $_[0]->{current_bucket} }

sub entries { $_[0]->{entries} }

sub frequencies {
  my ($self, %params) = @_;

  my $support = delete $params{support} // 0;
  if (%params) {



( run in 1.546 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )