# Algorithm-LossyCount
# view release on metacpan or search on metacpan
# t/Algorithm/LossyCount.t view on Meta::CPAN
use v5.10;
use strict;
use warnings;

use List::Util qw/shuffle sum/;
use Test::Exception::LessClever;
use Test::More;
# Smoke test: the module under test must load before anything else is tried.
use_ok('Algorithm::LossyCount');
# Build a Zipf (exponent 1) probability function over ranks 1 .. N.
#
# Takes the number of ranks N and returns a code reference mapping a rank
# to 1 / (rank * H), where H = 1/1 + 1/2 + ... + 1/N is the normalizing
# constant that makes the probabilities of ranks 1 .. N add up to 1.
sub zipf_distribution {
    my $num_samples = shift;

    # Computed once here and captured by the closure returned below.
    my $harmonic_sum = sum(map { 1 / $_ } 1 .. $num_samples);

    return sub {
        my $rank = shift;
        return 1 / ($rank * $harmonic_sum);
    };
}
# Constructing the counter without max_error_ratio must die, and the error
# message must mention the missing parameter by name.
throws_ok(
    sub { Algorithm::LossyCount->new },
    qr/max_error_ratio/,
    'max_error_ratio is a mandatory parameter.'
);
# Reference data for the subtests: rank $rank is expected to occur
# int(P($rank) * $num_samples) times, where P comes from zipf_distribution.
# Ranks whose expected count truncates to zero get no entry at all.
my $num_samples = 20000;
my $distribution = zipf_distribution($num_samples);
my %sample_frequencies;
for my $rank (1 .. $num_samples) {
    my $expected_count = int($distribution->($rank) * $num_samples);
    if ($expected_count != 0) {
        $sample_frequencies{$rank} = $expected_count;
    }
}
# End-to-end check: feed the counter a shuffled stream whose exact per-sample
# counts are known (%sample_frequencies), then verify that for the
# highest-counted samples it reports, the relative shortfall of the reported
# count against the true count does not exceed the configured max_error_ratio.
subtest 'Basic' => sub {
    my $counter = new_ok 'Algorithm::LossyCount' => [ max_error_ratio => 0.005 ];

    # Expand every sample into as many copies as its known count, in random
    # order, and stream them into the counter one by one.
    my @samples =
        shuffle map { ($_) x $sample_frequencies{$_} } keys %sample_frequencies;
    $counter->add_sample($_) for @samples;

    # Used below as a hash reference: sample => reported count.
    my $frequencies = $counter->frequencies;

    # Keep indices 0 .. floor(count / 100) of the samples sorted by reported
    # count, highest first. This stays a list slice on purpose: it yields
    # nothing when no frequencies were reported, whereas an array slice
    # would yield an undef element.
    my $last_index = int(keys(%$frequencies) / 100);
    my @frequent_samples = (
        sort { $frequencies->{$b} <=> $frequencies->{$a} } keys %$frequencies
    )[0 .. $last_index];

    for my $sample (@frequent_samples) {
        my $errors = $sample_frequencies{$sample} - $frequencies->{$sample};
        my $error_ratio = $errors / $sample_frequencies{$sample};

        # Name each assertion so a failure identifies the offending sample.
        cmp_ok $error_ratio, '<=', $counter->max_error_ratio,
            "sample $sample: relative error is within max_error_ratio";
    }
};
# No plan is declared anywhere above, so close the test run explicitly.
done_testing();
# ( run in 0.628 second using v1.01-cache-2.11-cpan-5623c5533a1 )