Algorithm-Numerical-Sample
view release on metacpan or search on metacpan
lib/Algorithm/Numerical/Sample.pm view on Meta::CPAN
foreach (@PARAMS) {
$args {$_} = $args {"-$_"} unless defined $args {$_};
}
# Check for set parameter.
die "sample requires the set parameter" unless $args {set};
my $set = $args {set};
# Set sample and set size.
my $sample_size = defined $args {sample_size} ? $args {sample_size} : 1;
my $set_size = @$set;
# Reservoir will be our sample.
my @reservoir = (undef) x $sample_size;
# Initialize counters.
my $sample_counter = 0;
my $set_counter = 0;
# Loop as long as the reservoir isn't filled.
while ($sample_counter < $sample_size) {
# Draw a random number.
my $U = rand ($set_size - $set_counter);
if ($U < $sample_size - $sample_counter) {
# Select the next element with probability
# $sample_size - $sample_counter
# ------------------------------
# $set_size - $set_counter
$reservoir [$sample_counter ++] = $set -> [$set_counter];
}
$set_counter ++;
}
wantarray ? @reservoir : \@reservoir;
}
package Algorithm::Numerical::Sample::Stream;
use strict;
sub new {
my $proto = shift;
my $class = ref $proto || $proto;
my %args = @_;
foreach (qw /sample_size/) {
$args {$_} = $args {"-$_"} unless defined $args {$_};
}
my $self = {};
$self -> {sample_size} = defined $args {sample_size} ? $args {sample_size}
: 1;
$self -> {seen} = 0;
$self -> {reservoir} = [(undef) x $self -> {sample_size}];
bless $self, $class;
}
sub data {
my $self = shift;
foreach my $sample (@_) {
if ($self -> {seen} < $self -> {sample_size}) {
# Initialize reservoir.
$self -> {reservoir} -> [$self -> {seen}] =
[$self -> {seen}, $sample];
}
else {
# Draw number.
my $U = int rand ($self -> {seen} + 1);
if ($U < $self -> {sample_size}) {
$self -> {reservoir} -> [$U] = [$self -> {seen}, $sample];
}
}
$self -> {seen} ++;
}
return;
}
sub extract {
my $self = shift;
my @result = map {$_ -> [1]}
sort {$a -> [0] <=> $b -> [0]} @{$self -> {reservoir}};
$self -> {seen} = 0;
$self -> {reservoir} = [(undef) x $self -> {sample_size}];
wantarray ? @result : $result [0];
}
__END__
=head1 NAME
Algorithm::Numerical::Sample - Draw samples from a set
=head1 SYNOPSIS
use Algorithm::Numerical::Sample qw /sample/;
@sample = sample (-set => [1 .. 10000],
-sample_size => 100);
$sampler = Algorithm::Numerical::Sample::Stream -> new;
while (<>) {$sampler -> data ($_)}
$random_line = $sampler -> extract;
=head1 DESCRIPTION
This package gives two methods to draw fair, random samples from a set.
There is a procedural interface for the case the entire set is known,
and an object oriented interface when the a set with unknown size has
( run in 2.061 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )