Algorithm-Numerical-Sample

 view release on metacpan or  search on metacpan

lib/Algorithm/Numerical/Sample.pm  view on Meta::CPAN

    foreach (@PARAMS) {
        $args {$_} = $args {"-$_"} unless defined $args {$_};
    }

    # Check for set parameter.
    die "sample requires the set parameter" unless $args {set};

    my $set = $args {set};

    # Set sample and set size.
    my $sample_size = defined $args {sample_size} ? $args {sample_size} : 1;
    my $set_size    = @$set;

    # Reservoir will be our sample.
    my @reservoir      = (undef) x $sample_size;

    # Initialize counters.
    my $sample_counter = 0;
    my $set_counter    = 0;

    # Loop as long as the reservoir isn't filled.
    while ($sample_counter < $sample_size) {
        # Draw a random number.
        my $U = rand ($set_size - $set_counter);
        if ($U < $sample_size - $sample_counter) {
            # Select the next element with probability
            #    $sample_size - $sample_counter
            #    ------------------------------
            #    $set_size    - $set_counter
            $reservoir [$sample_counter ++] = $set -> [$set_counter];
        }
        $set_counter ++;
    }

    wantarray ? @reservoir : \@reservoir;
}



package Algorithm::Numerical::Sample::Stream;

use strict;


sub new {
    my $proto = shift;
    my $class = ref $proto || $proto;
    my %args  = @_;

    foreach (qw /sample_size/) {
        $args {$_} = $args {"-$_"} unless defined $args {$_};
    }

    my $self  = {};

    $self -> {sample_size} = defined $args {sample_size} ? $args {sample_size}
                                                         : 1;
    $self -> {seen}      = 0;
    $self -> {reservoir} = [(undef) x $self -> {sample_size}];

    bless $self, $class;
}

sub data {
    my $self   = shift;

    foreach my $sample (@_) {
        if ($self -> {seen} < $self -> {sample_size}) {
            # Initialize reservoir.
            $self -> {reservoir} -> [$self -> {seen}] =
                                    [$self -> {seen}, $sample];
        }
        else {
            # Draw number.
            my $U = int rand ($self -> {seen} + 1);
            if ($U < $self -> {sample_size}) {
                $self -> {reservoir} -> [$U] = [$self -> {seen}, $sample];
            }
        }

        $self -> {seen} ++;
    }

    return;
}

sub extract {
    my $self = shift;

    my @result = map {$_ -> [1]}
                 sort {$a -> [0] <=> $b -> [0]} @{$self -> {reservoir}};

    $self -> {seen}      = 0;
    $self -> {reservoir} = [(undef) x $self -> {sample_size}];

    wantarray ? @result : $result [0];
}


__END__

=head1 NAME

Algorithm::Numerical::Sample - Draw samples from a set

=head1 SYNOPSIS

    use Algorithm::Numerical::Sample  qw /sample/;

    @sample = sample (-set         => [1 .. 10000],
                      -sample_size => 100);

    $sampler = Algorithm::Numerical::Sample::Stream -> new;
    while (<>) {$sampler -> data ($_)}
    $random_line = $sampler -> extract;

=head1 DESCRIPTION

This package gives two methods to draw fair, random samples from a set.
There is a procedural interface for the case the entire set is known,
and an object oriented interface when the a set with unknown size has



( run in 2.061 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )