Bio-Community
view release on metacpan or search on metacpan
lib/Bio/Community/Tools/Sampler.pm view on Meta::CPAN
=head2 get_rand_member
Function: Get a random member from a community (sample with replacement). This
method requires the Math::GSL::Randist module.
Note: If you need to draw many members, using get_rand_community() is
much more efficient.
Usage : my $member = $sampler->get_rand_member();
Args : None
Returns : A Bio::Community::Member object
=cut
method get_rand_member () {
   # Draw a single member. The result is an arrayref of counts in which the
   # entry at index $i belongs to the member of rank $i+1.
   my $counts = $self->_get_rand_members(1);

   # The member that was drawn is the first rank with a non-zero count
   my $rank;
   for my $i ( 0 .. $#$counts ) {
      if ($counts->[$i] > 0) {
         $rank = $i + 1;
         last;
      }
   }

   # If nothing was drawn (e.g. the community has no members), fail with an
   # explicit error instead of asking the community for an undefined rank.
   if (not defined $rank) {
      $self->throw('Could not draw a random member from the community');
   }

   # Get and return the corresponding Member
   return $self->community->get_member_by_rank($rank);
}
=head2 get_rand_community
Function: Create a community from random members of a community. This method
requires the Math::GSL::Randist module.
Usage : my $community = $sampler->get_rand_community(1000);
Args : Number of members to draw (positive integer, default: 1)
Returns : A Bio::Community object
=cut
method get_rand_community ( PositiveInt $total_count = 1 ) {
   # Adding random members one at a time to a community is slow, so all the
   # draws are generated in a single call and then added rank by rank.
   my $drawn  = $self->_get_rand_members($total_count);
   my $source = $self->community;
   my $sample = Bio::Community->new();

   # $drawn->[$idx] is the number of times the member of rank $idx+1 was drawn
   for my $idx ( 0 .. $#$drawn ) {
      my $times = $drawn->[$idx];
      if ($times) {
         # Only members drawn at least once go into the new community
         $sample->add_member( $source->get_member_by_rank($idx + 1), $times );
      }
   }

   return $sample;
}
####
# Implement sampling without replacement:
# #Use gsl_ran_choose from GSL: https://www.gnu.org/software/gsl/manual/html_node/Shuffling-and-Sampling.html
# Output == input if count required == count in reference community
# Throw if count required > count in reference community
# Throw if reference community has percentages
# Make an array [Member1, Member1, Member2, Member3, Member3, Member3]
# Take a random member
# Without replacement should be default. It is beneficial when:
# sampling close to max count in community
# Sampling with replacement is beneficial when:
# sampling from percentages (or with weights)
# sampling beyond observed count in community
# member count is so high that sampling without replacement would exhaust memory
# But which one is faster / more resource-efficient?
####
method _get_rand_members ( $total_count = 1 ) {
   # Internal helper: draw $total_count members at random (default: 1) in one
   # multinomial draw. Callers in this file use the result as an arrayref of
   # counts, where index $i holds the count for the member of rank $i+1.

   # A community is mandatory
   my $community = $self->community;
   if (not $community) {
      $self->throw('No community was provided');
   }

   # 1/ One weight per rank: the relative abundance of the member at that rank
   my @weights = map {
      scalar $community->get_rel_ab( $community->get_member_by_rank($_) )
   } 1 .. $community->get_richness;

   # 2/ Load the GSL bindings only when needed, i.e. at sampling time
   eval { require Math::GSL::Randist }
      or $self->throw("Need module Math::GSL::Randist to draw random members from community\n$@");

   # A single multinomial draw is much faster than drawing members one by one
   return Math::GSL::Randist::gsl_ran_multinomial($self->_prng, \@weights, $total_count);
}
# Finalize the class by making its metaclass immutable
__PACKAGE__->meta->make_immutable;
# A module file must end with a true value so that it loads successfully
1;
( run in 0.633 second using v1.01-cache-2.11-cpan-39bf76dae61 )