Bio-Community

 view release on metacpan or  search on metacpan

lib/Bio/Community/Tools/Sampler.pm  view on Meta::CPAN


=head2 get_rand_member

 Function: Get a random member from a community (sample with replacement). This
           method requires the Math::GSL::Randist module.
           Note: If you need to draw many members, using get_rand_community() is
           much more efficient.
 Usage   : my $member = $sampler->get_rand_member();
 Args    : None
 Returns : A Bio::Community::Member object

=cut

method get_rand_member () {
   # Request a single draw. The result is an array ref of per-rank counts,
   # where index 0 holds the count for the member of rank 1.
   my $draws = $self->_get_rand_members(1);

   # Scan the counts in rank order until the first one that is positive
   my $last_idx = $#$draws;
   my $idx      = 0;
   while ( $idx <= $last_idx && !( $draws->[$idx] > 0 ) ) {
      $idx++;
   }

   # Ranks are 1-based; the rank stays undefined if no count was positive
   my $rank = $idx <= $last_idx ? $idx + 1 : undef;

   # Hand back the Member found at that rank in the reference community
   my $community = $self->community;
   return $community->get_member_by_rank($rank);
}


=head2 get_rand_community

 Function: Create a community from random members of a community. This method
           requires the Math::GSL::Randist module.
 Usage   : my $community = $sampler->get_rand_community(1000);
 Args    : Number of members (positive integer)
 Returns : A Bio::Community object

=cut

method get_rand_community ( PositiveInt $total_count = 1 ) {
   # Inserting random members one at a time into a community is slow, so all
   # draws are generated up front and each drawn member is then added once,
   # together with the number of times it was drawn.
   my $draws     = $self->_get_rand_members($total_count);
   my $sampled   = Bio::Community->new();
   my $reference = $self->community;

   # Index 0 of the draws corresponds to the member of rank 1
   my $last_idx = scalar(@$draws) - 1;
   for my $idx (0 .. $last_idx) {
      # Members that were never drawn are left out of the new community
      my $nof_draws = $draws->[$idx] or next;
      my $member    = $reference->get_member_by_rank($idx + 1);
      $sampled->add_member( $member, $nof_draws );
   }

   return $sampled;
}


####
# Implement sampling without replacement:
#    #Use gsl_ran_choose from GSL: https://www.gnu.org/software/gsl/manual/html_node/Shuffling-and-Sampling.html
#    Output == input if count required == count in reference community
#    Throw if count required > count in reference community
#    Throw if reference community has percentages
#    Make an array [Member1, Member1, Member2, Member3, Member3, Member3]
#    Take a random member

# Without replacement should be default. It is beneficial when:
#    sampling close to max count in community

# Sampling with replacement is beneficial when:
#    sampling from percentages (or with weights)
#    sampling beyond observed count in community
#    member count is so high that sampling without replacement would exhaust memory

# But which one is faster / more resource-efficient?
####


method _get_rand_members ( $total_count = 1 ) {
   # Draw $total_count members at random (multinomial draw) and return what
   # gsl_ran_multinomial() gives back. Callers in this file treat it as an
   # array ref of counts in which index 0 belongs to the member of rank 1.

   # 1/ Build the probability vector: one relative abundance per rank
   my $community = $self->community;
   $self->throw('No community was provided') if not $community;
   my @rel_abs = map {
      scalar $community->get_rel_ab( $community->get_member_by_rank($_) )
   } 1 .. $community->get_richness;

   # 2/ Load the GSL bindings on demand, failing loudly if they are missing
   eval { require Math::GSL::Randist }
      or $self->throw("Need module Math::GSL::Randist to draw random members from community\n$@");

   # A single multinomial draw; calling get_rand_member() repeatedly instead
   # would work but be very slow.
   return Math::GSL::Randist::gsl_ran_multinomial($self->_prng, \@rel_abs, $total_count);
}


__PACKAGE__->meta->make_immutable;

1;



( run in 0.633 second using v1.01-cache-2.11-cpan-39bf76dae61 )