Algorithm-RandomPointGenerator
view release on metacpan or search on metacpan
lib/Algorithm/RandomPointGenerator.pm view on Meta::CPAN
}
$self->{_input_histogram} = \@newhist;
} elsif ($self->{_y_axis_pos_direction} eq "down") {
$self->{_input_histogram} = \@hist;
} else {
die "Something is wrong with your value for the construction option 'y_xis_pos_direction'";
}
}
# This subroutine is for reading from a text file the horizontal and the vertical
# limits of the bounding box in which the randomly generated points must reside. The
# file can either be in the CSV format or the white-space-separated format. Apart
# from any comment lines that begin with the hash mark, this must must contain
# exactly two lines, the first indicated the x-axis points that define the horizontal
# span of the bounding box and the second indicating the y-axis points that define
# the vertical span of the same.
sub read_file_for_bounding_box {
my $self = shift;
my $histref = $self->{_input_histogram};
my $filename = $self->{_bbox_file};
open FILEIN, $filename
or die "unable to open file: $!";
my @bounding_box;
while (<FILEIN>) {
next if /^#/;
next if /^\s*$/;
chomp;
my @line;
if ($filename =~ /.csv$/) {
@line = map {$_ =~ s/^\s*|\s*$//; $_} split /,/, $_;
} else {
@line = split;
}
die "There must exist exactly two entries in each line of the bounding box file"
if @line != 2;
push @bounding_box, \@line;
}
die "There must exist only two lines in the bounding box file" if @bounding_box != 2;
close FILEIN;
$self->{_x_delta} = ($bounding_box[0][1] - $bounding_box[0][0]) / @{$histref->[0]};
$self->{_y_delta} = ($bounding_box[1][1] - $bounding_box[1][0]) / @$histref;
$self->{_bounding_box} = \@bounding_box;
}
# This is the heart of the module --- in the sense that this method implements the
# Metropolis-Hastings algorithm for generating the random points. This algorithm is
# the most popular algorithm today for what is known as the MCMC (Markov Chain Monte
# Carlo) sampling from a desired probability distribution.
sub metropolis_hastings {
$|++;
my $self = shift;
die "\nyou must first call read_file_for_bounding_box() before you can call\n" .
"metropolis_hastings()$!\n" unless $self->{_bounding_box};
die "\nyou must first call normalize_input_histogram() before you can call\n" .
"metropolis_hastings()$!\n" unless $self->{_normalized_input_hist};
die "\nyou must first call set_sigmas_for_proposal_density() before you can call\n" .
"metropolis_hastings()$!\n" unless $self->{_sigmax_for_proposal_density};
my $box = $self->{_bounding_box};
my $N_discard = $self->{_how_many_to_discard};
my $N_iterations = $self->{_N} + $N_discard;
my $sample = [$box->[0][0] + ($box->[0][1] - $box->[0][0]) / 2.0,
$box->[1][0] + ($box->[1][1] - $box->[1][0]) / 2.0];
while ($self->desired_density($sample) == 0) {
$sample = [$self->{_bounding_box}->[0][0] +
random_uniform() * ($self->{_bounding_box}->[0][1] - $self->{_bounding_box}->[0][0]),
$self->{_bounding_box}->[1][0] +
random_uniform() * ($self->{_bounding_box}->[1][1] - $self->{_bounding_box}->[1][0])]
}
print "\nstarting sample: @$sample\n" unless $self->{_command_line_mode};
if ($self->{_command_line_mode}) {
print "\nThe Metropolis-Hastings algorithm will be run over 2500 iterations.\n" .
"Of the 2500 points generated, the first 500 will be discarded.\n" .
"Each dot shown below stands for 50 iterations of the algorithm.\n\n";
} else {
print "\nThe Metropolis-Hastings algorithm will be run for $N_discard more iterations\n" .
"than the number of points you requested. The first $N_discard initial points thus\n" .
"generated will be discarded from the final output.\n\n";
}
my @arr;
foreach my $i (0..$N_iterations-1) {
unless ($self->{_command_line_mode}) {
print "\nIteration number: $i (out of $N_iterations)\n" if $i % ($N_iterations / 10) == 0;
}
print ". " if $i % 50 == 0;
# Get proposal probability q( $y | $x ).
my ($newsample, $prob) = $self->get_sample_using_proposal( $sample );
my $a1 = $self->desired_density( $newsample ) / $self->desired_density( $sample );
my $a2 = $self->proposal_density( $sample, $newsample ) / $prob;
my $a = $a1 * $a2;
my $u = random_uniform();
if ( $a >= 1 ) {
$sample = $newsample;
push @arr, $sample;
} elsif ($u < $a) {
$sample = $newsample;
push @arr, $sample;
} else {
push @arr, $sample;
}
}
print "\nTotal number of iterations run: $N_iterations\n\n" unless $self->{_command_line_mode};
$self->{_generated_points} = \@arr;
}
# From the standpoint of matching the input histogram, the quality of the random
# points generated by the Metropolis-Hastings algorithm is affected by what sort of a
# probability distribution is used for the proposal density function. This module
# uses a bivariate Gaussian density function for this purpose whose widths along x
# and y are controlled by constructor parameter "proposal_density_width" that
# defaults to 0.1. The function here sets the values for the sigmax and sigmay
# parameters of this bivariate density.
sub set_sigmas_for_proposal_density {
my $self = shift;
die "\nyou must call read_file_for_bounding_box() before you can call\n" .
"set_sigmas_for_proposal_density()$!\n" unless $self->{_bounding_box};
my $param = $self->{_proposal_density_width};
my $box = $self->{_bounding_box};
$self->{_sigmax_for_proposal_density} = ($box->[0][1] - $box->[0][0]) * $param;
$self->{_sigmay_for_proposal_density} = ($box->[1][1] - $box->[1][0]) * $param;
}
# This function is called by metropolis_hastings() for generating the next point in a
# random walk in the 2D plane:
sub get_sample_using_proposal {
my $self = shift;
my $x = shift;
my $mean = $x; # for proposal_prob($y|$x) = norm($x, $sigma ** 2)
my $sigmax = $self->{_sigmax_for_proposal_density};
my $sigmay = $self->{_sigmay_for_proposal_density};
my @SIGMA = ( [$sigmax**2, 0], [0, $sigmay**2] );
my $sample = Math::Random::random_multivariate_normal( 1, @$mean, @SIGMA );
my $gaussian_exponent = - 0.5 * ( (($sample->[0] - $mean->[0])**2 / $sigmax**2 ) +
(($sample->[1] - $mean->[1])**2 / $sigmay**2 ) ) ;
my $prob = ( 1.0 / (2 * PI * $sigmax * $sigmay ) ) * euler( $gaussian_exponent );
return ($sample, $prob);
}
# This function returns the desired density at the candidate point produced by the by
# proposal density function. The desired density is calculated by applying bilinear
# interpolation to the bin counts to the four nearest four points in the normalized
# version of the input histogram.
sub desired_density {
my $self = shift;
my $sample = shift;
my $histref = $self->{_normalized_input_hist};
my $bbsize = $self->{_bounding_box};
my $horiz_delta = $self->{_x_delta};
my $vert_delta = $self->{_y_delta};
print "horiz_delta: $horiz_delta vert_delta: $vert_delta\n" if $self->{_debug};
return 0 if $sample->[0] < $bbsize->[0][0] || $sample->[0] > $bbsize->[0][1] ||
$sample->[1] < $bbsize->[1][0] || $sample->[1] > $bbsize->[1][1];
print "horizontal extent: $bbsize->[0][0] $bbsize->[0][1]\n" if $self->{_debug};
print "vertical extent: $bbsize->[1][0] $bbsize->[1][1]\n" if $self->{_debug};
my $bin_horiz = int( ($sample->[0] - $bbsize->[0][0]) / $horiz_delta );
my $bin_vert = int( ($sample->[1] - $bbsize->[1][0]) / $vert_delta );
print "bin 2D index: horiz: $bin_horiz vert: $bin_vert for sample value @$sample\n"
if $self->{_debug};
my $prob00 = $histref->[$bin_vert][$bin_horiz] || 0;
return $prob00 if (($bin_horiz + 1) >= @{$histref->[0]}) || (($bin_vert + 1) >= @{$histref});
my $prob01 = $histref->[$bin_vert + 1][$bin_horiz] || 0;
my $prob10 = $histref->[$bin_vert][$bin_horiz+ 1] || 0;
( run in 4.084 seconds using v1.01-cache-2.11-cpan-96521ef73a4 )