Algorithm-RandomPointGenerator
view release on metacpan or search on metacpan
lib/Algorithm/RandomPointGenerator.pm view on Meta::CPAN
package Algorithm::RandomPointGenerator;
#---------------------------------------------------------------------------
# Copyright (c) 2014 Avinash Kak. All rights reserved. This program is free
# software. You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::RandomPointGenerator generates a set of random points in a 2D plane
# according to a user-specified probability distribution that is supplied as a
# 2D histogram.
# ---------------------------------------------------------------------------
use 5.10.0;
use strict;
use Carp;
use List::Util qw/reduce/;
use Math::Random;
use constant PI => 4 * atan2( 1, 1 );
use Math::Big qw/euler/;
use File::Basename;
use Graphics::GnuplotIF;
our $VERSION = '1.01';
# from perl docs:
my $_num_regex = '^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$';
# Useful for creating reproducible results:
random_seed_from_phrase('hellojello');
# Constructor:
sub new {
my ($class, %args) = @_;
my @params = keys %args;
croak "\nYou have used a wrong name for a keyword argument " .
"--- perhaps a misspelling\n"
if check_for_illegal_params(@params) == 0;
bless {
_hist_file => $args{input_histogram_file} || croak("histogram file required"),
_bbox_file => $args{bounding_box_file} || croak("bounding box file required"),
_N => $args{number_of_points} || 2000,
_how_many_to_discard => $args{how_many_to_discard} || 500,
_debug => $args{debug} || 0,
_proposal_density_width => $args{proposal_density_width} || 0.1,
_y_axis_pos_direction => $args{y_axis_pos_direction} || "down",
_output_hist_bins_along_x => $args{output_hist_bins_along_x} || 40,
_command_line_mode => $args{command_line_mode} || 0,
_x_delta => undef,
_y_delta => undef,
_input_histogram => undef,
_output_histogram => undef,
_bounding_box => undef,
_generated_points => undef,
_normalized_input_hist => undef,
_sigmax_for_proposal_density => undef,
_sigmay_for_proposal_density => undef,
_bin_width_for_output_hist => undef,
_bin_height_for_output_hist => undef,
}, $class;
}
# The file that contains a 2D histogram of the desired probability distribution for
# the random points must either be in the CSV or the white-space-separated
# format. Since we are working with 2D densities, each line of this text file should
# show one row of the histogram. The subroutine creates an array of arrays from the
# information read from the file, which each inner row holding one row of the
# histogram.
sub read_histogram_file_for_desired_density {
my $self = shift;
my $filename = $self->{_hist_file};
open FILEIN, $filename
or die "unable to open file: $!";
my @hist;
while (<FILEIN>) {
next if /^#/;
next if /^\s*$/;
chomp;
my @line;
if ($filename =~ /.csv$/) {
@line = map {$_ =~ s/^\s*|\s*$//; $_} split /,/, $_;
} else {
@line = split;
}
push @hist, \@line;
}
close FILEIN;
if ($self->{_y_axis_pos_direction} eq "up") {
my @newhist;
foreach my $i (0..@hist-1) {
push @newhist, $hist[scalar(@hist)-$i-1];
}
$self->{_input_histogram} = \@newhist;
} elsif ($self->{_y_axis_pos_direction} eq "down") {
$self->{_input_histogram} = \@hist;
} else {
die "Something is wrong with your value for the construction option 'y_xis_pos_direction'";
}
}
# This subroutine is for reading from a text file the horizontal and the vertical
# limits of the bounding box in which the randomly generated points must reside. The
# file can either be in the CSV format or the white-space-separated format. Apart
lib/Algorithm/RandomPointGenerator.pm view on Meta::CPAN
my $a = $a1 * $a2;
my $u = random_uniform();
if ( $a >= 1 ) {
$sample = $newsample;
push @arr, $sample;
} elsif ($u < $a) {
$sample = $newsample;
push @arr, $sample;
} else {
push @arr, $sample;
}
}
print "\nTotal number of iterations run: $N_iterations\n\n" unless $self->{_command_line_mode};
$self->{_generated_points} = \@arr;
}
# From the standpoint of matching the input histogram, the quality of the random
# points generated by the Metropolis-Hastings algorithm is affected by what sort of a
# probability distribution is used for the proposal density function. This module
# uses a bivariate Gaussian density function for this purpose whose widths along x
# and y are controlled by constructor parameter "proposal_density_width" that
# defaults to 0.1. The function here sets the values for the sigmax and sigmay
# parameters of this bivariate density.
sub set_sigmas_for_proposal_density {
my $self = shift;
die "\nyou must call read_file_for_bounding_box() before you can call\n" .
"set_sigmas_for_proposal_density()$!\n" unless $self->{_bounding_box};
my $param = $self->{_proposal_density_width};
my $box = $self->{_bounding_box};
$self->{_sigmax_for_proposal_density} = ($box->[0][1] - $box->[0][0]) * $param;
$self->{_sigmay_for_proposal_density} = ($box->[1][1] - $box->[1][0]) * $param;
}
# This function is called by metropolis_hastings() for generating the next point in a
# random walk in the 2D plane:
sub get_sample_using_proposal {
my $self = shift;
my $x = shift;
my $mean = $x; # for proposal_prob($y|$x) = norm($x, $sigma ** 2)
my $sigmax = $self->{_sigmax_for_proposal_density};
my $sigmay = $self->{_sigmay_for_proposal_density};
my @SIGMA = ( [$sigmax**2, 0], [0, $sigmay**2] );
my $sample = Math::Random::random_multivariate_normal( 1, @$mean, @SIGMA );
my $gaussian_exponent = - 0.5 * ( (($sample->[0] - $mean->[0])**2 / $sigmax**2 ) +
(($sample->[1] - $mean->[1])**2 / $sigmay**2 ) ) ;
my $prob = ( 1.0 / (2 * PI * $sigmax * $sigmay ) ) * euler( $gaussian_exponent );
return ($sample, $prob);
}
# This function returns the desired density at the candidate point produced by the by
# proposal density function. The desired density is calculated by applying bilinear
# interpolation to the bin counts to the four nearest four points in the normalized
# version of the input histogram.
sub desired_density {
my $self = shift;
my $sample = shift;
my $histref = $self->{_normalized_input_hist};
my $bbsize = $self->{_bounding_box};
my $horiz_delta = $self->{_x_delta};
my $vert_delta = $self->{_y_delta};
print "horiz_delta: $horiz_delta vert_delta: $vert_delta\n" if $self->{_debug};
return 0 if $sample->[0] < $bbsize->[0][0] || $sample->[0] > $bbsize->[0][1] ||
$sample->[1] < $bbsize->[1][0] || $sample->[1] > $bbsize->[1][1];
print "horizontal extent: $bbsize->[0][0] $bbsize->[0][1]\n" if $self->{_debug};
print "vertical extent: $bbsize->[1][0] $bbsize->[1][1]\n" if $self->{_debug};
my $bin_horiz = int( ($sample->[0] - $bbsize->[0][0]) / $horiz_delta );
my $bin_vert = int( ($sample->[1] - $bbsize->[1][0]) / $vert_delta );
print "bin 2D index: horiz: $bin_horiz vert: $bin_vert for sample value @$sample\n"
if $self->{_debug};
my $prob00 = $histref->[$bin_vert][$bin_horiz] || 0;
return $prob00 if (($bin_horiz + 1) >= @{$histref->[0]}) || (($bin_vert + 1) >= @{$histref});
my $prob01 = $histref->[$bin_vert + 1][$bin_horiz] || 0;
my $prob10 = $histref->[$bin_vert][$bin_horiz+ 1] || 0;
my $prob11 = $histref->[$bin_vert + 1][$bin_horiz + 1] || 0;
print "The four probs: $prob00 $prob01 $prob10 $prob11\n" if $self->{_debug};
my $horiz_fractional = (($sample->[0] - $bbsize->[0][0]) / $horiz_delta) - $bin_horiz;
my $vert_fractional = (($sample->[1] - $bbsize->[1][0]) / $vert_delta) - $bin_vert;
print "horiz frac: $horiz_fractional vert frac: $vert_fractional\n" if $self->{_debug};
my $interpolated_prob = $prob00 * (1 - $horiz_fractional) * (1 - $vert_fractional) +
$prob10 * $horiz_fractional * (1 - $vert_fractional) +
$prob01 * (1 - $horiz_fractional) * $vert_fractional +
$prob11 * $horiz_fractional * $vert_fractional;
print "Interpolated prob: $interpolated_prob\n" if $self->{_debug};
return $interpolated_prob;
}
sub proposal_density {
my $self = shift;
my $sample = shift;
my $mean = shift;
my $sigmax = $self->{_sigmax_for_proposal_density};
my $sigmay = $self->{_sigmay_for_proposal_density};
my @SIGMA = ( [$sigmax, 0], [0, $sigmay] );
my $gaussian_exponent = - 0.5 * ( (($sample->[0] - $mean->[0])**2 / $sigmax**2 ) +
(($sample->[1] - $mean->[1])**2 / $sigmay**2 ) ) ;
my $prob = ( 1.0 / (2 * PI * $sigmax * $sigmay ) ) * euler( $gaussian_exponent );
return $prob;
}
sub normalize_input_histogram {
my $self = shift;
die "\nyou must first call read_histogram_file_for_desired_density() before you can call\n" .
"normalize_input_histogram()$!\n" unless $self->{_input_histogram};
my $histref = deep_copy_AoA( $self->{_input_histogram} );
my $summation = reduce {$a + $b} map {@$_} @$histref;
foreach my $i (0..@$histref-1) {
foreach my $j (0..@{$histref->[0]}-1) {
$histref->[$i][$j] /= $summation;
}
}
$self->{_normalized_input_hist} = $histref;
}
# Display a 2D histogram. Requires one argument which must be a reference to an
# array of arrays, which inner array holding one row of the histogram.
sub display_hist_in_terminal_window {
my $self = shift;
die "\nyou must first call read_histogram_file_for_desired_density() before you can call\n" .
"display_hist_in_terminal_window()$!\n" unless $self->{_input_histogram};
my $histref = $self->{_input_histogram};
foreach my $y (0..@$histref-1) {
foreach my $x (0..@{$histref->[0]}-1) {
if ($histref->[$y][$x] < 100) {
printf "%d ", $histref->[$y][$x];
} else {
printf "%.3e ", $histref->[$y][$x];
}
}
print "\n";
}
print "\n";
}
sub write_generated_points_to_a_file {
my $self = shift;
die "\nyou must first call metropolis_hastings() before you can call\n" .
"write_generated_points_to_a_file()$!\n" unless $self->{_generated_points};
my $master_file_basename = basename($self->{_hist_file}, ('.csv', '.dat', '.txt'));
my $out_file = "random_points_generated_for_$master_file_basename.csv";
my @samples = @{$self->{_generated_points}};
my $N_discard = $self->{_how_many_to_discard};
my @truncated_sample_list = @samples[$N_discard..$#samples-1];
fisher_yates_shuffle(\@truncated_sample_list);
lib/Algorithm/RandomPointGenerator.pm view on Meta::CPAN
print OUTFILE "@out_sample\n";
} else {
print OUTFILE "\n";
}
$oldfirst = $first;
}
print OUTFILE "\n";
close OUTFILE;
my $argstring = <<"END";
set hidden3d
splot "$temp_file" with lines
END
my $plot;
if (!defined $pause_time) {
$plot = Graphics::GnuplotIF->new( persist => 1 );
} else {
$plot = Graphics::GnuplotIF->new();
}
# my $plot = Graphics::GnuplotIF->new(persist => 1);
$plot->gnuplot_cmd( $argstring );
$plot->gnuplot_pause( $pause_time ) if defined $pause_time;
}
sub DESTROY {
my $self = shift;
my $master_file_basename = basename($self->{_hist_file}, ('.csv', '.dat', '.txt'));
unlink glob "__temp_$master_file_basename*";
}
sub deep_copy_AoA {
my $ref_in = shift;
my $ref_out;
foreach my $i (0..@{$ref_in}-1) {
foreach my $j (0..@{$ref_in->[$i]}-1) {
$ref_out->[$i]->[$j] = $ref_in->[$i]->[$j];
}
}
return $ref_out;
}
# from perl docs:
sub fisher_yates_shuffle {
my $arr = shift;
my $i = @$arr;
while (--$i) {
my $j = int rand( $i + 1 );
@$arr[$i, $j] = @$arr[$j, $i];
}
}
sub check_for_illegal_params {
my @params = @_;
my @legal_params = qw / input_histogram_file
bounding_box_file
number_of_points
how_many_to_discard
proposal_density_width
y_axis_pos_direction
output_hist_bins_along_x
command_line_mode
debug
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
1;
=pod
=head1 NAME
Algorithm::RandomPointGenerator -- This module generates a set of random points in a
2D plane according to a user-specified probability distribution that is provided to
the module in the form of a 2D histogram.
=head1 SYNOPSIS
# The quickest way to use the module is through the script genRand2D that you'll
# find in the examples subdirectory. You can move this script to any convenient
# location in your directory structure. Call this script as a command-line utility
# in the following manner:
genRand2D --histfile your_histogram_file.csv --bbfile your_bounding_box_file.csv
# where the '--histfile' option supplies the name of the file that contains a 2D
# histogram and the option '--bbfile' the name of the file that defines a bounding
# box in the XY-plane to which the histogram applies. The module uses the
# Metropolis-Hastings algorithm to draw random points from a probability density
# function that is approximated by the 2D histogram you supply through the
# '--histfile' option. You can also run the command
genRand2D --help
# for further information regarding these two command-line options. An invocation
# of genRand2D gives you 2000 random points that are deposited in a file whose
# name is printed out in the terminal window in which you invoke the genRand2D
# command.
# The rest of this Synopsis concerns using the module with greater control over
# the production and display of random points. Obviously, the very first thing
# you would need to do would be to import the module:
use Algorithm::RandomPointGenerator;
# Next, name the file that contains a 2D histogram for the desired density
# function for the generation of random points:
my $input_histogram_file = "histogram.csv";
lib/Algorithm/RandomPointGenerator.pm view on Meta::CPAN
This is also a required call. This call reads in the left and the right limits on
the x-axis and the lower and the upper limits on the y-axis for the portion of the
XY-plane for which the random points are needed.
=item B<normalize_input_histogram():>
$generator->normalize_input_histogram();
This normalizes the input histogram to turn it into an approximation to the desired
probability density function for the random points.
=item B<set_sigmas_for_proposal_density():>
$generator->set_sigmas_for_proposal_density();
The Metropolis-Hastings algorithm is a random-walk algorithm that at each point first
creates a candidate for the next point and then makes a probabilistic decision
regarding the acceptance of the candidate point as the next point on the walk. The
candidate point is drawn from what is known as the proposal density function. This
module uses a bivariate Gaussian for the proposal density. You set the width of the
proposal density by calling this method.
=item B<metropolis_hastings():>
This is the workhorse of the module, as you would expect. As its name implies, it
uses the Metropolis-Hastings random-walk algorithm to generate a set of random points
whose probability distribution corresponds to the input histogram.
=item B<write_generated_points_to_a_file():>
$generator->write_generated_points_to_a_file();
This method writes the generated points to a disk file whose name is keyed to the
name of the input histogram file. The two coordinates for each generated random point
are written out to a new line in this file.
=item B<make_output_histogram_for_generated_points():>
$generator->make_output_histogram_for_generated_points();
The name of the method speaks for itself.
=item B<plot_histogram_3d_surface():>
$generator->plot_histogram_3d_surface();
This method uses a Perl wrapper for Gnuplot, as provided by the module
Graphics::GnuplotIF, for creating a 3D surface plot of the histogram of the random
points generated by the module.
=item B<plot_histogram_lineplot():>
$generator->plot_histogram_lineplot();
This creates a 3D line plot display of the histogram of the generated random points.
=item B<display_output_histogram_in_terminal_window():>
$generator->display_output_histogram_in_terminal_window();
Useful for debugging purposes, it displays in the terminal window a two dimensional
array of numbers that is the histogram of the random points generated by the module.
=back
=head1 THE C<examples> DIRECTORY
Probably the most useful item in the C<examples> directory is the command-line script
C<genRand2D> that can be called simply with two arguments for generating a set of
random points. A call to this script looks like
genRand2D --histfile your_histogram_file.csv --bbfile your_bounding_box_file.csv
where the C<--histfile> option supplies the name of the file that contains a 2D input
histogram and the C<--bbfile> option the name of the file that defines the bounding
box in the XY-plane. You can also execute the command line
genRand2D --help
for displaying information as to what is required by the two options for the
C<genRand2D> command. The command-line invocation of C<genRand2D> gives you 2000
random points that are deposited in a file whose name is printed out in the terminal
window in which you execute the command.
To become more familiar with all of the different options you can use with this
module, you should experiment with the script:
generate_random_points.pl
You can feed it different 2D histograms --- even made-up 2D histograms --- and look
at the histogram of the generated random points to see how well the module is
working. Keep in mind, though, if your made-up input histogram has disconnected
blobs in it, the random-points that are generated may correspond to just one of the
blobs. Since the process of random-point generation involves a random walk, the
algorithm may not be able to hop from one blob to another in the input histogram if
they are too far apart. As to what exactly you'll get by way of the output histogram
would depend on your choice of the width of the proposal density.
The C<examples> directory contains the following histogram and bounding-box files
for you to get started:
hist1.csv bb1.csv
hist2.csv bb2.csv
If you run the C<generate_random_points.pl> script with the C<hist1.csv> and
C<bb1.csv> files, the histogram you get for the 2000 random points generated by the
module will look like what you see in the file C<output_histogram_for_hist1.png>. On
a Linux machine, you can see this histogram with the usual C<display> command from
the ImageMagick library. And if you run C<generate_random_points.pl> script with the
C<hist2.csv> and C<bb2.csv> files, you'll see an output histogram that should look
like what you see in C<output_histogram_for_hist2.png>.
You should also try invoking the command-line calls:
genRand2D --histfile hist1.csv --bbfile bb1.csv
genRand2D --histfile hist2.csv --bbfile bb2.csv
=head1 REQUIRED
( run in 1.185 second using v1.01-cache-2.11-cpan-0bd6704ced7 )