Algorithm-RandomPointGenerator

 view release on metacpan or  search on metacpan

lib/Algorithm/RandomPointGenerator.pm  view on Meta::CPAN

        }
        $self->{_input_histogram} = \@newhist; 
    } elsif ($self->{_y_axis_pos_direction} eq "down") {
        $self->{_input_histogram} = \@hist; 
    } else {
        die "Something is wrong with your value for the construction option 'y_xis_pos_direction'";
    }
}


# This subroutine is for reading from a text file the horizontal and the vertical
# limits of the bounding box in which the randomly generated points must reside.  The
# file can either be in the CSV format or the white-space-separated format.  Apart
# from any comment lines that begin with the hash mark, this must must contain
# exactly two lines, the first indicated the x-axis points that define the horizontal
# span of the bounding box and the second indicating the y-axis points that define
# the vertical span of the same.
sub read_file_for_bounding_box {
    my $self = shift;
    my $histref = $self->{_input_histogram};
    my $filename = $self->{_bbox_file};
    open FILEIN, $filename
        or die "unable to open file: $!";
    my @bounding_box;
    while (<FILEIN>) {
        next if /^#/;
        next if /^\s*$/;
        chomp;
        my @line;
        if ($filename =~ /.csv$/) {
            @line =  map {$_ =~ s/^\s*|\s*$//; $_} split /,/, $_;
        } else {
            @line = split;
        }
        die "There must exist exactly two entries in each line of the bounding box file"
            if @line != 2;
        push @bounding_box, \@line;
    }
    die "There must exist only two lines in the bounding box file" if @bounding_box != 2;
    close FILEIN;
    $self->{_x_delta} = ($bounding_box[0][1] - $bounding_box[0][0]) / @{$histref->[0]};
    $self->{_y_delta} = ($bounding_box[1][1] - $bounding_box[1][0]) / @$histref;
    $self->{_bounding_box} = \@bounding_box;
}
    
# This is the heart of the module --- in the sense that this method implements the
# Metropolis-Hastings algorithm for generating the random points.  This algorithm is
# the most popular algorithm today for what is known as the MCMC (Markov Chain Monte
# Carlo) sampling from a desired probability distribution.
sub metropolis_hastings {    
    $|++;
    my $self = shift;
    die "\nyou must first call read_file_for_bounding_box() before you can call\n" .   
        "metropolis_hastings()$!\n" unless $self->{_bounding_box};
    die "\nyou must first call normalize_input_histogram() before you can call\n" .
        "metropolis_hastings()$!\n" unless $self->{_normalized_input_hist};
    die "\nyou must first call set_sigmas_for_proposal_density() before you can call\n" .
        "metropolis_hastings()$!\n" unless $self->{_sigmax_for_proposal_density};
    my $box = $self->{_bounding_box};
    my $N_discard = $self->{_how_many_to_discard};
    my $N_iterations = $self->{_N} + $N_discard;
    my $sample = [$box->[0][0] + ($box->[0][1] - $box->[0][0]) / 2.0,
                  $box->[1][0] + ($box->[1][1] - $box->[1][0]) / 2.0];
    while ($self->desired_density($sample) == 0) {
        $sample = [$self->{_bounding_box}->[0][0] +
             random_uniform() * ($self->{_bounding_box}->[0][1] - $self->{_bounding_box}->[0][0]),
                   $self->{_bounding_box}->[1][0] +
             random_uniform() * ($self->{_bounding_box}->[1][1] - $self->{_bounding_box}->[1][0])]
    }
    print "\nstarting sample: @$sample\n" unless $self->{_command_line_mode};
    if ($self->{_command_line_mode}) {
        print "\nThe Metropolis-Hastings algorithm will be run over 2500 iterations.\n" .
              "Of the 2500 points generated, the first 500 will be discarded.\n" .
              "Each dot shown below stands for 50 iterations of the algorithm.\n\n";
    } else {
        print "\nThe Metropolis-Hastings algorithm will be run for $N_discard more iterations\n" .
              "than the number of points you requested.  The first $N_discard initial points thus\n" .
              "generated will be discarded from the final output.\n\n";
    }
    my @arr;
    foreach my $i (0..$N_iterations-1) {
        unless ($self->{_command_line_mode}) {
            print "\nIteration number: $i  (out of $N_iterations)\n" if $i % ($N_iterations / 10) == 0;
        }
        print ". " if $i % 50 == 0;
        # Get proposal probability q( $y | $x ).
        my ($newsample, $prob) = $self->get_sample_using_proposal( $sample ); 
        my $a1 = $self->desired_density( $newsample ) / $self->desired_density( $sample );
        my $a2 = $self->proposal_density( $sample, $newsample ) / $prob;
        my $a = $a1 * $a2;
        my $u = random_uniform();
        if ( $a >= 1 ) {
            $sample = $newsample;
            push @arr, $sample;
        } elsif ($u < $a) {
            $sample = $newsample;
            push @arr, $sample;
        } else {
            push @arr, $sample;
        }
    }
    print "\nTotal number of iterations run: $N_iterations\n\n" unless $self->{_command_line_mode};
    $self->{_generated_points} = \@arr;
}

# From the standpoint of matching the input histogram, the quality of the random
# points generated by the Metropolis-Hastings algorithm is affected by what sort of a
# probability distribution is used for the proposal density function.  This module
# uses a bivariate Gaussian density function for this purpose whose widths along x
# and y are controlled by constructor parameter "proposal_density_width" that
# defaults to 0.1.  The function here sets the values for the sigmax and sigmay
# parameters of this bivariate density.
sub set_sigmas_for_proposal_density {
    my $self = shift;
    die "\nyou must call read_file_for_bounding_box() before you can call\n" .   
        "set_sigmas_for_proposal_density()$!\n" unless $self->{_bounding_box};
    my $param = $self->{_proposal_density_width};
    my $box = $self->{_bounding_box};
    $self->{_sigmax_for_proposal_density} = ($box->[0][1] - $box->[0][0]) * $param;
    $self->{_sigmay_for_proposal_density} = ($box->[1][1] - $box->[1][0]) * $param;
}

# This function is called by metropolis_hastings() for generating the next point in a
# random walk in the 2D plane:
sub get_sample_using_proposal {
    my $self = shift;
    my $x = shift;
    my $mean = $x;      # for proposal_prob($y|$x)  =  norm($x, $sigma ** 2) 
    my $sigmax = $self->{_sigmax_for_proposal_density};
    my $sigmay = $self->{_sigmay_for_proposal_density};
    my @SIGMA = ( [$sigmax**2, 0], [0, $sigmay**2] );
    my $sample = Math::Random::random_multivariate_normal( 1, @$mean, @SIGMA );
    my $gaussian_exponent = - 0.5 * ( (($sample->[0] - $mean->[0])**2 / $sigmax**2 ) + 
                                (($sample->[1] - $mean->[1])**2 / $sigmay**2 ) ) ;
    my $prob = ( 1.0 / (2 * PI * $sigmax * $sigmay ) ) * euler( $gaussian_exponent );
    return ($sample, $prob);
}            

# This function returns the desired density at the candidate point produced by the by
# proposal density function.  The desired density is calculated by applying bilinear
# interpolation to the bin counts to the four nearest four points in the normalized
# version of the input histogram.
sub desired_density {
    my $self = shift;
    my $sample = shift;
    my $histref = $self->{_normalized_input_hist};
    my $bbsize = $self->{_bounding_box};
    my $horiz_delta = $self->{_x_delta};
    my $vert_delta = $self->{_y_delta};
    print "horiz_delta: $horiz_delta    vert_delta: $vert_delta\n" if $self->{_debug};
    return 0 if $sample->[0] < $bbsize->[0][0] || $sample->[0] > $bbsize->[0][1] ||
                $sample->[1] < $bbsize->[1][0] || $sample->[1] > $bbsize->[1][1];
    print "horizontal extent: $bbsize->[0][0]  $bbsize->[0][1]\n" if $self->{_debug};
    print "vertical extent: $bbsize->[1][0]  $bbsize->[1][1]\n" if $self->{_debug};
    my $bin_horiz = int( ($sample->[0] - $bbsize->[0][0]) / $horiz_delta );
    my $bin_vert = int( ($sample->[1] - $bbsize->[1][0]) / $vert_delta );
    print "bin 2D index: horiz: $bin_horiz   vert: $bin_vert  for sample value @$sample\n"
        if $self->{_debug};
    my $prob00 = $histref->[$bin_vert][$bin_horiz] || 0;
    return $prob00 if (($bin_horiz + 1) >= @{$histref->[0]}) || (($bin_vert + 1) >= @{$histref});
    my $prob01 = $histref->[$bin_vert + 1][$bin_horiz] || 0;
    my $prob10 = $histref->[$bin_vert][$bin_horiz+ 1] || 0;



( run in 4.084 seconds using v1.01-cache-2.11-cpan-96521ef73a4 )