Algorithm-ExpectationMaximization
view release on metacpan or search on metacpan
examples/cleanup_directory.pl view on Meta::CPAN
# There should be no need to call this script ordinarily.
# When the Algorithm::KMeans module creates new cluster files,
# it automatically deletes all such previously created files.
# Such files are named ClusterX.dat for X starting with X = 0.
# The files __temp_* are created by the visualization script.
# However, when the program terminates properly, it should
# automatically delete those files.
# Delete, from the current working directory, every file whose name matches
# one of the scratch/output patterns below.  Patterns are processed in the
# listed order; files that do not exist are silently ignored by unlink().
foreach my $pattern ("Cluster*.dat",
                     "__temp_*",
                     "__cluster_*.dat",
                     "__temp2_*",
                     "__contour_*",
                     "__contour2_*") {
    unlink glob $pattern;
}
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
die "\nYour mask size (including `N' and 1's and 0's) does not match\n" .
"the size of at least one of the data records in the file.\n"
unless scalar(@mask) == scalar(@splits);
my $record_name = shift @splits;
$data_hash{$record_name} = \@splits;
push @data_tags, $record_name;
}
$self->{_data} = \%data_hash;
$self->{_data_id_tags} = \@data_tags;
$self->{_N} = scalar @data_tags;
# Need to make the following call to set the global mean and covariance:
# my $covariance = $self->estimate_mean_and_covariance(\@data_tags);
# Need to make the following call to set the global eigenvec eigenval sets:
# $self->eigen_analysis_of_covariance($covariance);
if ( defined($self->{_K}) && ($self->{_K} > 0) ) {
carp "\n\nWARNING: YOUR K VALUE IS TOO LARGE.\n The number of data " .
"points must satisfy the relation N > 2xK**2 where K is " .
"the number of clusters requested for the clusters to be " .
"meaningful $!"
if ( $self->{_N} < (2 * $self->{_K} ** 2) );
print "\n\n\n";
}
}
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
# Computes a Fisher-discriminant style clustering quality measure:
# the trace of inverse(S_w) * S_b, where
#   S_w = sum over clusters of  prior_k * covariance_k            (within-class scatter)
#   S_b = sum over clusters of  prior_k * (mean_k - m)(mean_k - m)^T  (between-class scatter)
# and m is the prior-weighted sum of the cluster means (the global mean).
#
# Reads from the object: _data_dimensions, _K, _class_priors,
# _cluster_means and _cluster_covariances.
#
# Returns matrix_trace() of the product.  If matrix_multiply() hands back
# something that is not a blessed object, that value is returned unchanged
# (presumably an error indicator from matrix_multiply() -- confirm against
# that helper).
sub clustering_quality_fisher {
    my $self = shift;
    my $dim  = $self->{_data_dimensions};
    my $S_w = Math::GSL::Matrix->new($dim, $dim);
    $S_w->zero;
    my $S_b = Math::GSL::Matrix->new($dim, $dim);
    $S_b->zero;
    my $global_mean = Math::GSL::Matrix->new($dim, 1);
    $global_mean->zero;
    # The global mean is the prior-weighted SUM of the cluster means, so each
    # term must be accumulated.  A plain assignment here would leave only the
    # last cluster's contribution and corrupt S_b below.
    foreach my $cluster_index (0..$self->{_K}-1) {
        $global_mean += $self->{_class_priors}->[$cluster_index] *
                        $self->{_cluster_means}->[$cluster_index];
    }
    foreach my $cluster_index (0..$self->{_K}-1) {
        $S_w += $self->{_cluster_covariances}->[$cluster_index] *
                $self->{_class_priors}->[$cluster_index];
        my $class_mean_minus_global_mean =
            $self->{_cluster_means}->[$cluster_index] - $global_mean;
        my $outer_product = outer_product( $class_mean_minus_global_mean,
                                           $class_mean_minus_global_mean );
        $S_b += $self->{_class_priors}->[$cluster_index] * $outer_product;
    }
    my $fisher = matrix_multiply($S_w->inverse, $S_b);
    # Pass a non-object result straight through instead of taking its trace.
    return $fisher unless defined blessed($fisher);
    return matrix_trace($fisher);
}
sub display_seeding_stats {
my $self = shift;
foreach my $cluster_index(0..$self->{_K}-1) {
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
my $squared_sum = 0;
foreach my $i (0..$how_many-1) {
$squared_sum += ($ele1[$i] - $ele2[$i])**2;
}
return sqrt $squared_sum;
}
sub write_naive_bayes_clusters_to_files {
my $self = shift;
my @clusters = @{$self->{_clusters}};
unlink glob "naive_bayes_cluster*.txt";
foreach my $i (1..@clusters) {
my $filename = "naive_bayes_cluster" . $i . ".txt";
print "Writing cluster $i to file $filename\n"
if $self->{_terminal_output};
open FILEHANDLE, "| sort > $filename" or die "Unable to open file: $!";
foreach my $ele (@{$clusters[$i-1]}) {
print FILEHANDLE "$ele\n";
}
close FILEHANDLE;
}
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
foreach my $cluster_index (0..$self->{_K}-1) {
push @class_distributions, [];
}
foreach my $data_tag (@{$self->{_data_id_tags}}) {
foreach my $cluster_index (0..$self->{_K}-1) {
push @{$class_distributions[$cluster_index]}, $data_tag
if $self->{_expected_class_probs}->{$data_tag}->[$cluster_index]
> $theta;
}
}
unlink glob "posterior_prob_cluster*.txt";
foreach my $i (1..@class_distributions) {
my $filename = "posterior_prob_cluster" . $i . ".txt";
print "Writing posterior prob cluster $i to file $filename\n"
if $self->{_terminal_output};
open FILEHANDLE, "| sort > $filename" or die "Unable to open file: $!";
foreach my $ele (@{$class_distributions[$i-1]}) {
print FILEHANDLE "$ele\n";
}
close FILEHANDLE;
}
}
# Destructor: removes the scratch files this object may have left in the
# current working directory -- the three per-datafile temp files (named
# after the basename of $self->{_datafile}) plus any __temp1dhist* and
# __contour* files.  Missing files are ignored.
sub DESTROY {
    my $self = shift;
    # A destructor can run at any point, and a failed unlink() sets $!.
    # Localize the global status variables so cleanup never disturbs the
    # error state of whatever code happened to trigger destruction.
    local($., $@, $!, $^E, $?);
    # basename() croaks on an undefined path; when no datafile was ever
    # recorded, skip the per-datafile files but still run the glob cleanup.
    if (defined $self->{_datafile}) {
        my $base = basename($self->{_datafile});
        unlink "__temp_" . $base;
        unlink "__temp_data_" . $base;
        unlink "__temp2_" . $base;
    }
    unlink glob "__temp1dhist*";
    unlink glob "__contour*";
}
############################# Visualization Code ###############################
# The visualize_clusters() implementation displays as a plot in your terminal window
# the clusters constructed by the EM algorithm. It can show either 2D plots or
# 3D plots that you can rotate interactively for better visualization.  For
# multidimensional data, the mask you must supply as an argument to the method
# controls which two or three dimensions are used for visualization.  Should it
# happen that only one on bit is specified for the mask, visualize_clusters()
( run in 1.409 second using v1.01-cache-2.11-cpan-49f99fa48dc )