Algorithm-ExpectationMaximization

 view release on metacpan or  search on metacpan

examples/cleanup_directory.pl  view on Meta::CPAN

#  There should be no need to call this script ordinarily.

#  When the Algorithm::KMeans module creates new cluster files,
#  it automatically delete all previously created such files.
#  Such files are named ClusterX.dat for X starting with X = 0.
#  The files __temp_* are created by the visualization script.
#  However, when the program terminates properly, it should 
#  automatically delete those files.


unlink glob "Cluster*.dat";

unlink glob "__temp_*";

unlink glob "__cluster_*.dat";

unlink glob "__temp2_*";

unlink glob "__contour_*";

unlink glob "__contour2_*";

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

        die "\nYour mask size (including `N' and 1's and 0's) does not match\n" .
            "the size of at least one of the data records in the file.\n"
            unless scalar(@mask) == scalar(@splits);
        my $record_name = shift @splits;
        $data_hash{$record_name} = \@splits;
        push @data_tags, $record_name;
    }
    $self->{_data} = \%data_hash;
    $self->{_data_id_tags} = \@data_tags;
    $self->{_N} = scalar @data_tags;
    # Need to make the following call to set the global mean and covariance:
    # my $covariance =  $self->estimate_mean_and_covariance(\@data_tags);
    # Need to make the following call to set the global eigenvec eigenval sets:
    # $self->eigen_analysis_of_covariance($covariance);
    if ( defined($self->{_K}) && ($self->{_K} > 0) ) {
        carp "\n\nWARNING: YOUR K VALUE IS TOO LARGE.\n The number of data " .
             "points must satisfy the relation N > 2xK**2 where K is " .
             "the number of clusters requested for the clusters to be " .
             "meaningful $!" 
                         if ( $self->{_N} < (2 * $self->{_K} ** 2) );
        print "\n\n\n";
    }
}

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

sub clustering_quality_fisher {
    my $self = shift;
    my @cluster_quality_indices;
    my $fisher_trace = 0;
    my $S_w = 
        Math::GSL::Matrix->new($self->{_data_dimensions}, $self->{_data_dimensions});
    $S_w->zero;
    my $S_b = 
        Math::GSL::Matrix->new($self->{_data_dimensions}, $self->{_data_dimensions});
    $S_b->zero;
    my $global_mean = Math::GSL::Matrix->new($self->{_data_dimensions},1);    
    $global_mean->zero;
    foreach my $cluster_index(0..$self->{_K}-1) { 
        $global_mean = $self->{_class_priors}->[$cluster_index] *
                                    $self->{_cluster_means}->[$cluster_index];
    }
    foreach my $cluster_index(0..$self->{_K}-1) {      
        $S_w +=  $self->{_cluster_covariances}->[$cluster_index] * 
                            $self->{_class_priors}->[$cluster_index];
        my $class_mean_minus_global_mean = $self->{_cluster_means}->[$cluster_index] 
                                                               - $global_mean;
        my $outer_product = outer_product( $class_mean_minus_global_mean, 
                                               $class_mean_minus_global_mean );
        $S_b +=  $self->{_class_priors}->[$cluster_index] * $outer_product;
    }
    my $fisher = matrix_multiply($S_w->inverse, $S_b);
    return $fisher unless defined blessed($fisher);
    return matrix_trace($fisher);
}

sub display_seeding_stats {
    my $self = shift;
    foreach my $cluster_index(0..$self->{_K}-1) {      

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

    my $squared_sum = 0;
    foreach my $i (0..$how_many-1) {
        $squared_sum += ($ele1[$i] - $ele2[$i])**2;
    }    
    return sqrt $squared_sum;
}

sub write_naive_bayes_clusters_to_files {
    my $self = shift;
    my @clusters = @{$self->{_clusters}};
    unlink glob "naive_bayes_cluster*.txt";
    foreach my $i (1..@clusters) {
        my $filename = "naive_bayes_cluster" . $i . ".txt";
        print "Writing cluster $i to file $filename\n"
                            if $self->{_terminal_output};
        open FILEHANDLE, "| sort > $filename" or die "Unable to open file: $!";
        foreach my $ele (@{$clusters[$i-1]}) {        
            print FILEHANDLE "$ele\n";
        }
        close FILEHANDLE;
    }

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

    foreach my $cluster_index (0..$self->{_K}-1) {
        push @class_distributions, [];
    }
    foreach my $data_tag (@{$self->{_data_id_tags}}) {
        foreach my $cluster_index (0..$self->{_K}-1) {
            push @{$class_distributions[$cluster_index]}, $data_tag
                if $self->{_expected_class_probs}->{$data_tag}->[$cluster_index] 
                                            > $theta;
        }
    }
    unlink glob "posterior_prob_cluster*.txt";
    foreach my $i (1..@class_distributions) {
        my $filename = "posterior_prob_cluster" . $i . ".txt";
        print "Writing posterior prob cluster $i to file $filename\n"
                            if $self->{_terminal_output};
        open FILEHANDLE, "| sort > $filename" or die "Unable to open file: $!";
        foreach my $ele (@{$class_distributions[$i-1]}) {        
            print FILEHANDLE "$ele\n";
        }
        close FILEHANDLE;
    }
}

sub DESTROY {
    unlink "__temp_" . basename($_[0]->{_datafile});
    unlink "__temp_data_" . basename($_[0]->{_datafile});
    unlink "__temp2_" . basename($_[0]->{_datafile});
    unlink glob "__temp1dhist*";
    unlink glob "__contour*";
}

#############################  Visualization Code ###############################

#  The visualize_clusters() implementation displays as a plot in your terminal window
#  the clusters constructed by the EM algorithm.  It can show either 2D plots or
#  3D plots that you can rotate interactively for better visualization.  For
#  multidimensional data, as to which 2D or 3D dimensions are used for visualization
#  is controlled by the mask you must supply as an argument to the method.  Should it
#  happen that only one on bit is specified for the mask, visualize_clusters()



( run in 1.409 second using v1.01-cache-2.11-cpan-49f99fa48dc )