Algorithm-ExpectationMaximization

 view release on metacpan or  search on metacpan

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

    $self->{_clusters} = $clusters;
    $self->{_cluster_centers} = $cluster_centers;  
}

# Used by the kmeans part of the code for the initialization of the EM algorithm:
sub cluster_for_given_K {
    my $self = shift;
    my $K = shift;
    my $cluster_centers = $self->get_initial_cluster_centers($K);
    my $clusters = $self->assign_data_to_clusters_initial($cluster_centers);  
    my $cluster_nonexistant_flag = 0;
    foreach my $trial (0..2) {
        ($clusters, $cluster_centers) =
                         $self->assign_data_to_clusters( $clusters, $K );
        my $num_of_clusters_returned = @$clusters;
        foreach my $cluster (@$clusters) {
            $cluster_nonexistant_flag = 1 if ((!defined $cluster) 
                                             ||  (@$cluster == 0));
        }
        last unless $cluster_nonexistant_flag;
    }
    return ($clusters, $cluster_centers);
}

# Used by the kmeans part of the code for the initialization of the EM algorithm:
sub get_initial_cluster_centers {
    my $self = shift;
    my $K = shift;
    if ($self->{_data_dimensions} == 1) {
        my @one_d_data;

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

# safety, we keep track of the number of iterations. If this number reaches 100, we
# exit the while() loop anyway.  In most cases, this limit will not be reached.
sub assign_data_to_clusters {
    my $self = shift;
    my $clusters = shift;
    my $K = shift;
    my $final_cluster_centers;
    my $iteration_index = 0;
    while (1) {
        my $new_clusters;
        my $assignment_changed_flag = 0;
        my $current_cluster_center_index = 0;
        my $cluster_size_zero_condition = 0;
        my $how_many = @$clusters;
        my $cluster_centers = $self->update_cluster_centers( 
                                    deep_copy_AoA_with_nulls( $clusters ) );
        $iteration_index++;
        foreach my $cluster (@$clusters) {
            my $current_cluster_center = 
                          $cluster_centers->[$current_cluster_center_index];
            foreach my $ele (@$cluster) {

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

                }
                my ($min, $best_center_index) = 
                              minimum( \@dist_from_clust_centers );
                my $best_cluster_center = 
                                 $cluster_centers->[$best_center_index];
                if (vector_equal($current_cluster_center, 
                                         $best_cluster_center)){
                    push @{$new_clusters->[$current_cluster_center_index]}, 
                                  $ele;
                } else {
                    $assignment_changed_flag = 1;             
                    push @{$new_clusters->[$best_center_index]}, $ele;
                }
            }
            $current_cluster_center_index++;
        }
        # Now make sure that we still have K clusters since K is fixed:
        next if ((@$new_clusters != @$clusters) && ($iteration_index < 100));
        # Now make sure that none of the K clusters is an empty cluster:
        foreach my $newcluster (@$new_clusters) {
            $cluster_size_zero_condition = 1 if ((!defined $newcluster) 

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

        }        
        foreach my $local_cluster (@$new_clusters) {
            if ( (!defined $local_cluster) || (@$local_cluster == 0) ) {
                push @$local_cluster, pop @$largest_cluster;
            }
        }
        next if (($cluster_size_zero_condition) && ($iteration_index < 100));
        last if $iteration_index == 100;
        # Now do a deep copy of new_clusters into clusters
	$clusters = deep_copy_AoA( $new_clusters );
        last if $assignment_changed_flag == 0;
    }
    $final_cluster_centers = $self->update_cluster_centers( $clusters );
    return ($clusters, $final_cluster_centers);
}

# Used by the kmeans part of the code: After each new assignment of the data points
# to the clusters on the basis of the current values for the cluster centers, we call
# the routine shown here for updating the values of the cluster centers.
sub update_cluster_centers {
    my $self = shift;

lib/Algorithm/ExpectationMaximization.pm  view on Meta::CPAN

    my @legal_params = qw / datafile
                            mask
                            K
                            terminal_output
                            max_em_iterations
                            seeding
                            class_priors
                            seed_tags
                            debug
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

sub get_value_index_hash {
    my $arr = shift;
    my %hash;
    foreach my $index (0..@$arr-1) {
        $hash{$arr->[$index]} = $index if $arr->[$index] > 0;
    }
    return \%hash;
}



( run in 0.380 second using v1.01-cache-2.11-cpan-94b05bcf43c )