Algorithm-ExpectationMaximization
view release on metacpan or search on metacpan
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
$self->{_clusters} = $clusters;
$self->{_cluster_centers} = $cluster_centers;
}
# Used by the kmeans part of the code for the initialization of the EM algorithm:
# Runs the kmeans clustering for a fixed number of clusters.
#
# Arguments (after the invocant):
#   $K  - the number of clusters requested
#
# Returns a two-element list: (array ref of clusters, array ref of cluster
# centers), as produced by the last call to assign_data_to_clusters().
#
# The assignment step is attempted up to three times.  After each attempt we
# check whether any returned cluster is undefined or empty; if none is, we stop
# early.  The empty-cluster check is made afresh for every attempt so that a
# failed earlier attempt does not force the remaining attempts to run after a
# valid clustering has already been obtained.
sub cluster_for_given_K {
    my $self = shift;
    my $K = shift;
    my $cluster_centers = $self->get_initial_cluster_centers($K);
    my $clusters = $self->assign_data_to_clusters_initial($cluster_centers);
    foreach my $trial (0..2) {
        ($clusters, $cluster_centers) =
                         $self->assign_data_to_clusters( $clusters, $K );
        # Count the clusters that are missing or empty in THIS trial only.
        my $bad_cluster_count =
            grep { (!defined $_) || (@$_ == 0) } @$clusters;
        last unless $bad_cluster_count;
    }
    return ($clusters, $cluster_centers);
}
# Used by the kmeans part of the code for the initialization of the EM algorithm:
sub get_initial_cluster_centers {
my $self = shift;
my $K = shift;
if ($self->{_data_dimensions} == 1) {
my @one_d_data;
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
# safety, we keep track of the number of iterations. If this number reaches 100, we
# exit the while() loop anyway. In most cases, this limit will not be reached.
# Iteratively reassigns data elements to the cluster whose center is closest.
#
# Arguments (after the invocant):
#   $clusters - array ref of clusters, each itself an array ref of elements
#   $K        - the requested number of clusters (not referenced in the portion
#               of the body visible here)
#
# Returns a two-element list: (array ref of clusters, array ref of cluster
# centers computed from those clusters by update_cluster_centers()).
#
# NOTE(review): parts of the body are not visible in this listing; comments
# below that depend on the missing statements are marked as assumptions.
sub assign_data_to_clusters {
my $self = shift;
my $clusters = shift;
my $K = shift;
my $final_cluster_centers;
# Counts passes through the while() loop; 100 is the hard upper limit.
my $iteration_index = 0;
while (1) {
# Fresh per-pass state: the reassigned clusters and the status flags.
my $new_clusters;
my $assignment_changed_flag = 0;
my $current_cluster_center_index = 0;
my $cluster_size_zero_condition = 0;
my $how_many = @$clusters;
# Centers are computed from a deep copy of the current clusters.
my $cluster_centers = $self->update_cluster_centers(
deep_copy_AoA_with_nulls( $clusters ) );
$iteration_index++;
foreach my $cluster (@$clusters) {
my $current_cluster_center =
$cluster_centers->[$current_cluster_center_index];
foreach my $ele (@$cluster) {
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
}
# Presumably @dist_from_clust_centers holds the distance from $ele to each
# cluster center; it is filled in by statements not visible here.
my ($min, $best_center_index) =
minimum( \@dist_from_clust_centers );
my $best_cluster_center =
$cluster_centers->[$best_center_index];
# Keep the element where it is if its current center is also the best
# center; otherwise move it and record that an assignment changed.
if (vector_equal($current_cluster_center,
$best_cluster_center)){
push @{$new_clusters->[$current_cluster_center_index]},
$ele;
} else {
$assignment_changed_flag = 1;
push @{$new_clusters->[$best_center_index]}, $ele;
}
}
$current_cluster_center_index++;
}
# Now make sure that we still have K clusters since K is fixed:
# (A 'next' here starts another pass without copying $new_clusters back
# into $clusters.)
next if ((@$new_clusters != @$clusters) && ($iteration_index < 100));
# Now make sure that none of the K clusters is an empty cluster:
foreach my $newcluster (@$new_clusters) {
$cluster_size_zero_condition = 1 if ((!defined $newcluster)
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
}
# Give every undefined or empty cluster one element popped off
# $largest_cluster.  NOTE(review): $largest_cluster is set by statements
# not visible here -- presumably the biggest cluster in $new_clusters.
foreach my $local_cluster (@$new_clusters) {
if ( (!defined $local_cluster) || (@$local_cluster == 0) ) {
push @$local_cluster, pop @$largest_cluster;
}
}
# If an empty cluster was seen, try another pass while under the limit.
next if (($cluster_size_zero_condition) && ($iteration_index < 100));
# Iteration limit reached: stop without adopting $new_clusters.
last if $iteration_index == 100;
# Now do a deep copy of new_clusters into clusters
$clusters = deep_copy_AoA( $new_clusters );
# Converged: no element changed cluster during this pass.
last if $assignment_changed_flag == 0;
}
$final_cluster_centers = $self->update_cluster_centers( $clusters );
return ($clusters, $final_cluster_centers);
}
# Used by the kmeans part of the code: After each new assignment of the data points
# to the clusters on the basis of the current values for the cluster centers, we call
# the routine shown here for updating the values of the cluster centers.
sub update_cluster_centers {
my $self = shift;
lib/Algorithm/ExpectationMaximization.pm view on Meta::CPAN
my @legal_params = qw / datafile
mask
K
terminal_output
max_em_iterations
seeding
class_priors
seed_tags
debug
/;
my $found_match_flag;
foreach my $param (@params) {
foreach my $legal (@legal_params) {
$found_match_flag = 0;
if ($param eq $legal) {
$found_match_flag = 1;
last;
}
}
last if $found_match_flag == 0;
}
return $found_match_flag;
}
# Builds a lookup from each strictly positive value in the given array to the
# index at which it occurs.  Values that are not greater than zero are left
# out.  When a value occurs more than once, the highest index is recorded.
#
# Argument: an array ref of values.
# Returns:  a hash ref mapping value => index.
sub get_value_index_hash {
    my ($values) = @_;
    my $count = @$values;
    # Indices of the entries that qualify, in ascending order.
    my @positive_positions = grep { $values->[$_] > 0 } 0 .. $count - 1;
    my %index_of;
    # Slice assignment goes left to right, so later duplicates overwrite.
    @index_of{ @{$values}[@positive_positions] } = @positive_positions;
    return \%index_of;
}
( run in 0.380 second using v1.01-cache-2.11-cpan-94b05bcf43c )