flag results from the CPAN

flag

Algorithm-KMeans

view release on metacpan or search on metacpan

lib/Algorithm/KMeans.pm view on Meta::CPAN

        push @array_of_clusters, $clusters;
        push @array_of_cluster_centers, $cluster_centers;
    }
    my ($min, $max) = minmax( \@QoC_values );
    my $K_best_relative_to_Kmin = get_index_at_value($min, \@QoC_values );
    my $K_best = $K_best_relative_to_Kmin + $Kmin;
    if ($self->{_terminal_output}) {
        print "\nDisplaying final clusters for best K (= $K_best) :\n";
        display_clusters( $array_of_clusters[$K_best_relative_to_Kmin] );
        $self->display_cluster_centers($array_of_clusters[$K_best_relative_to_Kmin]);
        print "\nBest clustering achieved for K=$K_best with QoC = $min\n" if defined $min;
        my @printableQoC = grep {$_} @QoC_values;
        print "\nQoC values array (the smaller the value, the better it is) for different " . 
              "K starting with K=$Kmin:  @printableQoC\n";
    }
    $self->{_K_best} = $K_best;
    foreach my $i (0..@QoC_values-1) {
        my $k = $i + $Kmin;
        $self->{_QoC_values}->{"$k"} = $QoC_values[$i]; 
    }
    $self->{_clusters} = $array_of_clusters[$K_best_relative_to_Kmin];
    $self->{_cluster_centers} =  
                $array_of_cluster_centers[$K_best_relative_to_Kmin];
}

# This is the function to call if you already know what value you want to use for K,
# the number of expected clusters.  The purpose of this function is to do the
# initialization of the cluster centers and to carry out the initial assignment of
# the data to the clusters with the initial cluster centers.  The initialization
# consists of 3 steps: Construct a random sequence of K integers between 0 and N-1
# where N is the number of data points to be clustered; 2) Call
# get_initial_cluster_centers() to index into the data array with the random integers
# to get a list of K data points that would serve as the initial cluster centers; and
# (3) Call assign_data_to_clusters_initial() to assign the rest of the data to each
# of the K clusters on the basis of the proximity to the cluster centers.
sub cluster_for_given_K {
    my $self = shift;
    my $K = shift;
    my @all_data_ids = @{$self->{_data_id_tags}};
    my $cluster_centers;
    if ($self->{_cluster_seeding} eq 'smart') {
        $cluster_centers = $self->get_initial_cluster_centers_smart($K);
    } elsif ($self->{_cluster_seeding} eq 'random') {
        $cluster_centers = $self->get_initial_cluster_centers($K);
    } else {
        die "You must either choose 'smart' for cluster_seeding or 'random'. " .
            "Fix your constructor call." 
    }
    my $clusters;
    if ($self->{_use_mahalanobis_metric}) {
        my $clusters_and_determinants = 
                   $self->assign_data_to_clusters_initial_mahalanobis($cluster_centers);  
        $clusters = $clusters_and_determinants->[0];
        my @determinants = @{$clusters_and_determinants->[1]};
        my ($min,$max) = minmax(\@determinants);
        die "In cluster_for_given_K(): min determinant of covariance matrix for at " .
            "least one cluster is too small" if $min / $max < 0.001;
    } else {
        $clusters = $self->assign_data_to_clusters_initial($cluster_centers);  
    }
    my $cluster_nonexistent_flag = 0;
    foreach my $trial (0..2) {
        if ($self->{_use_mahalanobis_metric}) {
            ($clusters, $cluster_centers) = $self->assign_data_to_clusters_mahalanobis($clusters, $K);
        } else {
            ($clusters, $cluster_centers) = $self->assign_data_to_clusters( $clusters, $K );
        }
        my $num_of_clusters_returned = @$clusters;
        foreach my $cluster (@$clusters) {
            $cluster_nonexistent_flag = 1 if ((!defined $cluster) ||  (@$cluster == 0));
        }
        last unless $cluster_nonexistent_flag;
    }
    return ($clusters, $cluster_centers);
}

# This function is used when you set the "cluster_seeding" option to 'random' in the
# constructor.  Returns a set of K random integers.  These serve as indices to reach
# into the data array.  A data element whose index is one of the random numbers
# returned by this routine serves as an initial cluster center.  Note the quality
# check it runs on the list of K random integers constructed.  We first make sure
# that all K random integers are different.  Subsequently, we carry out a quality
# assessment of the K random integers constructed.  This quality measure consists of
# the ratio of the values spanned by the random integers to the value of N, the total
# number of data points to be clustered.  Currently, if this ratio is less than 0.3,
# we discard the K integers and try again.
sub initialize_cluster_centers {
    my $self = shift;
    my $K = shift;
    my $data_store_size = $self->{_N};
    my @cluster_center_indices;
    while (1) {
        foreach my $i (0..$K-1) {
            $cluster_center_indices[$i] = int rand( $data_store_size );
            next if $i == 0;
            foreach my $j (0..$i-1) {
                while ( $cluster_center_indices[$j] == 
                        $cluster_center_indices[$i] ) {
                    my $old = $cluster_center_indices[$i];
                    $cluster_center_indices[$i] = int rand($data_store_size);
                }
            }
        }
        my ($min,$max) = minmax(\@cluster_center_indices );
        my $quality = ($max - $min) / $data_store_size;
        last if $quality > 0.3;
    }
    return @cluster_center_indices;
}

# This function is used when you set the "cluster_seeding" option to 'random' in the
# constructor.  This routine merely reaches into the data array with the random
# integers, as constructed by the previous routine, serving as indices and fetching
# values corresponding to those indices.  The fetched data samples serve as the
# initial cluster centers.
sub get_initial_cluster_centers {
    my $self = shift;
    my $K = shift;
    my @cluster_center_indices = $self->initialize_cluster_centers($K);
    my @result;
    foreach my $i (@cluster_center_indices) {    
        my $tag = $self->{_data_id_tags}[$i];     
        push @result, $self->{_data}->{$tag};
    }
    return \@result;
}

# This method is invoked when you choose the 'smart' option for the "cluster_seeding"
# option in the constructor.  It subjects the data to a principal components analysis
# to figure out the direction of maximal variance.  Subsequently, it tries to locate
# K peaks in a smoothed histogram of the data points projected onto the maximal
# variance direction.

lib/Algorithm/KMeans.pm view on Meta::CPAN

    my $self = shift;
    my @cluster_centers = @{ shift @_ };
    my @clusters;
    foreach my $ele (@{$self->{_data_id_tags}}) {
        my $best_cluster;
        my @dist_from_clust_centers;
        foreach my $center (@cluster_centers) {
            push @dist_from_clust_centers, $self->distance($ele, $center);
        }
        my ($min, $best_center_index) = minimum( \@dist_from_clust_centers );
        push @{$clusters[$best_center_index]}, $ele;
    }
    return \@clusters;
}    

sub assign_data_to_clusters_initial_mahalanobis {
    my $self = shift;
    my @cluster_centers = @{ shift @_ };
    my @clusters;
    foreach my $ele (@{$self->{_data_id_tags}}) {
        my $best_cluster;
        my @dist_from_clust_centers;
        foreach my $center (@cluster_centers) {
            push @dist_from_clust_centers, $self->distance($ele, $center);
        }
        my ($min, $best_center_index) = minimum( \@dist_from_clust_centers );
        push @{$clusters[$best_center_index]}, $ele if defined $best_center_index;
    }
    # Since a cluster center may not correspond to any particular sample, it is possible
    # for one of the elements of the array @clusters to be null using the above 
    # strategy for populating the initial clusters.  Let's say there are five cluster
    # centers in the array @cluster_centers.  The $best_center_index may populate the
    # the elements of the array @clusters for the indices 0, 1, 2, 4, which would leave
    # $clusters[3] as undefined.  So, in what follows, we must first check if all of
    # the elements of @clusters are defined.
    my @determinants;
    foreach my $cluster(@clusters) {
        die "The clustering program started with bad initialization.  Please start over" 
            unless defined $cluster;
        my $covariance = $self->estimate_cluster_covariance($cluster);
        my $determinant = $covariance->det();
        push @determinants, $determinant;
    }
    return [\@clusters, \@determinants];
}    

# This is the main routine that along with the update_cluster_centers() routine
# constitute the two key steps of the K-Means algorithm.  In most cases, the infinite
# while() loop will terminate automatically when the cluster assignments of the data
# points remain unchanged. For the sake of safety, we keep track of the number of
# iterations. If this number reaches 100, we exit the while() loop anyway.  In most
# cases, this limit will not be reached.
sub assign_data_to_clusters {
    my $self = shift;
    my $clusters = shift;
    my $K = shift;
    my $final_cluster_centers;
    my $iteration_index = 0;
    while (1) {
        my $new_clusters;
        my $assignment_changed_flag = 0;
        my $current_cluster_center_index = 0;
        my $cluster_size_zero_condition = 0;
        my $how_many = @$clusters;
        my $cluster_centers = $self->update_cluster_centers( deep_copy_AoA_with_nulls( $clusters ) );
        $iteration_index++;
        foreach my $cluster (@$clusters) {
            my $current_cluster_center = $cluster_centers->[$current_cluster_center_index];
            foreach my $ele (@$cluster) {
                my @dist_from_clust_centers;
                foreach my $center (@$cluster_centers) {
                    push @dist_from_clust_centers, 
                               $self->distance($ele, $center);
                }
                my ($min, $best_center_index) = minimum( \@dist_from_clust_centers );
                my $best_cluster_center = $cluster_centers->[$best_center_index];
                if (vector_equal($current_cluster_center, $best_cluster_center)){
                    push @{$new_clusters->[$current_cluster_center_index]}, $ele;
                } else {
                    $assignment_changed_flag = 1;             
                    push @{$new_clusters->[$best_center_index]}, $ele;
                }
            }
            $current_cluster_center_index++;
        }
        next if ((@$new_clusters != @$clusters) && ($iteration_index < 100));
        # Now make sure that none of the K clusters is an empty cluster:
        foreach my $newcluster (@$new_clusters) {
            $cluster_size_zero_condition = 1 if ((!defined $newcluster) or  (@$newcluster == 0));
        }
        push @$new_clusters, (undef) x ($K - @$new_clusters) if @$new_clusters < $K;
        # During clustering for a fixed K, should a cluster inadvertently
        # become empty, steal a member from the largest cluster to hopefully
        # spawn a new cluster:
        my $largest_cluster;
        foreach my $local_cluster (@$new_clusters) {
            next if !defined $local_cluster;
            $largest_cluster = $local_cluster if !defined $largest_cluster;
            if (@$local_cluster > @$largest_cluster) {
                $largest_cluster = $local_cluster; 
            }
        }        
        foreach my $local_cluster (@$new_clusters) {
            if ( (!defined $local_cluster) || (@$local_cluster == 0) ) {
                push @$local_cluster, pop @$largest_cluster;
            }
        }
        next if (($cluster_size_zero_condition) && ($iteration_index < 100));
        last if $iteration_index == 100;
	$clusters = deep_copy_AoA( $new_clusters );
        last if $assignment_changed_flag == 0;
    }
    $final_cluster_centers = $self->update_cluster_centers( $clusters );
    return ($clusters, $final_cluster_centers);
}

sub assign_data_to_clusters_mahalanobis {
    my $self = shift;
    my $clusters = shift;
    my $K = shift;
    my $final_cluster_centers;
    my $iteration_index = 0;
    while (1) {
        my $new_clusters;
        my $assignment_changed_flag = 0;
        my $current_cluster_center_index = 0;
        my $cluster_size_zero_condition = 0;
        my $how_many = @$clusters;
        my $cluster_centers_and_covariances = 
        $self->update_cluster_centers_and_covariances_mahalanobis(deep_copy_AoA_with_nulls($clusters));
        my $cluster_centers = $cluster_centers_and_covariances->[0];
        my $cluster_covariances = $cluster_centers_and_covariances->[1];
        $iteration_index++;
        foreach my $cluster (@$clusters) {
            my $current_cluster_center = $cluster_centers->[$current_cluster_center_index];
            my $current_cluster_covariance = $cluster_covariances->[$current_cluster_center_index]; 
            foreach my $ele (@$cluster) {
                my @mahalanobis_dist_from_clust_centers;
                foreach my $i (0..@$clusters-1) {
                    my $center = $cluster_centers->[$i];
                    my $covariance = $cluster_covariances->[$i];
                    my $maha_distance;
                    eval {
                        $maha_distance = $self->distance_mahalanobis($ele, $center, $covariance);
                    };
                    next if $@;
                    push @mahalanobis_dist_from_clust_centers, $maha_distance; 
                }
                my ($min, $best_center_index) = minimum( \@mahalanobis_dist_from_clust_centers );
                die "The Mahalanobis metric may not be appropriate for the data" 
                    unless defined $best_center_index;
                my $best_cluster_center = $cluster_centers->[$best_center_index];
                if (vector_equal($current_cluster_center, $best_cluster_center)){
                    push @{$new_clusters->[$current_cluster_center_index]}, $ele;
                } else {
                    $assignment_changed_flag = 1;             
                    push @{$new_clusters->[$best_center_index]}, $ele;
                }
            }
            $current_cluster_center_index++;
        }
        next if ((@$new_clusters != @$clusters) && ($iteration_index < 100));
        # Now make sure that none of the K clusters is an empty cluster:
        foreach my $newcluster (@$new_clusters) {
            $cluster_size_zero_condition = 1 if ((!defined $newcluster) or  (@$newcluster == 0));
        }
        push @$new_clusters, (undef) x ($K - @$new_clusters) if @$new_clusters < $K;
        # During clustering for a fixed K, should a cluster inadvertently
        # become empty, steal a member from the largest cluster to hopefully
        # spawn a new cluster:
        my $largest_cluster;
        foreach my $local_cluster (@$new_clusters) {
            next if !defined $local_cluster;
            $largest_cluster = $local_cluster if !defined $largest_cluster;
            if (@$local_cluster > @$largest_cluster) {
                $largest_cluster = $local_cluster; 
            }
        }        
        foreach my $local_cluster (@$new_clusters) {
            if ( (!defined $local_cluster) || (@$local_cluster == 0) ) {
                push @$local_cluster, pop @$largest_cluster;
            }
        }
        next if (($cluster_size_zero_condition) && ($iteration_index < 100));
        last if $iteration_index == 100;
        # Now do a deep copy of new_clusters into clusters
	$clusters = deep_copy_AoA( $new_clusters );
        last if $assignment_changed_flag == 0;
    }
    $final_cluster_centers = $self->update_cluster_centers( $clusters );
    return ($clusters, $final_cluster_centers);
}

sub update_cluster_centers_and_covariances_mahalanobis {
    my $self = shift;
    my @clusters = @{ shift @_ };
    my @new_cluster_centers;
    my @new_cluster_covariances;
    # During clustering for a fixed K, should a cluster inadvertently become empty,
    # steal a member from the largest cluster to hopefully spawn a new cluster:
    my $largest_cluster;
    foreach my $cluster (@clusters) {
        next if !defined $cluster;
        $largest_cluster = $cluster if !defined $largest_cluster;
        if (@$cluster > @$largest_cluster) {
            $largest_cluster = $cluster; 
        }
    }        
    foreach my $cluster (@clusters) {
        if ( (!defined $cluster) || (@$cluster == 0) ) {
            push @$cluster, pop @$largest_cluster;
        }
    }
    foreach my $cluster (@clusters) {
        die "Cluster became empty --- untenable condition " .
            "for a given K.  Try again. \n" if !defined $cluster;
        my $cluster_size = @$cluster;
        die "Cluster size is zero --- untenable.\n" if $cluster_size == 0;
        my @new_cluster_center = @{$self->add_point_coords( $cluster )};
        @new_cluster_center = map {my $x = $_/$cluster_size; $x} @new_cluster_center;
        push @new_cluster_centers, \@new_cluster_center;
        # for covariance calculation:
        my ($num_rows,$num_cols) = ($self->{_data_dimensions}, scalar(@$cluster));
        my $matrix = Math::GSL::Matrix->new($num_rows,$num_cols);
        my $mean_vec = Math::GSL::Matrix->new($num_rows,1);
        # All the record labels are stored in the array $self->{_data_id_tags}.  The
        # actual data for clustering is stored in a hash at $self->{_data} whose keys are
        # the record labels; the value associated with each key is the array holding the
        # corresponding numerical multidimensional data.
        foreach my $j (0..$num_cols-1) {
            my $tag = $cluster->[$j];            
            my $data = $self->{_data}->{$tag};
            my @diff_from_mean = vector_subtract($data, \@new_cluster_center);
            $matrix->set_col($j, \@diff_from_mean);
        }
        my $transposed = transpose( $matrix );
        my $covariance = matrix_multiply( $matrix, $transposed );
        $covariance *= 1.0 / $num_cols;
        if ($self->{_debug}) {
            print "\nDisplaying the Covariance Matrix for cluster:";
            display_matrix( $covariance );
        }
        push @new_cluster_covariances, $covariance;
    }
    return [\@new_cluster_centers, \@new_cluster_covariances];
}

# After each new assignment of the data points to the clusters on the basis of the

lib/Algorithm/KMeans.pm view on Meta::CPAN

    }
}

sub variance_normalization {
    my %data_hash = %{shift @_};
    my @all_data_points = values %data_hash;
    my $dimensions = @{$all_data_points[0]};
    my @data_projections;
    foreach my $data_point (@all_data_points) {
        my $i = 0;
        foreach my $proj (@$data_point) {
            push @{$data_projections[$i++]}, $proj;
        }
    }
    my @variance_vec;
    foreach my $vec (@data_projections) {
        my ($mean, $variance) = mean_and_variance( $vec );
        push @variance_vec, $variance;
    }
    my %new_data_hash;
    while (my ($label, $data) = each(%data_hash) ) {
        my @new_data;
        foreach my $i (0..@{$data}-1) {
            my $new = $data->[$i] / sqrt($variance_vec[$i]);
            push @new_data, $data->[$i] / sqrt($variance_vec[$i]);
        }
        $new_data_hash{$label} = \@new_data;
    }
    return \%new_data_hash;
}

sub mean_and_variance {
    my @data = @{shift @_};
    my ($mean, $variance);
    foreach my $i (1..@data) {
        if ($i == 1) {
            $mean = $data[0];
            $variance = 0;
        } else {
            $mean = ( (($i-1)/$i) * $mean ) + $data[$i-1] / $i;
            $variance = ( (($i-1)/$i) * $variance )  + ($data[$i-1]-$mean)**2 / ($i-1);
        }
    }
    return ($mean, $variance);
}

sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / datafile
                            mask
                            K
                            Kmin
                            Kmax
                            terminal_output
                            write_clusters_to_files
                            do_variance_normalization
                            cluster_seeding
                            use_mahalanobis_metric
                            debug
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

sub get_value_index_hash {
    my $arr = shift;
    my %hash;
    foreach my $index (0..@$arr-1) {
        $hash{$arr->[$index]} = $index if $arr->[$index] > 0;
    }
    return \%hash;
}

sub non_maximum_suppression {
    my $arr = shift;
    my @output = (0) x @$arr;
    my @final_output = (0) x @$arr;
    my %hash;
    my @array_of_runs = ([$arr->[0]]);
    foreach my $index (1..@$arr-1) {
        if ($arr->[$index] == $arr->[$index-1]) {
            push @{$array_of_runs[-1]}, $arr->[$index];
        } else {  
            push @array_of_runs, [$arr->[$index]];
        }
    }
    my $runstart_index = 0;
    foreach my $run_index (1..@array_of_runs-2) {
        $runstart_index += @{$array_of_runs[$run_index-1]};
        if ($array_of_runs[$run_index]->[0] > 
            $array_of_runs[$run_index-1]->[0]  &&
            $array_of_runs[$run_index]->[0] > 
            $array_of_runs[$run_index+1]->[0]) {
            my $run_center = @{$array_of_runs[$run_index]} / 2;
            my $assignment_index = $runstart_index + $run_center;
            $output[$assignment_index] = $arr->[$assignment_index];
        }
    }
    if ($array_of_runs[-1]->[0] > $array_of_runs[-2]->[0]) {
        $runstart_index += @{$array_of_runs[-2]};
        my $run_center = @{$array_of_runs[-1]} / 2;
        my $assignment_index = $runstart_index + $run_center;
        $output[$assignment_index] = $arr->[$assignment_index];
    }
    if ($array_of_runs[0]->[0] > $array_of_runs[1]->[0]) {
        my $run_center = @{$array_of_runs[0]} / 2;
        $output[$run_center] = $arr->[$run_center];
    }
    return \@output;
}

sub display_matrix {
    my $matrix = shift;
    my $nrows = $matrix->rows();
    my $ncols = $matrix->cols();
    print "\n\nDisplaying matrix of size $nrows rows and $ncols columns:\n";
    foreach my $i (0..$nrows-1) {
        my $row = $matrix->row($i);
        my @row_as_list = $row->as_list;
        print "@row_as_list\n";
    }
    print "\n\n";

lib/Algorithm/KMeans.pm view on Meta::CPAN


  # Now construct an instance of the clusterer.  The parameter K controls the number
  # of clusters.  If you know how many clusters you want (let's say 3), call

  my $clusterer = Algorithm::KMeans->new( datafile        => $datafile,
                                          mask            => $mask,
                                          K               => 3,
                                          cluster_seeding => 'random',
                                          terminal_output => 1,
                                          write_clusters_to_files => 1,
                                        );

  # By default, this constructor call will set you up for clustering based on
  # Euclidean distances.  If you want the module to use Mahalanobis distances, your
  # constructor call will look like:

  my $clusterer = Algorithm::KMeans->new( datafile        => $datafile,
                                          mask            => $mask,
                                          K               => 3,
                                          cluster_seeding => 'random',
                                          use_mahalanobis_metric => 1,
                                          terminal_output => 1,
                                          write_clusters_to_files => 1,
                                        );

  # For both constructor calls shown above, you can use smart seeding of the clusters
  # by changing 'random' to 'smart' for the cluster_seeding option.  See the
  # explanation of smart seeding in the Methods section of this documentation.

  # If your data is such that its variability along the different dimensions of the
  # data space is significantly different, you may get better clustering if you first
  # normalize your data by setting the constructor parameter
  # do_variance_normalization as shown below:

  my $clusterer = Algorithm::KMeans->new( datafile => $datafile,
                                          mask     => $mask,
                                          K        => 3,
                                          cluster_seeding => 'smart',    # or 'random'
                                          terminal_output => 1,
                                          do_variance_normalization => 1,
                                          write_clusters_to_files => 1,
                                        );

  # But bear in mind that such data normalization may actually decrease the
  # performance of the clusterer if the variability in the data is more a result of
  # the separation between the means than a consequence of intra-cluster variance.

  # Set K to 0 if you want the module to figure out the optimum number of clusters
  # from the data. (It is best to run this option with the terminal_output set to 1
  # so that you can see the different value of QoC for the different K):

  my $clusterer = Algorithm::KMeans->new( datafile => $datafile,
                                          mask     => $mask,
                                          K        => 0,
                                          cluster_seeding => 'random',    # or 'smart'
                                          terminal_output => 1,
                                          write_clusters_to_files => 1,
                                        );

  # Although not shown above, you can obviously set the 'do_variance_normalization'
  # flag here also if you wish.

  # For very large data files, setting K to 0 will result in searching through too
  # many values for K.  For such cases, you can range limit the values of K to search
  # through by

  my $clusterer = Algorithm::KMeans->new( datafile => $datafile,
                                          mask     => "N111",
                                          Kmin     => 3,
                                          Kmax     => 10,
                                          cluster_seeding => 'random',    # or 'smart'
                                          terminal_output => 1,
                                          write_clusters_to_files => 1,
                                        );

  # FOR ALL CASES ABOVE, YOU'D NEED TO MAKE THE FOLLOWING CALLS ON THE CLUSTERER
  # INSTANCE TO ACTUALLY CLUSTER THE DATA:

  $clusterer->read_data_from_file();
  $clusterer->kmeans();

  # If you want to directly access the clusters and the cluster centers in your own
  # top-level script, replace the above two statements with:

  $clusterer->read_data_from_file();
  my ($clusters_hash, $cluster_centers_hash) = $clusterer->kmeans();

  # You can subsequently access the clusters directly in your own code, as in:

  foreach my $cluster_id (sort keys %{$clusters_hash}) {
      print "\n$cluster_id   =>   @{$clusters_hash->{$cluster_id}}\n";
  }
  foreach my $cluster_id (sort keys %{$cluster_centers_hash}) {
      print "\n$cluster_id   =>   @{$cluster_centers_hash->{$cluster_id}}\n";
  }


  # CLUSTER VISUALIZATION:

  # You must first set the mask for cluster visualization. This mask tells the module
  # which 2D or 3D subspace of the original data space you wish to visualize the
  # clusters in:

  my $visualization_mask = "111";
  $clusterer->visualize_clusters($visualization_mask);


  # SYNTHETIC DATA GENERATION:

  # The module has been provided with a class method for generating multivariate data
  # for experimenting with clustering.  The data generation is controlled by the
  # contents of the parameter file that is supplied as an argument to the data
  # generator method.  The mean and covariance matrix entries in the parameter file
  # must be according to the syntax shown in the param.txt file in the examples
  # directory. It is best to edit this file as needed:

  my $parameter_file = "param.txt";
  my $out_datafile = "mydatafile.dat";
  Algorithm::KMeans->cluster_data_generator(
                          input_parameter_file => $parameter_file,
                          output_datafile => $out_datafile,

( run in 2.537 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )