DATA results from the CPAN

DATA

Algorithm-KMeans

view release on metacpan or search on metacpan

examples/cluster_and_visualize_with_data_visualization.pl view on Meta::CPAN

# In order to see the effects of variance normalization of the data (each data
# coordinate is normalized by the standard-deviation along that coordinate axis), it
# is sometimes useful to see both the raw data and its normalized form.  The
# following two calls accomplish that:
# The second argument selects the form to display: 'original' for the raw data and
# 'normed' for the normalized data.
$clusterer->visualize_data($visualization_mask, 'original');
$clusterer->visualize_data($visualization_mask, 'normed');


# Finally, you can visualize the clusters.  BUT NOTE THAT THE VISUALIZATION MASK FOR
# CLUSTER VISUALIZATION WILL, IN GENERAL, BE INDEPENDENT OF THE VISUALIZATION MASK
# FOR VIEWING THE DATA:
# NOTE(review): this example nevertheless passes the same $visualization_mask that
# was given to visualize_data() above; supply a separate mask here if the two views
# should differ.
$clusterer->visualize_clusters($visualization_mask);

examples/which_cluster_for_new_data.pl view on Meta::CPAN

# Print every entry of $clusters_hash as "<cluster id>   =>   <values>", each
# surrounded by newlines.  Keys are visited in the default (string) sort order.
# The array stored under each key (presumably the cluster's members) is
# interpolated into the string, so its elements are joined by $" (a single space
# unless it has been changed).
foreach my $cluster_id (sort keys %{$clusters_hash}) {
    print "\n$cluster_id   =>   @{$clusters_hash->{$cluster_id}}\n";
}

print "\nDisplaying cluster centers in the terminal window:\n";
# Same layout as above, this time for the arrays stored in $cluster_centers_hash.
foreach my $cluster_id (sort keys %{$cluster_centers_hash}) {
    print "\n$cluster_id   =>   @{$cluster_centers_hash->{$cluster_id}}\n";
}


# FIND CLUSTER IDENTITY OF A NEW DATA RECORD:

# The new record is passed as an array reference.  Exactly one of the two
# declarations below should be active; the trailing comments name the data file
# each variant is meant for.
my $new_datum = [20,4,0];                  # for mydatafile1.dat
#my $new_datum = [20,4];                    # for mydatafile3.dat
# Ask the clusterer which cluster the new record falls in; the returned value is
# printed below as the cluster's name.
my $cluster_name = $clusterer->which_cluster_for_new_data_element($new_datum);
print "\nUsing Euclidean distances: The data element @$new_datum belongs to cluster: $cluster_name\n";

# Repeat the lookup with the Mahalanobis-distance variant of the method so the
# two answers can be compared in the output.
my $cluster_name2 = 
            $clusterer->which_cluster_for_new_data_element_mahalanobis($new_datum);
print "\nUsing Mahalanobis distances: The data element @$new_datum belongs to cluster: $cluster_name2\n";

lib/Algorithm/KMeans.pm view on Meta::CPAN

  my $clusterer = Algorithm::KMeans->new( datafile => $datafile,
                                          mask     => "N111",
                                          Kmin     => 3,
                                          Kmax     => 10,
                                          cluster_seeding => 'random',    # or 'smart'
                                          terminal_output => 1,
                                          write_clusters_to_files => 1,
                                        );

  # FOR ALL CASES ABOVE, YOU'D NEED TO MAKE THE FOLLOWING CALLS ON THE CLUSTERER
  # INSTANCE TO ACTUALLY CLUSTER THE DATA:

  $clusterer->read_data_from_file();
  $clusterer->kmeans();

  # If you want to directly access the clusters and the cluster centers in your own
  # top-level script, replace the above two statements with:

  $clusterer->read_data_from_file();
  my ($clusters_hash, $cluster_centers_hash) = $clusterer->kmeans();

lib/Algorithm/KMeans.pm view on Meta::CPAN

  # CLUSTER VISUALIZATION:

  # You must first set the mask for cluster visualization. This mask tells the module
  # which 2D or 3D subspace of the original data space you wish to visualize the
  # clusters in:

  my $visualization_mask = "111";
  $clusterer->visualize_clusters($visualization_mask);


  # SYNTHETIC DATA GENERATION:

  # The module has been provided with a class method for generating multivariate data
  # for experimenting with clustering.  The data generation is controlled by the
  # contents of the parameter file that is supplied as an argument to the data
  # generator method.  The mean and covariance matrix entries in the parameter file
  # must follow the syntax shown in the param.txt file in the examples directory.
  # It is best to edit that file as needed:

  my $parameter_file = "param.txt";
  my $out_datafile = "mydatafile.dat";

( run in 1.204 second using v1.01-cache-2.11-cpan-140bd7fdf52 )