Algorithm-KMeans

 view release on metacpan or  search on metacpan

lib/Algorithm/KMeans.pm  view on Meta::CPAN

package Algorithm::KMeans;

#------------------------------------------------------------------------------------
# Copyright (c) 2014 Avinash Kak. All rights reserved.  This program is free
# software.  You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::KMeans is a Perl module for clustering multidimensional data.
# -----------------------------------------------------------------------------------

#use 5.10.0;
use strict;
use warnings;
use Carp;
use File::Basename;
use Math::Random;
use Graphics::GnuplotIF;
use Math::GSL::Matrix;


our $VERSION = '2.05';

# from Perl docs:
my $_num_regex =  '^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$'; 

# Constructor:
sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument " .
          "--- perhaps a misspelling\n" 
          if check_for_illegal_params(@params) == 0;
    bless {
        _datafile                 =>   $args{datafile} || croak("datafile required"),
        _mask                     =>   $args{mask}     || croak("mask required"),
        _K                        =>   $args{K}        || 0,
        _K_min                    =>   $args{Kmin} || 'unknown',
        _K_max                    =>   $args{Kmax} || 'unknown',
        _cluster_seeding          =>   $args{cluster_seeding} || croak("must choose smart or random ".
                                                                       "for cluster seeding"),
        _var_normalize            =>   $args{do_variance_normalization} || 0,
        _use_mahalanobis_metric   =>   $args{use_mahalanobis_metric} || 0,  
        _clusters_2_files         =>   $args{write_clusters_to_files} || 0,
        _terminal_output          =>   $args{terminal_output} || 0,
        _debug                    =>   $args{debug} || 0,
        _N                        =>   0,
        _K_best                   =>   'unknown',
        _original_data            =>   {},
        _data                     =>   {},
        _data_id_tags             =>   [],
        _QoC_values               =>   {},
        _clusters                 =>   [],
        _cluster_centers          =>   [],
        _clusters_hash            =>   {},
        _cluster_centers_hash     =>   {},
        _cluster_covariances_hash =>   {},
        _data_dimensions          =>   0,

    }, $class;
}

sub read_data_from_file {
    my $self = shift;
    my $filename = $self->{_datafile};
    $self->read_data_from_file_csv() if $filename =~ /.csv$/;
    $self->read_data_from_file_dat() if $filename =~ /.dat$/;



( run in 1.190 second using v1.01-cache-2.11-cpan-e1769b4cff6 )