Algorithm-ExpectationMaximization

 view release on metacpan or  search on metacpan

examples/canned_example1.pl  view on Meta::CPAN

#!/usr/bin/perl -w

#use lib '../blib/lib', '../blib/arch';

### canned_example1.pl

use strict;
use Algorithm::ExpectationMaximization;


my $datafile = "mydatafile1.dat";              # from param1.txt


#  IMPORTANT: You need to set the mask at three different locations in this script.
#  The one shown below is for reading the data file.  At the next location, the mask
#  is for visualizing the raw data.  At the third location, the mask is for
#  visualizing the final clusters.  The mask shown below means that the symbolic data
#  for each data record is in the first column (the 'N'), and that the next two
#  columns (one '1' per column) are to be used for clustering.
#
#  NOTE(review): this comment previously said "three columns" while the masks in this
#  script ("N11" here, "11" for visualization) select two.  The code was taken as
#  authoritative; if mydatafile1.dat really carries three numeric columns, the masks
#  should read "N111" / "111" instead -- confirm against the data file.
my $mask = "N11";

#  Build the clusterer.  The option names are those accepted by the module's
#  constructor; the values below ask for 3 clusters, at most 300 EM iterations,
#  'random' seeding, terminal output switched on, and debugging switched off.
my $clusterer = Algorithm::ExpectationMaximization->new(
    datafile          => $datafile,
    mask              => $mask,
    K                 => 3,
    max_em_iterations => 300,
    seeding           => 'random',
    terminal_output   => 1,
    debug             => 0,
);

#  Load the records from $datafile into the clusterer before anything else is done.
$clusterer->read_data_from_file();

# For visualizing the raw data.  The same mask is handed first to the on-screen
# display call and then to the hardcopy plot call, in that order.
# (The mask has one '1' per clustering column selected above -- presumably it
# picks which of those columns to plot; verify against the module docs.)
my $data_visualization_mask = '11';
for my $display_method (qw(visualize_data plot_hardcopy_data)) {
    $clusterer->$display_method($data_visualization_mask);
}

# Reseed Perl's random number generator before the initial clusters are chosen.
# srand() without an argument lets Perl pick the seed itself; the previous
# srand(time) gave every run started within the same second the same seed.
# (Presumably the 'random' seeding requested in the constructor draws on rand --
# confirm against the module.)
srand();

# Run the pipeline in order: pick the initial clusters, iterate EM, classify the
# data with the Bayes classifier, and write the resulting clusters to files.
$clusterer->seed_the_clusters();
$clusterer->EM();
$clusterer->run_bayes_classifier();
$clusterer->write_naive_bayes_clusters_to_files();

# Fetch the disjoint clusters.  The result is used below as a reference to an
# array whose elements are themselves array references, one per cluster.
my $clusters = $clusterer->return_disjoint_clusters();

# With the clusters available in your own top-level script, their contents can
# be examined along the following lines:
print "\n\nDisjoint clusters obtained with Naive Bayes' classifier:\n\n";
for my $slot (0 .. scalar(@{$clusters}) - 1) {
    my $members = $clusters->[$slot];
    print "Cluster $slot (Naive Bayes):   @{$members}\n\n";
}
print "----------------------------------------------------\n\n";

# Threshold passed to the posterior-probability query below and echoed in the
# printed output.
my $theta1 = 0.2;

# The heading is printed before the clusters are requested, so it precedes any
# output the method call itself may produce.
print "Possibly overlapping clusters based on posterior probabilities "
    . "exceeding the threshold $theta1:\n\n";

my $posterior_prob_clusters =
    $clusterer->return_clusters_with_posterior_probs_above_threshold($theta1);

# One line per returned cluster, listing its members separated by spaces.
for my $slot (0 .. scalar(@{$posterior_prob_clusters}) - 1) {
    my $members = $posterior_prob_clusters->[$slot];
    print "Cluster $slot (based on posterior probs exceeding $theta1): "
        . "@{$members}\n\n";
}



( run in 0.503 second using v1.01-cache-2.11-cpan-6b5c3043376 )