Algorithm-ExpectationMaximization
view release on metacpan or search on metacpan
examples/canned_example3.pl view on Meta::CPAN
#!/usr/bin/perl -w
#use lib '../blib/lib', '../blib/arch';
### canned_example3.pl
use strict;
use Algorithm::ExpectationMaximization;
# Input data file and the column mask handed to the constructor.
# NOTE(review): the mask syntax is defined by
# Algorithm::ExpectationMaximization; presumably 'N' marks the record-tag
# column and each '1' selects a numeric column -- confirm in the module POD.
my $datafile = 'mydatafile3.dat';
my $mask     = 'N11';

# Build the clusterer.  The seeding mode is 'manual' and three seed tags
# are supplied alongside K => 3.
# NOTE(review): presumably each tag names the data record used as the
# initial centre of one cluster -- confirm in the module POD.
my $clusterer = Algorithm::ExpectationMaximization->new(
    datafile          => $datafile,
    mask              => $mask,
    K                 => 3,
    max_em_iterations => 300,
    seeding           => 'manual',
    seed_tags         => [ qw(a7 b3 c4) ],
    terminal_output   => 1,
    debug             => 0,
);
# Processing pipeline.  The calls are order-dependent and are kept in their
# original sequence.  What each step does internally is defined by
# Algorithm::ExpectationMaximization; the summaries below go by the method
# names only.

# Step 1: read the data.
$clusterer->read_data_from_file;

# Step 2: the two plotting calls take the same visualization mask.
# NOTE(review): presumably one mask character per data column, with '1'
# meaning "include this column in the plot" -- confirm in the module POD.
my $data_visualization_mask = '11';
for my $plot_method (qw(visualize_data plot_hardcopy_data)) {
    $clusterer->$plot_method($data_visualization_mask);
}

# Step 3: seed Perl's random number generator from the current time, then
# seed the clusters and run EM.
srand time;
$clusterer->seed_the_clusters;
$clusterer->EM;

# Step 4: run the Bayes classifier and write the resulting clusters out.
$clusterer->run_bayes_classifier;
$clusterer->write_naive_bayes_clusters_to_files;
# Fetch the disjoint clusters.  The value is used below as a reference to
# an array whose elements are themselves array references, one per cluster.
my $clusters = $clusterer->return_disjoint_clusters;

# With the clusters available in this top-level script, their contents can
# be examined directly; here each cluster is printed on its own line.
print "\n\nDisjoint clusters obtained with Naive Bayes' classifier:\n\n";
for my $cluster_no (0 .. scalar(@{$clusters}) - 1) {
    my @members = @{ $clusters->[$cluster_no] };
    print "Cluster $cluster_no (Naive Bayes): @members\n\n";
}
print "----------------------------------------------------\n\n";
# Threshold passed to the posterior-probability cluster calls below and
# echoed in the report text.
my $theta1 = 0.2;

print "Possibly overlapping clusters based on posterior probabilities "
    . "exceeding the threshold $theta1:\n\n";

# The returned value is used as a reference to an array of array
# references, one inner array per cluster.
my $posterior_prob_clusters =
    $clusterer->return_clusters_with_posterior_probs_above_threshold($theta1);

for my $cluster_no (0 .. scalar(@{$posterior_prob_clusters}) - 1) {
    my @members = @{ $posterior_prob_clusters->[$cluster_no] };
    print "Cluster $cluster_no (based on posterior probs exceeding $theta1): "
        . "@members\n\n";
}

# Write the same thresholded clusters out via the module.
$clusterer->write_posterior_prob_clusters_above_threshold_to_files($theta1);
print "\n----------------------------------------------------\n\n";
my $theta2 = 0.00001;
print "Showing the data element membership in each Gaussian. Only those " .
"data points are included in each Gaussian where the probability " .
"exceeds the threshold $theta2. Note that the larger the covariance " .
( run in 0.624 second using v1.01-cache-2.11-cpan-6b5c3043376 )