Algorithm-KMeans
view release on metacpan or search on metacpan
lib/Algorithm/KMeans.pm view on Meta::CPAN
package Algorithm::KMeans;
#------------------------------------------------------------------------------------
# Copyright (c) 2014 Avinash Kak. All rights reserved. This program is free
# software. You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::KMeans is a Perl module for clustering multidimensional data.
# -----------------------------------------------------------------------------------
#use 5.10.0;
use strict;
use warnings;
use Carp;
use File::Basename;
use Math::Random;
use Graphics::GnuplotIF;
use Math::GSL::Matrix;
our $VERSION = '2.05';
# from Perl docs:
my $_num_regex = '^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$';
# Constructor:
# Constructor.  Validates the keyword arguments, then blesses a hash of
# instance state.  `datafile', `mask', and `cluster_seeding' are mandatory;
# everything else defaults to 0 or 'unknown'.
sub new {
    my ($class, %args) = @_;
    # Reject any keyword that is not in the approved list (see
    # check_for_illegal_params).
    croak "\nYou have used a wrong name for a keyword argument " .
          "--- perhaps a misspelling\n"
        if check_for_illegal_params(keys %args) == 0;
    my %instance = (
        _datafile                 => $args{datafile} || croak("datafile required"),
        _mask                     => $args{mask} || croak("mask required"),
        _K                        => $args{K} || 0,
        _K_min                    => $args{Kmin} || 'unknown',
        _K_max                    => $args{Kmax} || 'unknown',
        _cluster_seeding          => $args{cluster_seeding} || croak("must choose smart or random " .
                                                                    "for cluster seeding"),
        _var_normalize            => $args{do_variance_normalization} || 0,
        _use_mahalanobis_metric   => $args{use_mahalanobis_metric} || 0,
        _clusters_2_files         => $args{write_clusters_to_files} || 0,
        _terminal_output          => $args{terminal_output} || 0,
        _debug                    => $args{debug} || 0,
        # State filled in by read_data_from_file() and the clustering runs:
        _N                        => 0,
        _K_best                   => 'unknown',
        _original_data            => {},
        _data                     => {},
        _data_id_tags             => [],
        _QoC_values               => {},
        _clusters                 => [],
        _cluster_centers          => [],
        _clusters_hash            => {},
        _cluster_centers_hash     => {},
        _cluster_covariances_hash => {},
        _data_dimensions          => 0,
    );
    return bless \%instance, $class;
}
# Dispatches to the CSV or DAT reader based on the datafile's extension.
# Fixes two defects in the original: the patterns `/.csv$/` and `/.dat$/`
# used an unescaped dot, so e.g. "data_xcsv" matched as CSV; and a filename
# with any other extension was silently ignored instead of reported.
sub read_data_from_file {
    my $self = shift;
    my $filename = $self->{_datafile};
    if ($filename =~ /\.csv$/) {
        $self->read_data_from_file_csv();
    } elsif ($filename =~ /\.dat$/) {
        $self->read_data_from_file_dat();
    } else {
        croak "The datafile '$filename' must carry a .csv or .dat extension";
    }
}
sub read_data_from_file_csv {
my $self = shift;
my $numregex = '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
my $filename = $self->{_datafile} || die "you did not specify a file with the data to be clustered";
my $mask = $self->{_mask};
my @mask = split //, $mask;
$self->{_data_dimensions} = scalar grep {$_ eq '1'} @mask;
print "data dimensionality: $self->{_data_dimensions} \n"if $self->{_terminal_output};
open FILEIN, $filename or die "Unable to open $filename: $!";
die("Aborted. get_training_data_csv() is only for CSV files") unless $filename =~ /\.csv$/;
local $/ = undef;
my @all_data = split /\s+/, <FILEIN>;
my %data_hash = ();
my @data_tags = ();
foreach my $record (@all_data) {
my @splits = split /,/, $record;
die "\nYour mask size (including `N' and 1's and 0's) does not match\n" .
"the size of at least one of the data records in the file: $!"
unless scalar(@mask) == scalar(@splits);
my $record_name = shift @splits;
$data_hash{$record_name} = \@splits;
push @data_tags, $record_name;
}
$self->{_original_data} = \%data_hash;
$self->{_data_id_tags} = \@data_tags;
$self->{_N} = scalar @data_tags;
if ($self->{_var_normalize}) {
$self->{_data} = variance_normalization( $self->{_original_data} );
} else {
$self->{_data} = deep_copy_hash( $self->{_original_data} );
}
# Need to make the following call to set the global mean and covariance:
# my $covariance = $self->estimate_mean_and_covariance(\@data_tags);
# Need to make the following call to set the global eigenvec eigenval sets:
# $self->eigen_analysis_of_covariance($covariance);
if ( defined($self->{_K}) && ($self->{_K} > 0) ) {
carp "\n\nWARNING: YOUR K VALUE IS TOO LARGE.\n The number of data " .
lib/Algorithm/KMeans.pm view on Meta::CPAN
# assessment of the K random integers constructed. This quality measure consists of
# the ratio of the values spanned by the random integers to the value of N, the total
# number of data points to be clustered. Currently, if this ratio is less than 0.3,
# we discard the K integers and try again.
# Draws K DISTINCT random indices in [0, _N) to serve as seed positions for
# the 'random' cluster-seeding mode.  The candidate set is accepted only if
# the indices span more than 30% of the data-array range (a crude quality
# check on the spread of the seeds).
#
# Fixes over the original:
#   * After regenerating a colliding index, the original only re-checked it
#     against index $j, so it could silently collide with an earlier index;
#     a %seen hash now guarantees global uniqueness.
#   * K == 1 made the spread 0 forever, so the original looped infinitely;
#     a single seed is now accepted unconditionally.
#   * K > _N makes distinct sampling impossible (another infinite loop in
#     the original); it is now reported as a fatal error.
sub initialize_cluster_centers {
    my $self = shift;
    my $K = shift;
    my $data_store_size = $self->{_N};
    die "cannot select $K distinct cluster centers from only " .
        "$data_store_size data points\n" if $K > $data_store_size;
    my @cluster_center_indices;
    while (1) {
        my %seen;
        @cluster_center_indices = ();
        foreach my $i (0..$K-1) {
            my $candidate = int rand($data_store_size);
            # Regenerate until unique against ALL previously chosen indices.
            $candidate = int rand($data_store_size) while $seen{$candidate};
            $seen{$candidate} = 1;
            push @cluster_center_indices, $candidate;
        }
        my ($min, $max) = (sort {$a <=> $b} @cluster_center_indices)[0, -1];
        my $quality = ($max - $min) / $data_store_size;
        # A lone seed has zero spread by definition; accept it outright.
        last if $quality > 0.3 or $K == 1;
    }
    return @cluster_center_indices;
}
# This function is used when you set the "cluster_seeding" option to 'random' in the
# constructor. This routine merely reaches into the data array with the random
# integers, as constructed by the previous routine, serving as indices and fetching
# values corresponding to those indices. The fetched data samples serve as the
# initial cluster centers.
# For 'random' cluster seeding: turns the K random indices produced by
# initialize_cluster_centers() into actual data vectors by looking up the
# record tag at each index and fetching that record from the data hash.
# Returns a ref to the list of seed vectors (the stored arrayrefs
# themselves, not copies).
sub get_initial_cluster_centers {
    my ($self, $K) = @_;
    my @seed_indices = $self->initialize_cluster_centers($K);
    my @seed_vectors = map {
        $self->{_data}->{ $self->{_data_id_tags}[$_] }
    } @seed_indices;
    return \@seed_vectors;
}
# This method is invoked when you choose the 'smart' option for the "cluster_seeding"
# option in the constructor. It subjects the data to a principal components analysis
# to figure out the direction of maximal variance. Subsequently, it tries to locate
# K peaks in a smoothed histogram of the data points projected onto the maximal
# variance direction.
# 'smart' cluster seeding: (1) run a PCA on the data covariance matrix to
# find the direction of maximal variance, (2) project every data point onto
# that direction, and (3) take the K highest peaks of a smoothed histogram
# of the projections as the initial cluster centers.
sub get_initial_cluster_centers_smart {
my $self = shift;
my $K = shift;
# 1-D data needs no PCA: histogram the raw coordinate values directly.
if ($self->{_data_dimensions} == 1) {
my @one_d_data;
foreach my $j (0..$self->{_N}-1) {
my $tag = $self->{_data_id_tags}[$j];
push @one_d_data, $self->{_data}->{$tag}->[0];
}
my @peak_points = find_peak_points_in_given_direction(\@one_d_data,$K);
print "highest points at data values: @peak_points\n" if $self->{_debug};
my @cluster_centers;
foreach my $peakpoint (@peak_points) {
push @cluster_centers, [$peakpoint];
}
return \@cluster_centers;
}
# Lay the data out with one COLUMN per record, so covariance = M * M' / N.
my ($num_rows,$num_cols) = ($self->{_data_dimensions},$self->{_N});
my $matrix = Math::GSL::Matrix->new($num_rows,$num_cols);
my $mean_vec = Math::GSL::Matrix->new($num_rows,1);
# All the record labels are stored in the array $self->{_data_id_tags}. The
# actual data for clustering is stored in a hash at $self->{_data} whose keys are
# the record labels; the value associated with each key is the array holding the
# corresponding numerical multidimensional data.
foreach my $j (0..$num_cols-1) {
my $tag = $self->{_data_id_tags}[$j];
my $data = $self->{_data}->{$tag};
$matrix->set_col($j, $data);
}
if ($self->{_debug}) {
print "\nDisplaying the original data as a matrix:";
display_matrix( $matrix );
}
# Column-wise mean of the data.
foreach my $j (0..$num_cols-1) {
$mean_vec += $matrix->col($j);
}
$mean_vec *= 1.0 / $num_cols;
if ($self->{_debug}) {
print "Displaying the mean vector for the data:";
display_matrix( $mean_vec );
}
# Center the data: subtract the mean from every column.
foreach my $j (0..$num_cols-1) {
my @new_col = ($matrix->col($j) - $mean_vec)->as_list;
$matrix->set_col($j, \@new_col);
}
if ($self->{_debug}) {
print "Displaying mean subtracted data as a matrix:";
display_matrix( $matrix );
}
my $transposed = transpose( $matrix );
if ($self->{_debug}) {
print "Displaying transposed data matrix:";
display_matrix( $transposed );
}
# Biased covariance estimate: C = (M * M') / N  over the centered columns.
my $covariance = matrix_multiply( $matrix, $transposed );
$covariance *= 1.0 / $num_cols;
if ($self->{_debug}) {
print "\nDisplaying the Covariance Matrix for your data:";
display_matrix( $covariance );
}
# Find the eigenvector of the covariance matrix with the largest eigenvalue;
# that is the direction of maximal variance.
my ($eigenvalues, $eigenvectors) = $covariance->eigenpair;
my $num_of_eigens = @$eigenvalues;
my $largest_eigen_index = 0;
my $smallest_eigen_index = 0;
print "Eigenvalue 0: $eigenvalues->[0]\n" if $self->{_debug};
foreach my $i (1..$num_of_eigens-1) {
$largest_eigen_index = $i if $eigenvalues->[$i] > $eigenvalues->[$largest_eigen_index];
$smallest_eigen_index = $i if $eigenvalues->[$i] < $eigenvalues->[$smallest_eigen_index];
print "Eigenvalue $i: $eigenvalues->[$i]\n" if $self->{_debug};
}
print "\nlargest eigen index: $largest_eigen_index\n" if $self->{_debug};
print "\nsmallest eigen index: $smallest_eigen_index\n\n" if $self->{_debug};
foreach my $i (0..$num_of_eigens-1) {
my @vec = $eigenvectors->[$i]->as_list;
print "Eigenvector $i: @vec\n" if $self->{_debug};
}
my @largest_eigen_vec = $eigenvectors->[$largest_eigen_index]->as_list;
print "\nLargest eigenvector: @largest_eigen_vec\n" if $self->{_debug};
my @max_var_direction;
# Each element of the array @largest_eigen_vec is a Math::Complex object
# Convert each complex component "[mag,theta]" back to a signed real:
# theta==0 means +mag, theta==pi means -mag; anything else is genuinely
# complex and fatal.  NOTE(review): the regex requires a decimal point in
# the magnitude ("\d*\.\d+"), so an integer-formatted magnitude would fail
# to parse -- confirm against Math::Complex's stringification.
foreach my $k (0..@largest_eigen_vec-1) {
my ($mag, $theta) = $largest_eigen_vec[$k] =~ /\[(\d*\.\d+),(\S+)\]/;
if ($theta eq '0') {
$max_var_direction[$k] = $mag;
} elsif ($theta eq 'pi') {
$max_var_direction[$k] = -1.0 * $mag;
} else {
die "eigendecomposition of covariance matrix produced a complex " .
"eigenvector --- something is wrong";
}
}
print "\nMaximum Variance Direction: @max_var_direction\n\n" if $self->{_debug};
# We now project all data points on the largest eigenvector.
# Each projection will yield a single point on the eigenvector.
my @projections;
foreach my $j (0..$self->{_N}-1) {
my $tag = $self->{_data_id_tags}[$j];
my $data = $self->{_data}->{$tag};
die "Dimensionality of the largest eigenvector does not "
. "match the dimensionality of the data"
unless @max_var_direction == $self->{_data_dimensions};
my $projection = vector_multiply($data, \@max_var_direction);
push @projections, $projection;
}
print "All projection points: @projections\n" if $self->{_debug};
# Histogram the 1-D projections and take the K strongest peaks; each peak
# location (a scalar along the eigenvector) is expanded back into a full
# coordinate vector by scaling the direction vector.
my @peak_points = find_peak_points_in_given_direction(\@projections, $K);
print "highest points at points along largest eigenvec: @peak_points\n" if $self->{_debug};
my @cluster_centers;
foreach my $peakpoint (@peak_points) {
my @actual_peak_coords = map {$peakpoint * $_} @max_var_direction;
push @cluster_centers, \@actual_peak_coords;
}
return \@cluster_centers;
}
# This method is invoked when you choose the 'smart' option for the "cluster_seeding"
# option in the constructor. It is called by the previous method to locate K peaks
# in a smoothed histogram of the data points projected onto the maximal variance
# direction.
# Locates up to $how_many peaks in a smoothed 1000-bin histogram of the 1-D
# values in @$dataref (projections of the data onto the maximal-variance
# direction), and returns the peak locations mapped back to the original
# (unnormalized) coordinate.
#
# Fixes over the original:
#   * The normalization loop ran over 0..$#sorted_data-1, leaving the
#     LARGEST sample unnormalized; its raw value then produced a histogram
#     cell far outside 0..999 and the point was silently dropped.
#   * Degenerate input (all values equal => scale 0) now dies with a clear
#     message instead of an "Illegal division by zero".
#   * Removed $avg_diff/$num_points, which were computed but never used,
#     and the inner loop variable that shadowed the outer $index.
sub find_peak_points_in_given_direction {
    my $dataref = shift;
    my $how_many = shift;
    my @data = @$dataref;
    my @sorted_data = sort {$a <=> $b} @data;
    my ($min, $max) = @sorted_data[0, -1];
    my $scale = $max - $min;
    die "all projection points coincide --- cannot locate histogram peaks\n"
        if $scale == 0;
    # Map every sample into [0, 1].
    foreach my $idx (0..$#sorted_data) {
        $sorted_data[$idx] = ($sorted_data[$idx] - $min) / $scale;
    }
    # Accumulate a smoothed histogram: each sample votes for its own bin and
    # for the $smoothness bins on either side (a box filter), clipped to the
    # valid bin range 0..999.
    my $delta = 1.0 / 1000.0;
    my @accumulator = (0) x 1000;
    my $smoothness = 40;
    foreach my $sample (@sorted_data) {
        my $cell_index = int($sample / $delta);
        for my $cell ($cell_index - $smoothness .. $cell_index + $smoothness) {
            next if $cell < 0 || $cell > 999;
            $accumulator[$cell]++;
        }
    }
    # Suppress non-maxima, then report the $how_many tallest surviving peaks,
    # converted from bin index back to the unnormalized data coordinate.
    my $peaks_array = non_maximum_suppression( \@accumulator );
    my $peaks_index_hash = get_value_index_hash( $peaks_array );
    my @K_highest_peak_locations;
    my $k = 0;
    foreach my $peak (sort {$b <=> $a} keys %$peaks_index_hash) {
        push @K_highest_peak_locations,
             $min + $peaks_index_hash->{$peak} * $scale * $delta
            if $k < $how_many;
        last if ++$k == $how_many;
    }
    return @K_highest_peak_locations;
}
# The purpose of this routine is to form initial clusters by assigning the data
# samples to the initial clusters formed by the previous routine on the basis of the
# best proximity of the data samples to the different cluster centers.
sub assign_data_to_clusters_initial {
my $self = shift;
my @cluster_centers = @{ shift @_ };
my @clusters;
foreach my $ele (@{$self->{_data_id_tags}}) {
my $best_cluster;
lib/Algorithm/KMeans.pm view on Meta::CPAN
foreach my $local_cluster (@$new_clusters) {
if ( (!defined $local_cluster) || (@$local_cluster == 0) ) {
push @$local_cluster, pop @$largest_cluster;
}
}
next if (($cluster_size_zero_condition) && ($iteration_index < 100));
last if $iteration_index == 100;
# Now do a deep copy of new_clusters into clusters
$clusters = deep_copy_AoA( $new_clusters );
last if $assignment_changed_flag == 0;
}
$final_cluster_centers = $self->update_cluster_centers( $clusters );
return ($clusters, $final_cluster_centers);
}
# Mahalanobis variant of the center-update step: for each cluster, recompute
# its mean vector AND its covariance matrix from the current membership.
# Returns a two-element arrayref: [\@new_cluster_centers, \@new_cluster_covariances].
sub update_cluster_centers_and_covariances_mahalanobis {
my $self = shift;
my @clusters = @{ shift @_ };
my @new_cluster_centers;
my @new_cluster_covariances;
# During clustering for a fixed K, should a cluster inadvertently become empty,
# steal a member from the largest cluster to hopefully spawn a new cluster:
my $largest_cluster;
foreach my $cluster (@clusters) {
next if !defined $cluster;
$largest_cluster = $cluster if !defined $largest_cluster;
if (@$cluster > @$largest_cluster) {
$largest_cluster = $cluster;
}
}
foreach my $cluster (@clusters) {
if ( (!defined $cluster) || (@$cluster == 0) ) {
push @$cluster, pop @$largest_cluster;
}
}
foreach my $cluster (@clusters) {
die "Cluster became empty --- untenable condition " .
"for a given K. Try again. \n" if !defined $cluster;
my $cluster_size = @$cluster;
die "Cluster size is zero --- untenable.\n" if $cluster_size == 0;
# New center = mean of the member vectors (coordinate sums / size).
my @new_cluster_center = @{$self->add_point_coords( $cluster )};
@new_cluster_center = map {my $x = $_/$cluster_size; $x} @new_cluster_center;
push @new_cluster_centers, \@new_cluster_center;
# for covariance calculation:
# Build a matrix whose columns are the mean-subtracted member vectors,
# so that C = (M * M') / n is the biased covariance estimate.
my ($num_rows,$num_cols) = ($self->{_data_dimensions}, scalar(@$cluster));
my $matrix = Math::GSL::Matrix->new($num_rows,$num_cols);
# NOTE(review): $mean_vec below is allocated but never used in this sub.
my $mean_vec = Math::GSL::Matrix->new($num_rows,1);
# All the record labels are stored in the array $self->{_data_id_tags}. The
# actual data for clustering is stored in a hash at $self->{_data} whose keys are
# the record labels; the value associated with each key is the array holding the
# corresponding numerical multidimensional data.
foreach my $j (0..$num_cols-1) {
my $tag = $cluster->[$j];
my $data = $self->{_data}->{$tag};
my @diff_from_mean = vector_subtract($data, \@new_cluster_center);
$matrix->set_col($j, \@diff_from_mean);
}
my $transposed = transpose( $matrix );
my $covariance = matrix_multiply( $matrix, $transposed );
$covariance *= 1.0 / $num_cols;
if ($self->{_debug}) {
print "\nDisplaying the Covariance Matrix for cluster:";
display_matrix( $covariance );
}
push @new_cluster_covariances, $covariance;
}
return [\@new_cluster_centers, \@new_cluster_covariances];
}
# After each new assignment of the data points to the clusters on the basis of the
# current values for the cluster centers, we call the routine shown here for updating
# the values of the cluster centers.
# After each reassignment of data points to clusters, recompute every
# cluster's center as the mean of its current members.  Returns a ref to
# the list of new center vectors, in cluster order.
sub update_cluster_centers {
    my $self = shift;
    my @clusters = @{ shift @_ };
    # During clustering for a fixed K, should a cluster inadvertently become
    # empty, steal a member from the largest cluster to hopefully spawn a
    # new cluster:
    my $most_populous;
    for my $cluster (@clusters) {
        next unless defined $cluster;
        $most_populous = $cluster
            if !defined($most_populous) || @$cluster > @$most_populous;
    }
    for my $cluster (@clusters) {
        push @$cluster, pop @$most_populous
            if !defined($cluster) || @$cluster == 0;
    }
    my @new_cluster_centers;
    for my $cluster (@clusters) {
        die "Cluster became empty --- untenable condition " .
            "for a given K. Try again. \n" if !defined $cluster;
        my $size = @$cluster;
        die "Cluster size is zero --- untenable.\n" if $size == 0;
        # Center = coordinate-wise sum of the members divided by the count.
        my $coord_sums = $self->add_point_coords($cluster);
        push @new_cluster_centers, [ map { $_ / $size } @$coord_sums ];
    }
    return \@new_cluster_centers;
}
sub which_cluster_for_new_data_element {
my $self = shift;
my $ele = shift;
die "The dimensionality of the new data element is not correct: $!"
unless @$ele == $self->{_data_dimensions};
my %distance_to_new_ele_hash;
foreach my $cluster_id (sort keys %{$self->{_cluster_centers_hash}}) {
$distance_to_new_ele_hash{$cluster_id} = $self->distance2($ele,
$self->{_cluster_centers_hash}->{$cluster_id});
}
my @values = values %distance_to_new_ele_hash;
my ($min,$max) = minmax(\@values);
my $answer;
foreach my $cluster_id (keys %distance_to_new_ele_hash) {
$answer = $cluster_id if $distance_to_new_ele_hash{$cluster_id} == $min;
}
return $answer;
lib/Algorithm/KMeans.pm view on Meta::CPAN
} else {
die "covariance matrix may not have an inverse";
}
my $determinant = $covariance_inverse->det();
my $distance = $transposed * $covariance_inverse * $vec;
my @distance = $distance->as_list;
$distance = $distance[0];
return sqrt $distance;
}
# Same as the previous method except that the first argument can be the actual
# coordinates of the data element. Our goal is to find the Mahalanobis distance
# from a given data element to a cluster whose center and covariance are known. As
# for the previous method, it requires three arguments.
# Mahalanobis distance from a data point (given directly as a ref to its
# coordinate array) to a cluster with the given center and covariance:
#     sqrt( (x - m)' * C^{-1} * (x - m) )
# Returns undef when no covariance matrix is supplied; dies when the
# covariance is (near-)singular and cannot be inverted.
#
# Cleanups over the original: the determinant is now computed exactly once
# ($det_of_covar was computed and never used, det() was then called a second
# time for the test, and the determinant of the INVERSE was computed and
# discarded).
sub distance_mahalanobis2 {
    my $self = shift;
    my $ele = shift;                 # ref to the array of coords for a point
    my $cluster_center = shift;
    my $cluster_covariance = shift;
    return undef unless defined $cluster_covariance;
    my @diff_from_mean = vector_subtract($ele, $cluster_center);
    my $vec = Math::GSL::Matrix->new($self->{_data_dimensions}, 1);
    $vec->set_col(0, \@diff_from_mean);
    # Guard against a (near-)singular covariance before inverting.
    my $det = $cluster_covariance->det();
    die "covariance matrix may not have an inverse" unless $det > 0.001;
    my $covariance_inverse = $cluster_covariance->inverse;
    # (1 x d) * (d x d) * (d x 1)  =>  a 1x1 matrix holding distance^2.
    my $distance = transpose($vec) * $covariance_inverse * $vec;
    my ($distance_squared) = $distance->as_list;
    return sqrt $distance_squared;
}
# Estimates the (biased, divide-by-n) covariance matrix of a single cluster,
# given a ref to the array of record tags belonging to that cluster.
# The cluster mean is recomputed here from the members; it is NOT taken from
# $self->{_cluster_centers}.
#
# Cleanup over the original: removed the $mean_vec GSL matrix that was
# allocated but never used.
sub estimate_cluster_covariance {
    my $self = shift;
    my $cluster = shift;            # ref to array of member record tags
    my $cluster_size = @$cluster;
    # Cluster mean: coordinate-wise sum of the members divided by the count.
    my @cluster_center = @{$self->add_point_coords( $cluster )};
    @cluster_center = map { $_ / $cluster_size } @cluster_center;
    # Build a matrix whose columns are the mean-subtracted member vectors,
    # so that C = (M * M') / n.  The member data lives in the hash at
    # $self->{_data}, keyed by record tag.
    my ($num_rows, $num_cols) = ($self->{_data_dimensions}, scalar(@$cluster));
    my $matrix = Math::GSL::Matrix->new($num_rows, $num_cols);
    foreach my $j (0..$num_cols-1) {
        my $tag  = $cluster->[$j];
        my $data = $self->{_data}->{$tag};
        my @diff_from_mean = vector_subtract($data, \@cluster_center);
        $matrix->set_col($j, \@diff_from_mean);
    }
    my $transposed = transpose( $matrix );
    my $covariance = $matrix * $transposed;   # overloaded GSL matrix multiply
    $covariance *= 1.0 / $num_cols;
    if ($self->{_debug}) {
        print "\nDisplaying the Covariance Matrix for cluster:";
        display_matrix( $covariance );
    }
    return $covariance;
}
# Writes each cluster's record tags to its own file, cluster<i>.txt, with
# the tags space-separated on a single line.
#
# Fixes over the original:
#   * It unlinked "cluster*.dat" although the files it writes are named
#     "cluster<i>.txt", so stale .txt files from earlier runs with more
#     clusters were never removed; both globs are cleaned now.
#   * Replaced the bareword filehandle and the 2-arg shell-pipe open
#     (`open FILEHANDLE, "| sort > $filename"`) -- a shell-injection hazard
#     via the filename -- with a checked 3-arg open on a lexical handle.
#     The external `sort' was a no-op anyway: all tags were printed on one
#     line, so sort merely passed that line through (adding the trailing
#     newline, which we now print explicitly).
sub write_clusters_to_files {
    my $self = shift;
    my @clusters = @{$self->{_clusters}};
    unlink glob "cluster*.dat";
    unlink glob "cluster*.txt";
    foreach my $i (0..$#clusters) {
        my $filename = "cluster" . $i . ".txt";
        print "\nWriting cluster $i to file $filename\n" if $self->{_terminal_output};
        open my $fh, '>', $filename or die "Unable to open file: $!";
        print {$fh} "$_ " for @{$clusters[$i]};
        print {$fh} "\n";
        close $fh or die "Unable to close file: $!";
    }
}
# Accessor for the best K found by a K=0 clustering run.  Croaks if the
# clusterer has not yet been run in best-K mode; otherwise optionally echoes
# the value and returns it.
sub get_K_best {
    my $self = shift;
    croak "You need to run the clusterer with K=0 option " .
          "before you can call this method" if $self->{_K_best} eq 'unknown';
    my $best_K = $self->{_K_best};
    print "\nThe best value of K: $best_K\n" if $self->{_terminal_output};
    return $best_K;
}
# Prints the K => QoC table gathered during a K=0 (best-K) run.  Smaller
# QoC is better.  Croaks if best-K mode has not been run yet.
#
# Fix over the original: the keys of %{_QoC_values} are numeric values of K,
# but they were sorted as strings, so e.g. K=10 was listed before K=2.
# They are now sorted numerically.
sub show_QoC_values {
    my $self = shift;
    croak "\nYou need to run the clusterer with K=0 option before you can call this method"
        if $self->{_K_best} eq 'unknown';
    print "\nShown below are K on the left and the QoC values on the right (the smaller " .
          "the QoC, the better it is)\n";
    foreach my $key (sort {$a <=> $b} keys %{$self->{_QoC_values}} ) {
        print " $key => $self->{_QoC_values}->{$key}\n" if defined $self->{_QoC_values}->{$key};
    }
}
# Destructor: removes the three temporary files this module creates for the
# datafile ("__temp_", "__temp_data_" and "__temp_normed_data_" prefixes on
# the datafile's basename).  unlink on a non-existent file is harmless.
sub DESTROY {
    my $self = shift;
    my $base = basename($self->{_datafile});
    unlink map { $_ . $base } '__temp_', '__temp_data_', '__temp_normed_data_';
}
################################## Visualization Code ###################################
# It makes sense to call visualize_clusters() only AFTER you have called kmeans().
#
# The visualize_clusters() implementation automatically figures out whether it
# should do a 2D plot or a 3D plot. If the number of on bits in the mask that is
# supplied as one of the arguments is greater than 2, it does a 3D plot for the
# first three data coordinates. That is, the clusters will be displayed in the 3D
# space formed by the first three data coordinates. On the other hand, if the number
# of on bits in the mask is exactly 2, it does a 2D plot. Should it happen that
# only one on bit is specified for the mask, visualize_clusters() aborts.
#
# The visualization code consists of first accessing each of clusters created by the
# kmeans() subroutine. Note that the clusters contain only the symbolic names for
lib/Algorithm/KMeans.pm view on Meta::CPAN
my $j = int rand( $i + 1 );
@$arr[$i, $j] = @$arr[$j, $i];
}
}
# Divides every coordinate of every record by the standard deviation of that
# coordinate across the whole data set, so all dimensions carry comparable
# weight in the distance computations.  Takes and returns { tag => \@coords }.
#
# Cleanups over the original: removed the unused $dimensions variable; the
# per-element value was computed twice (once into an unused $new, once into
# the push); the sqrt of each variance is now taken once per dimension; and
# a zero-variance dimension now dies with a clear message instead of a bare
# "Illegal division by zero".
sub variance_normalization {
    my %data_hash = %{shift @_};
    my @all_data_points = values %data_hash;
    # Transpose the records: $data_projections[$i] collects every sample's
    # value along dimension $i.
    my @data_projections;
    foreach my $data_point (@all_data_points) {
        my $dim = 0;
        foreach my $coord (@$data_point) {
            push @{$data_projections[$dim++]}, $coord;
        }
    }
    # Per-dimension standard deviation.
    my @std_dev_vec;
    foreach my $projection (@data_projections) {
        my ($mean, $variance) = mean_and_variance( $projection );
        die "variance_normalization: a data dimension has zero variance " .
            "and cannot be normalized\n" if $variance == 0;
        push @std_dev_vec, sqrt $variance;
    }
    # Scale every record coordinate-wise.
    my %new_data_hash;
    while (my ($label, $data) = each %data_hash) {
        my @new_data = map { $data->[$_] / $std_dev_vec[$_] } 0 .. $#{$data};
        $new_data_hash{$label} = \@new_data;
    }
    return \%new_data_hash;
}
# Single-pass incremental mean and (population, divide-by-n) variance of the
# numbers in the referenced array, returned as the pair ($mean, $variance).
# Uses the standard running-update identities, so no second pass over the
# data is needed.
sub mean_and_variance {
    my @data = @{shift @_};
    my ($mean, $variance);
    for my $n (1 .. scalar @data) {
        if ($n == 1) {
            # First sample seeds the running values.
            ($mean, $variance) = ($data[0], 0);
            next;
        }
        my $x = $data[$n - 1];
        $mean     = (($n - 1) / $n) * $mean + $x / $n;
        $variance = (($n - 1) / $n) * $variance + ($x - $mean)**2 / ($n - 1);
    }
    return ($mean, $variance);
}
# Returns 1 when every supplied constructor keyword is legal, 0 as soon as
# one is not.
#
# Fix over the original: with an EMPTY parameter list the original returned
# undef (its flag was never set), which the caller's `== 0` test treated --
# with an uninitialized-value warning -- as "illegal parameter found".  An
# empty list is vacuously legal and now returns 1.
sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / datafile
                            mask
                            K
                            Kmin
                            Kmax
                            terminal_output
                            write_clusters_to_files
                            do_variance_normalization
                            cluster_seeding
                            use_mahalanobis_metric
                            debug
                          /;
    my %is_legal = map { $_ => 1 } @legal_params;
    foreach my $param (@params) {
        return 0 unless $is_legal{$param};
    }
    return 1;
}
# Inverts an array into a { value => index } hash, keeping only strictly
# positive values.  When a value occurs more than once, the LAST index wins
# (later assignments overwrite earlier ones), matching the original.
sub get_value_index_hash {
    my $values = shift;
    my %index_of;
    for my $i (0 .. $#{$values}) {
        next unless $values->[$i] > 0;
        $index_of{ $values->[$i] } = $i;
    }
    return \%index_of;
}
# Non-maximum suppression over the smoothed histogram: returns an array of
# the same length in which only local maxima keep their accumulator value;
# every other cell is 0.  Because the smoothing produces plateaus, the input
# is first run-length encoded and a peak is a run whose value exceeds both
# neighboring runs; the peak is assigned to the center cell of its run.
sub non_maximum_suppression {
my $arr = shift;
my @output = (0) x @$arr;
# NOTE(review): @final_output and %hash below are never used in this sub.
my @final_output = (0) x @$arr;
my %hash;
# Run-length encode: @array_of_runs holds one arrayref per run of equal values.
my @array_of_runs = ([$arr->[0]]);
foreach my $index (1..@$arr-1) {
if ($arr->[$index] == $arr->[$index-1]) {
push @{$array_of_runs[-1]}, $arr->[$index];
} else {
push @array_of_runs, [$arr->[$index]];
}
}
# Interior runs: keep a run that is strictly higher than both neighbors.
# $runstart_index tracks the absolute cell index where each run begins.
my $runstart_index = 0;
foreach my $run_index (1..@array_of_runs-2) {
$runstart_index += @{$array_of_runs[$run_index-1]};
if ($array_of_runs[$run_index]->[0] >
$array_of_runs[$run_index-1]->[0] &&
$array_of_runs[$run_index]->[0] >
$array_of_runs[$run_index+1]->[0]) {
# A fractional run center (odd-length run / 2) is truncated by the
# array-index context in the assignment below.
my $run_center = @{$array_of_runs[$run_index]} / 2;
my $assignment_index = $runstart_index + $run_center;
$output[$assignment_index] = $arr->[$assignment_index];
}
}
# Boundary runs only have one neighbor to beat.
# NOTE(review): both boundary checks assume at least two runs exist; a
# constant input array (a single run) would index $array_of_runs[-2]
# out of range -- confirm callers never pass a flat histogram.
if ($array_of_runs[-1]->[0] > $array_of_runs[-2]->[0]) {
$runstart_index += @{$array_of_runs[-2]};
my $run_center = @{$array_of_runs[-1]} / 2;
my $assignment_index = $runstart_index + $run_center;
$output[$assignment_index] = $arr->[$assignment_index];
}
if ($array_of_runs[0]->[0] > $array_of_runs[1]->[0]) {
my $run_center = @{$array_of_runs[0]} / 2;
$output[$run_center] = $arr->[$run_center];
}
return \@output;
( run in 1.155 second using v1.01-cache-2.11-cpan-0bd6704ced7 )