Algorithm-FuzzyCmeans
view release on metacpan or search on metacpan
lib/Algorithm/FuzzyCmeans.pm view on Meta::CPAN
package Algorithm::FuzzyCmeans;
use strict;
use warnings;
use base qw(Class::Accessor::Fast);
use Carp;
use List::MoreUtils qw(any);
use List::Util qw(shuffle);
use UNIVERSAL::require;
# Version string of this module.
our $VERSION = '0.02';
# Generate one accessor method per piece of object state; elsewhere in this
# file they are called with an argument to store a value and without one to
# read it back.
__PACKAGE__->mk_accessors($_) for qw(vectors centroids memberships m distance);
# Value given to the `m' parameter when the caller does not supply one.
use constant DEFAULT_M => 2.0;
# Constructor.
#
# Takes a flat list of key/value pairs which is handed to the parent
# constructor as a hash reference.  Afterwards:
#   * `vectors' and `memberships' default to empty hash references and
#     `centroids' to an empty array reference when they hold no true value;
#   * `m' defaults to DEFAULT_M when undefined, and must be greater than 1.0
#     (otherwise the constructor croaks);
#   * the `distance_class' key is removed from the object; the class it names
#     (Algorithm::FuzzyCmeans::Distance::Cosine when absent or false) is
#     loaded, instantiated and stored through the `distance' accessor.
#
# Returns the new object.  Croaks if the distance class cannot be loaded.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new({ @args });

    # Make sure every container exists before any document is added.
    $self->vectors({})     unless $self->vectors;
    $self->centroids([])   unless $self->centroids;
    $self->memberships({}) unless $self->memberships;

    $self->m(DEFAULT_M) unless defined $self->m;
    if ($self->m <= 1.0) {
        croak '`m\' parameter must be more than 1.0';
    }

    # The distance class name is construction-time configuration only, so it
    # is taken out of the object hash rather than kept as state.
    my $metric_class = delete($self->{distance_class})
        || 'Algorithm::FuzzyCmeans::Distance::Cosine';
    $metric_class->require or croak $@;
    $self->distance($metric_class->new());

    return $self;
}
# Register a document vector under the given id.
#
# Arguments:
#   $id     - key under which the vector is stored; must be defined.
#   $vector - the vector to store; must be a true value.
#
# Returns the stored vector, or nothing (undef / empty list) when either
# argument is rejected.  An existing entry with the same id is overwritten.
sub add_document {
    my ($self, $id, $vector) = @_;
    if (defined $id && $vector) {
        return $self->vectors->{$id} = $vector;
    }
    return;
}
# Run the clustering.
#
# Arguments:
#   $num_cluster - number of clusters; passed to the centroid selection and
#                  to every centroid recalculation.
#   $num_iter    - how many refinement rounds to perform.
#
# Initial centroids are chosen once; each round then recomputes the
# memberships followed by the centroids.
sub do_clustering {
    my ($self, $num_cluster, $num_iter) = @_;

    $self->_choose_random_centroids($num_cluster);

    my $rounds_left = $num_iter;
    while ($rounds_left > 0) {
        $self->_calc_memberships();
        $self->_calc_centroids($num_cluster);
        $rounds_left--;
    }
}
# Seed the clustering by picking documents at random and using their vectors
# as the initial centroids.
#
# Arguments:
#   $num_centroid - number of initial centroids to pick.
#
# The picked vectors are stored, as an array reference, through the
# `centroids' accessor.  The entries are the same values held in `vectors',
# not copies.
#
# Croaks when more centroids are requested than there are documents.  Without
# that check the slice below would run past the end of the id list, producing
# undef ids, "uninitialized value" warnings and undef centroids.
sub _choose_random_centroids {
    my ($self, $num_centroid) = @_;

    my $vectors = $self->vectors;
    my @ids     = shuffle keys %{$vectors};

    # The range operator truncates its bounds, so compare the integer part.
    if (defined $num_centroid && int($num_centroid) > scalar @ids) {
        croak "number of clusters ($num_centroid) exceeds number of documents ("
            . scalar(@ids) . ')';
    }

    my @centroids = map { $vectors->{$_} } @ids[0 .. $num_centroid - 1];
    return $self->centroids(\@centroids);
}
sub _calc_memberships {
my $self = shift;
$self->memberships({});
my $num_centroid = scalar @{ $self->centroids };
foreach my $id (keys %{ $self->vectors }) {
my @distances;
foreach my $centroid (@{ $self->centroids }) {
my $dist = $self->distance->distance(
$self->vectors->{$id}, $centroid);
push @distances, $dist;
}
if (any { $_ == 0 } @distances) {
foreach my $dist (@distances) {
push @{ $self->memberships->{$id} }, $dist == 0 ? 1 : 0;
}
}
else {
for (my $i = 0; $i < $num_centroid; $i++) {
my $membership;
for (my $j = 0; $j < $num_centroid; $j++) {
my $x = $distances[$i] / $distances[$j];
$membership += $x * $x;
}
$membership **= (-1) / ($self->m - 1);
( run in 0.889 second using v1.01-cache-2.11-cpan-39bf76dae61 )