Algorithm-FuzzyCmeans

 view release on metacpan or  search on metacpan

lib/Algorithm/FuzzyCmeans.pm  view on Meta::CPAN

package Algorithm::FuzzyCmeans;

use strict;
use warnings;
use base qw(Class::Accessor::Fast);
use Carp;
use List::MoreUtils qw(any);
use List::Util qw(shuffle);
use UNIVERSAL::require;

# Module version.
our $VERSION = '0.02';

# Generate one get/set accessor method per piece of clustering state:
# the document vectors, the current centroids, the per-document
# memberships, the `m' parameter and the distance object.
__PACKAGE__->mk_accessors($_) for qw(vectors centroids memberships m distance);

# Value new() assigns to `m' when the caller does not supply one.
# new() rejects any `m' that is not strictly greater than 1.0.
use constant DEFAULT_M => 2.0;

# Constructor.
#
# Accepts a flat list of key/value pairs which is handed to the parent
# constructor as a hash reference.  Keys read here:
#   vectors, centroids, memberships - replaced by empty containers when false
#   m               - defaults to DEFAULT_M when undefined; must be > 1.0
#   distance_class  - class name of the distance implementation; removed
#                     from the object and defaulted to the Cosine class
#
# Croaks when `m' is not greater than 1.0 or when the distance class
# cannot be loaded.  Returns the new object, whose `distance' accessor
# holds a fresh instance of the distance class.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new( {@args} );

    # Give every container attribute an empty default.
    if (!$self->vectors) {
        $self->vectors({});
    }
    if (!$self->centroids) {
        $self->centroids([]);
    }
    if (!$self->memberships) {
        $self->memberships({});
    }

    # Only an undefined `m' is defaulted; an explicit value is validated.
    if (!defined $self->m) {
        $self->m(DEFAULT_M);
    }
    if ($self->m <= 1.0) {
        croak '`m\' parameter must be more than 1.0';
    }

    # The class name is a construction-time option only, so it is taken
    # out of the object before the distance instance is stored.
    my $dist_class = delete($self->{distance_class})
        || 'Algorithm::FuzzyCmeans::Distance::Cosine';
    if (!$dist_class->require) {
        croak $@;
    }
    $self->distance($dist_class->new());

    return $self;
}

# Register a document vector under the given id.
#
#   $id     - document identifier; must be defined
#   $vector - the document's vector; must be a true value
#
# Stores $vector in the `vectors' hash under $id, replacing any earlier
# entry, and returns the stored vector.  When $id is undefined or
# $vector is false nothing is stored and an empty return is made.
sub add_document {
    my ($self, $id, $vector) = @_;

    if (defined($id) && $vector) {
        return $self->vectors->{$id} = $vector;
    }

    return;
}

# Run the clustering.
#
#   $num_cluster - number of clusters; passed to the centroid seeding
#                  step and to every centroid update
#   $num_iter    - number of update rounds to perform
#
# Seeds the centroids from randomly chosen documents, then performs
# $num_iter rounds, each recomputing the memberships first and the
# centroids second.
sub do_clustering {
    my ($self, $num_cluster, $num_iter) = @_;

    $self->_choose_random_centroids($num_cluster);

    my $round = 0;
    while ($round < $num_iter) {
        $self->_calc_memberships();
        $self->_calc_centroids($num_cluster);
        $round++;
    }
}

# Pick the initial centroids at random from the registered documents.
#
#   $num_centroid - how many centroids to select
#
# Shuffles the document ids, takes the first $num_centroid of them and
# stores the corresponding vectors (the same references held in
# `vectors', not copies) as the new `centroids' array reference.
# Asking for more centroids than there are documents yields lookups
# with undefined ids for the surplus positions.
sub _choose_random_centroids {
    my ($self, $num_centroid) = @_;

    my $vector_of    = $self->vectors;
    my @shuffled_ids = shuffle keys %{$vector_of};

    # Hash slice: one vector per selected id, in shuffled order.
    my @seeds = @{$vector_of}{ @shuffled_ids[ 0 .. $num_centroid - 1 ] };

    $self->centroids(\@seeds);
}

sub _calc_memberships {
    my $self = shift;
    $self->memberships({});
    my $num_centroid = scalar @{ $self->centroids };
    foreach my $id (keys %{ $self->vectors }) {
        my @distances;
        foreach my $centroid (@{ $self->centroids }) {
            my $dist = $self->distance->distance(
                $self->vectors->{$id}, $centroid);
            push @distances, $dist;
        }
        if (any { $_ == 0 } @distances) {
            foreach my $dist (@distances) {
                push @{ $self->memberships->{$id} }, $dist == 0 ? 1 : 0;
            }
        }
        else {
            for (my $i = 0; $i < $num_centroid; $i++) {
                my $membership;
                for (my $j = 0; $j < $num_centroid; $j++) {
                    my $x = $distances[$i] / $distances[$j];
                    $membership += $x * $x;
                }
                $membership **= (-1) / ($self->m - 1);



( run in 0.889 second using v1.01-cache-2.11-cpan-39bf76dae61 )