Algorithm-ClusterPoints

 view release on metacpan or  search on metacpan

lib/Algorithm/ClusterPoints.pm  view on Meta::CPAN

package Algorithm::ClusterPoints;

our $VERSION = '0.08';

use strict;
use warnings;

use constant sqr2 => sqrt(2);
use constant isqr2 => 1/sqr2;

use POSIX qw(floor ceil);
use List::Util qw(max min);
use Carp;

use Data::Dumper;

my $packing = ($] >= 5.008 ? 'w*' : 'V*');

sub new {
    @_ & 1 or croak 'Usage: Algorithm::ClusterPoints->new(%options)';
    my ($class, %opts) = @_;

    my $dimension = delete $opts{dimension};
    $dimension = 2 unless defined $dimension;
    $dimension < 1 and croak "positive dimension required";
    my $radius = delete $opts{radius};
    my $minimum_size = delete $opts{minimum_size};
    my $ordered = delete $opts{ordered};
    my $scales = delete $opts{scales};
    my $dimensional_groups = delete $opts{dimensional_groups};

    %opts and croak "unknown constructor option(s) '".join("', '", sort keys %opts)."'";

    my $self = bless { radius => 1.0,
                       minimum_size => 1,
                       ordered => 0,
                       dimension => $dimension,
                       coords => [ map [], 1..$dimension ],
                       scales => [ map 1, 1..$dimension ],
                       dimensional_groups => [[0..$dimension-1]],
                     }, $class;

    $self->radius($radius) if defined $radius;
    $self->minimum_size($minimum_size) if defined $minimum_size;
    $self->ordered($ordered) if defined $ordered;
    if (defined $scales) {
        ref $scales eq 'ARRAY' or croak 'ARRAY reference expected for "scales" option';
        $self->scales(@$scales);
    }
    if (defined $dimensional_groups) {
        ref $dimensional_groups eq 'ARRAY' or croak 'ARRAY reference expected for "dimensional_groups" option';
        $self->dimensional_groups(@$dimensional_groups);
    }
    $self;
}

sub add_point {
    my $self = shift;
    my $dimension = $self->{dimension};
    @_ % $dimension and croak 'coordinates list size is not a multiple of the problem dimension';
    delete $self->{_clusters};
    my $ix = @{$self->{coords}[0]};
    while (@_) {
        push @$_, shift
            for (@{$self->{coords}});
    }
    $ix;
}

*add_points = \&add_point;

sub point_coords {
    @_ == 2 or croak 'Usage: $clp->point_coords($index)';
    my ($self, $ix) = @_;
    my $top = $#{$self->{coords}[0]};

lib/Algorithm/ClusterPoints.pm  view on Meta::CPAN

            else {
                $to = each %cluster;
                $to_cells = ($cluster2cell{$to} ||= [$to]);
            }
            push @$to_cells, $cell;
            $cell2cluster{$cell} = $to;
        }
        else {
            $cell2cluster{$cell} = $cell;
        }
    }

    my @clusters;
    while (my ($cluster, $cells) = each %cluster2cell) {
        my @points = map @{delete $cell{$_}}, @$cells;
        if (@points >= $self->{minimum_size}) {
            @points = sort { $a <=> $b } @points if $self->{ordered};
            push @clusters, \@points;
        }
    }
    push @clusters, grep { @$_ >= $self->{minimum_size} } values %cell;

    @clusters = sort { $a->[0] <=> $b->[0] } @clusters if $self->{ordered};

    return \@clusters;
}

my %delta_hypercylinder; # cache

sub _delta_hypercylinder {

    my $dhc = $delta_hypercylinder{_hypercylinder_id @_} ||= do {
        my @subdimension;
        for my $group (@_) {
            $subdimension[$_] = @$group for @$group;
        }
        my @top = map ceil(sqrt($_)), @subdimension;
        # calculate minimum hypercube
        my @delta_hypercylinder = [];
        for my $dimension (0..$#subdimension) {
            my @next;
            for my $dhc (@delta_hypercylinder) {
                my $top = $top[$dimension];
                push @next, map [@$dhc, $_], -$top..$top;
            }
            @delta_hypercylinder = @next;
        }

        # filter out hyperpixels out of the hypercylinder
        for my $group (@_) {
            @delta_hypercylinder = grep {
                my $sum = 0;
                for (@$_[@$group]) {
                    my $min = ($_ ? abs($_) - 1 : 0);
                    $sum += $min * $min;
                }
                $sum < @$group;
            } @delta_hypercylinder;
        }

        # print Data::Dumper->Dump([\@delta_hypercylinder], [qw($hc)]);

        \@delta_hypercylinder
    };
    # print Data::Dumper->Dump([$dhc], [qw($hc)]);
    @$dhc;
}

sub _print_clusters {
    print join(',', @$_), "\n" for sort { $a->[0] <=> $b->[0] } @_;
}

sub _make_clusters_ix_any {
    my $self = shift;

    my $dimension = $self->{dimension};
    my @coords = $self->_scaled_coords;
    my $coords = \@coords;
    my $top = $#{$coords[0]};
    $top >= 0 or croak "points have not been added";
    my $groups = $self->{dimensional_groups};

    my (@fls, @ifls, @rifls);

    for my $group (@$groups) {
        my $istep = 1.000001 * sqrt(@$group);
        for my $dimension (@$group) {
            my $coord = $coords[$dimension];
            my $min = min @$coord;
            my @fl = map floor($istep * ($_ - $min)), @$coord;
            $fls[$dimension] = \@fl;
            my %ifl;
            my $c = 1;
            $ifl{$_} ||= $c++ for @fl;
            $ifls[$dimension] = \%ifl;
            my %rifl = reverse %ifl;
            $rifls[$dimension] = \%rifl;
        }
    }

    my %cell;
    my $dimension_top = $dimension - 1;
    for my $i (0..$top) {
        my $cell = pack $packing => map $ifls[$_]{$fls[$_][$i]}, 0..$dimension_top;
        push @{$cell{$cell}}, $i;
    }
    # print STDERR "\%cell:\n";
    # _print_clusters(values %cell);

    my %cell2cluster; # n to 1 relation
    my %cluster2cell;

    my @delta_hypercylinder = _delta_hypercylinder @$groups;
    # print STDERR "delta_hypercylinder\n", Dumper \@delta_hypercylinder;

    while(defined (my $cell = each %cell)) {
        my %cluster;
        my @ifl = unpack $packing => $cell;
        my @fl = map $rifls[$_]{$ifl[$_]}, 0..$dimension_top;

        for my $delta (@delta_hypercylinder) {
            # print STDERR "\$delta: @$delta\n";
            my @ifl = map { $ifls[$_]{$fl[$_] + $delta->[$_]} || next } 0..$dimension_top;
            # next if grep !defined, @ifl;
            my $neighbor = pack $packing => @ifl;



( run in 0.629 second using v1.01-cache-2.11-cpan-39bf76dae61 )