Algorithm-DistanceMatrix

 view release on metacpan or  search on metacpan

lib/Algorithm/DistanceMatrix.pm  view on Meta::CPAN

#!/usr/bin/env perl
# ABSTRACT: Compute distance matrix for any distance metric

package Algorithm::DistanceMatrix;
BEGIN {
  $Algorithm::DistanceMatrix::VERSION = '0.04';
}
use Moose;

has 'mode' =>(
    is => 'rw',
    isa => 'Str',
    default => 'lower',
    );

  
has 'metric' => (
    is=>'rw',
    isa=>'CodeRef',
    default=>sub{abs($_[0]-$_[1])},
    );


has 'objects' => (
    is => 'rw',
    isa => 'ArrayRef',
    );
    
    
sub distancematrix {
    my ($self, ) = @_;
    # Callback function
    my $metric = $self->metric;
    my $objects = $self->objects;
    my $n = @$objects;
    my $distances = [];
    for (my $i = 0; $i < $n; $i++) {
        # This initialization is required to prevent 'undef' at [0,0], 
        $distances->[$i] ||= [];
        # Diagonal or full matrix?
        my $start = $self->mode =~ /full/i ? 0 : $i+1;
        for (my $j = $start; $j < $n; $j++) {
            # Use a pointer, then determine if it's row-major or col-major order
            # Swap i and j if lower diagonal (default)
            my $ref = $self->mode =~ /lower/i ? 
                \$distances->[$j][$i] : \$distances->[$i][$j];  
            # Callback function provides the distance
            $$ref = $metric->($objects->[$i], $objects->[$j]);
        }
    }
    # Last diagonal element is undef, unless explicitly computed
    $distances->[$n-1] = [(undef)x$n] if $self->mode =~ /upper/i;
    return $distances;
}


__PACKAGE__->meta->make_immutable;
no Moose;
1;
__END__
=pod

=head1 NAME

Algorithm::DistanceMatrix - Compute distance matrix for any distance metric

=head1 VERSION

version 0.04

=head1 SYNOPSIS

 use Algorithm::DistanceMatrix;
 my $m = Algorithm::DistanceMatrix->new(
     metric=>\&mydistance,objects=\@myarray);
 my $distmatrix =  $m->distancematrix;
 
 use Algorithm::Cluster qw/treecluster/;
 # method=>
 # s: single-linkage clustering
 # http://en.wikipedia.org/wiki/Single-linkage_clustering
 # m: maximum- (or complete-) linkage clustering
 # http://en.wikipedia.org/wiki/Complete_linkage_clustering
 # a: average-linkage clustering (UPGMA)
 # http://en.wikipedia.org/wiki/UPGMA
 
 my $tree = treecluster(data=>$distmat, method=>'a');
 
 # Get your objects and the cluster IDs they belong to, assuming 5 clusters
 my $cluster_ids = $tree->cut(5);
 # Index corresponds to that of the original objects
 print $objects->[2], ' belongs to cluster ', $cluster_ids->[2], "\n";

=head1 DESCRIPTION

This is a small helper package for L<Algorithm::Cluster>. That module provides 
many facilities for clustering data. It also provides a C<distancematrix> function,
but assumes tabular data, which is the standard for gene expression data. 

If your data is tabular, you should first have a look at C<distancematrix> in
L<Algorithm::Cluster>



( run in 0.961 second using v1.01-cache-2.11-cpan-acebb50784d )