Algorithm-Cluster

 view release on metacpan or  search on metacpan

perl/Record.pm  view on Meta::CPAN

    if (defined $param{geneclusters}) {
        $geneclusters = $param{geneclusters};
        if (ref($geneclusters) eq "ARRAY") {
            if (scalar @{$geneclusters} != $ngenes) {
                die "k-means solution found, but its size does not agree with the number of genes";
            }
            $gene_cluster_type = 'k'; # k-means clustering result
        }
        elsif (ref($geneclusters) eq "Algorithm::Cluster::Tree") {
            $gene_cluster_type = 'h'; # hierarchical clustering result
            my $n = $geneclusters->length;
            if ($n != $ngenes - 1) {
                die "Size of the hierarchical clustering tree ($n) should be equal to the number of genes ($ngenes) minus one";
            }
        }
        else {
            die "Cannot understand gene clustering result! $!";
        }
    }
    if (defined $param{expclusters}) {
        $expclusters = $param{expclusters};
        if (ref($expclusters) eq "ARRAY") {
            if (scalar @$expclusters != $nexps) {
                die "k-means solution found, but its size does not agree with the number of experiments";
            }
            $exp_cluster_type = 'k'; # k-means clustering result
        }
        elsif (ref($expclusters) eq "Algorithm::Cluster::Tree") {
            $exp_cluster_type = 'h'; # hierarchical clustering result
            my $n = $expclusters->length;
            if ($n != $nexps - 1) {
                die "Size of the hierarchical clustering tree ($n) should be equal to the number of experiments ($nexps) minus one";
            }
        }
        else {
            die "Cannot understand experiment clustering result! $!";
        }
    }
    my @gorder;
    if (defined $self->{gorder}) {
        @gorder = $self->{gorder};
    }
    else {
        @gorder = (0..$ngenes-1);
    }
    my @eorder;
    if (defined $self->{eorder}) {
        @eorder = $self->{eorder};
    }
    else {
        @eorder = (0..$nexps-1);
    }
    if (defined $gene_cluster_type and defined $exp_cluster_type) {
        if ($gene_cluster_type ne $exp_cluster_type) {
            die 'found one k-means and one hierarchical clustering solution in geneclusters and expclusters';
        }
    }
    my $gid = 0;
    my $aid = 0;
    my $filename = $jobname;
    my $postfix = '';
    my @geneindex;
    my @expindex;
    if ($gene_cluster_type eq 'h') {
        # Hierarchical clustering result
        @geneindex = _savetree(jobname   => $jobname,
                               tree      => $geneclusters,
                               order     => \@gorder,
                               transpose =>  0);
        $gid = 1;
    }
    elsif ($gene_cluster_type eq 'k') {
        # k-means clustering result
        $filename = $jobname . '_K';
        my $k = -1;
        foreach (@$geneclusters) {
            if ($_ > $k) {
                $k = $_;
            }
        }
        $k++;
        my $kggfilename = $jobname . "_K_G$k.kgg";
        @geneindex = $self->_savekmeans(filename => $kggfilename,
                                        clusterids => \@$geneclusters,
                                        order => \@gorder,
                                        transpose => 0);
        $postfix = "_G$k";
    }
    else {
        @geneindex = sort { $gorder[$a] <=> $gorder[$b] } (0..$ngenes-1);
    }
    if ($exp_cluster_type eq 'h') {
        # Hierarchical clustering result
        @expindex = _savetree(jobname   => $jobname,
                              tree      => $expclusters,
                              order     => \@eorder,
                              transpose =>  1);
        $aid = 1;
    }
    elsif ($exp_cluster_type eq 'k') {
        # k-means clustering result
        $filename = $jobname . '_K';
        my $k = -1;
        foreach (@$expclusters) {
            if ($_ > $k) {
                $k = $_;
            }
        }
        $k++;
        my $kagfilename = $jobname . "_K_A$k.kag";
        @expindex = $self->_savekmeans(filename => $kagfilename,
                                       clusterids => \@$expclusters,
                                       order => \@eorder,
                                       transpose => 1);
        $postfix = $postfix . "_A$k";
    }
    else {
        @expindex = sort { $eorder[$a] <=> $eorder[$b] } (0..$nexps-1);
    }
    $filename = $filename . $postfix;
    $self->_savedata(jobname    => $filename,
                     gid        => $gid,
                     aid        => $aid,
                     geneindex  => \@geneindex,
                     expindex   => \@expindex);
}

sub _savetree {
    my %param = @_;
    my $jobname = $param{jobname};
    my $tree = $param{tree};
    my @order = @{$param{order}};
    my $transpose = $param{transpose};
    my ($extension, $keyword);
    if ($transpose==0) {
        $extension = 'gtr';
        $keyword = 'GENE';
    }
    else {
        $extension = 'atr';
        $keyword = 'ARRY';
    }
    my $nnodes = $tree->length;
    open OUTPUT, ">$jobname.$extension" or die 'Error: Unable to open output file';
    my @nodeID = ('') x $nnodes;
    my @nodedist;
    my $i;
    for ($i = 0; $i < $nnodes; $i++) {
        my $node = $tree->get($i);
        push (@nodedist, $node->distance);
    }
    for (my $nodeindex = 0; $nodeindex < $nnodes; $nodeindex++) {
        my $min1 = $tree->get($nodeindex)->left;
        my $min2 = $tree->get($nodeindex)->right;
        $nodeID[$nodeindex] = "NODE" . ($nodeindex+1) . "X";
        print OUTPUT $nodeID[$nodeindex];
        print OUTPUT "\t";
        if ($min1 < 0) {
            my $index1 = -$min1-1;
            print OUTPUT $nodeID[$index1];
            print OUTPUT "\t";
            if ($nodedist[$index1] > $nodedist[$nodeindex]) {
            	$nodedist[$nodeindex] = $nodedist[$index1];
            }
        }
        else {
            print OUTPUT $keyword . $min1 . "X\t";
        }
        if ($min2 < 0) {
            my $index2 = -$min2-1;
            print OUTPUT $nodeID[$index2];
            print OUTPUT "\t";
            if ($nodedist[$index2] > $nodedist[$nodeindex]) {
            	$nodedist[$nodeindex] = $nodedist[$index2];
            }
        }
        else {
            print OUTPUT $keyword . $min2 . "X\t";
        }
        print OUTPUT 1.0-$nodedist[$nodeindex];



( run in 0.798 second using v1.01-cache-2.11-cpan-ceb78f64989 )