Algorithm-Cluster

 view release on metacpan or  search on metacpan

perl/Record.pm  view on Meta::CPAN

    my $expclusters;
    my $gene_cluster_type;
    my $exp_cluster_type;
    if (defined $param{geneclusters}) {
        $geneclusters = $param{geneclusters};
        if (ref($geneclusters) eq "ARRAY") {
            if (scalar @{$geneclusters} != $ngenes) {
                die "k-means solution found, but its size does not agree with the number of genes";
            }
            $gene_cluster_type = 'k'; # k-means clustering result
        }
        elsif (ref($geneclusters) eq "Algorithm::Cluster::Tree") {
            $gene_cluster_type = 'h'; # hierarchical clustering result
            my $n = $geneclusters->length;
            if ($n != $ngenes - 1) {
                die "Size of the hierarchical clustering tree ($n) should be equal to the number of genes ($ngenes) minus one";
            }
        }
        else {
            die "Cannot understand gene clustering result! $!";
        }
    }
    if (defined $param{expclusters}) {
        $expclusters = $param{expclusters};
        if (ref($expclusters) eq "ARRAY") {
            if (scalar @$expclusters != $nexps) {
                die "k-means solution found, but its size does not agree with the number of experiments";
            }
            $exp_cluster_type = 'k'; # k-means clustering result
        }
        elsif (ref($expclusters) eq "Algorithm::Cluster::Tree") {
            $exp_cluster_type = 'h'; # hierarchical clustering result
            my $n = $expclusters->length;
            if ($n != $nexps - 1) {
                die "Size of the hierarchical clustering tree ($n) should be equal to the number of experiments ($nexps) minus one";
            }
        }
        else {
            die "Cannot understand experiment clustering result! $!";
        }
    }
    my @gorder;
    if (defined $self->{gorder}) {
        @gorder = $self->{gorder};
    }
    else {
        @gorder = (0..$ngenes-1);
    }
    my @eorder;
    if (defined $self->{eorder}) {
        @eorder = $self->{eorder};
    }
    else {
        @eorder = (0..$nexps-1);
    }
    if (defined $gene_cluster_type and defined $exp_cluster_type) {
        if ($gene_cluster_type ne $exp_cluster_type) {
            die 'found one k-means and one hierarchical clustering solution in geneclusters and expclusters';
        }
    }
    my $gid = 0;
    my $aid = 0;
    my $filename = $jobname;
    my $postfix = '';
    my @geneindex;
    my @expindex;
    if ($gene_cluster_type eq 'h') {
        # Hierarchical clustering result
        @geneindex = _savetree(jobname   => $jobname,
                               tree      => $geneclusters,
                               order     => \@gorder,
                               transpose =>  0);
        $gid = 1;
    }
    elsif ($gene_cluster_type eq 'k') {
        # k-means clustering result
        $filename = $jobname . '_K';
        my $k = -1;
        foreach (@$geneclusters) {
            if ($_ > $k) {
                $k = $_;
            }
        }
        $k++;
        my $kggfilename = $jobname . "_K_G$k.kgg";
        @geneindex = $self->_savekmeans(filename => $kggfilename,
                                        clusterids => \@$geneclusters,
                                        order => \@gorder,
                                        transpose => 0);
        $postfix = "_G$k";
    }
    else {
        @geneindex = sort { $gorder[$a] <=> $gorder[$b] } (0..$ngenes-1);
    }
    if ($exp_cluster_type eq 'h') {
        # Hierarchical clustering result
        @expindex = _savetree(jobname   => $jobname,
                              tree      => $expclusters,
                              order     => \@eorder,
                              transpose =>  1);
        $aid = 1;
    }
    elsif ($exp_cluster_type eq 'k') {
        # k-means clustering result
        $filename = $jobname . '_K';
        my $k = -1;
        foreach (@$expclusters) {
            if ($_ > $k) {
                $k = $_;
            }
        }
        $k++;
        my $kagfilename = $jobname . "_K_A$k.kag";
        @expindex = $self->_savekmeans(filename => $kagfilename,
                                       clusterids => \@$expclusters,
                                       order => \@eorder,
                                       transpose => 1);
        $postfix = $postfix . "_A$k";
    }
    else {
        @expindex = sort { $eorder[$a] <=> $eorder[$b] } (0..$nexps-1);
    }
    $filename = $filename . $postfix;
    $self->_savedata(jobname    => $filename,
                     gid        => $gid,
                     aid        => $aid,
                     geneindex  => \@geneindex,
                     expindex   => \@expindex);
}

sub _savetree {
    my %param = @_;
    my $jobname = $param{jobname};
    my $tree = $param{tree};
    my @order = @{$param{order}};
    my $transpose = $param{transpose};
    my ($extension, $keyword);
    if ($transpose==0) {
        $extension = 'gtr';
        $keyword = 'GENE';
    }
    else {
        $extension = 'atr';
        $keyword = 'ARRY';
    }
    my $nnodes = $tree->length;
    open OUTPUT, ">$jobname.$extension" or die 'Error: Unable to open output file';
    my @nodeID = ('') x $nnodes;
    my @nodedist;
    my $i;
    for ($i = 0; $i < $nnodes; $i++) {
        my $node = $tree->get($i);
        push (@nodedist, $node->distance);
    }
    for (my $nodeindex = 0; $nodeindex < $nnodes; $nodeindex++) {
        my $min1 = $tree->get($nodeindex)->left;
        my $min2 = $tree->get($nodeindex)->right;
        $nodeID[$nodeindex] = "NODE" . ($nodeindex+1) . "X";
        print OUTPUT $nodeID[$nodeindex];
        print OUTPUT "\t";
        if ($min1 < 0) {
            my $index1 = -$min1-1;
            print OUTPUT $nodeID[$index1];
            print OUTPUT "\t";
            if ($nodedist[$index1] > $nodedist[$nodeindex]) {
            	$nodedist[$nodeindex] = $nodedist[$index1];
            }
        }
        else {
            print OUTPUT $keyword . $min1 . "X\t";
        }
        if ($min2 < 0) {
            my $index2 = -$min2-1;
            print OUTPUT $nodeID[$index2];
            print OUTPUT "\t";
            if ($nodedist[$index2] > $nodedist[$nodeindex]) {
            	$nodedist[$nodeindex] = $nodedist[$index2];
            }
        }
        else {
            print OUTPUT $keyword . $min2 . "X\t";
        }
        print OUTPUT 1.0-$nodedist[$nodeindex];
        print OUTPUT "\n";
    }
    close(OUTPUT);
    # Now set up order based on the tree structure
    return $tree->sort(\@order);
}

sub _savekmeans {
    my ($self, %param) = @_;
    my $filename = $param{filename};
    my @clusterids = @{$param{clusterids}};
    my @order = @{$param{order}};
    my $transpose = $param{transpose};
    my $label;
    my @names;
    if ($transpose == 0) {
        $label = $self->{uniqid};
        @names = @{$self->{geneid}};
    }
    else {
        $label = 'ARRAY';
        @names = @{$self->{expid}};
    }
    open OUTPUT, ">$filename" or die 'Error: Unable to open output file';
    print OUTPUT "$label\tGROUP\n";
    my $n = scalar @names;
    my @result = sort { $order[$a] <=> $order[$b] } (0..$n-1);
    my @sortedindex;
    my $cluster = 0;
    while (scalar @sortedindex < $n) {
        foreach (@result) {
            my $j = $_;
            my $cid = $clusterids[$j];
            if ($clusterids[$j]==$cluster) {
                print OUTPUT $names[$j] . "\t$cluster\n";
                push (@sortedindex, $j);
            }
        }
        $cluster++;
    }
    close(OUTPUT);
    return @sortedindex;
}

sub _savedata {
    my ($self, %param) = @_;
    my $jobname = $param{jobname};
    my $gid = $param{gid};
    my $aid = $param{aid};
    my @geneindex = @{$param{geneindex}};
    my @expindex = @{$param{expindex}};
    my @genename;
    if (defined $self->{genename}) {
        @genename = @{$self->{genename}};
    }
    else {
        @genename = @{$self->{geneid}};
    }
    my $ngenes = scalar @{$self->{geneid}};
    my $nexps = scalar @{$self->{expid}};
    open OUTPUT, ">$jobname.cdt" or die 'Error: Unable to open output file';
    my @mask;
    if (defined $self->{mask}) {
        @mask = @{$self->{mask}};
    }
    else {
        @mask = ([(1) x $nexps]) x $ngenes;
        # Each row contains identical shallow copies of the same vector;
        # modifying one row would affect the other rows.
    }
    my @gweight;
    if (defined $self->{gweight}) {
        @gweight = @{$self->{gweight}};
    }
    else {
        @gweight = (1) x $ngenes;
    }
    my @eweight;
    if (defined $self->{eweight}) {
        @eweight = @{$self->{eweight}};
    }
    else {
        @eweight = (1) x $nexps;
    }
    if ($gid) {
    	print OUTPUT "GID\t";
    }
    print OUTPUT $self->{uniqid};
    print OUTPUT "\tNAME\tGWEIGHT";
    # Now add headers for data columns
    foreach (@expindex) {
        print OUTPUT "\t" . $self->{expid}[$_];
    }
    print OUTPUT "\n";
    if ($aid) {
        print OUTPUT "AID";
        if ($gid) {
            print OUTPUT "\t";
        }
        print OUTPUT "\t\t";
        foreach (@expindex) {
            print OUTPUT "\tARRY" . $_ . 'X';
        }
        print OUTPUT "\n";
    }
    print OUTPUT "EWEIGHT";
    if ($gid) {
        print OUTPUT "\t";
    }
    print OUTPUT "\t\t";
    foreach (@expindex) {
        print OUTPUT "\t" . $eweight[$_];
    }
    print OUTPUT "\n";
    foreach (@geneindex) {
        my $i = $_;
        if ($gid) {
            print OUTPUT "GENE" . $i . "X\t";
        }
        print OUTPUT $self->{geneid}[$i] . "\t" . $genename[$i] . "\t" . $gweight[$i];
        foreach (@expindex) {
            my $j = $_;
            print OUTPUT "\t";
            if ($mask[$i][$j]) {
                print OUTPUT $self->{data}[$i][$j];
            }
        }
        print OUTPUT "\n";
    }
    close(OUTPUT);
}

1;



( run in 1.207 second using v1.01-cache-2.11-cpan-5735350b133 )