Bio-BPWrapper

 view release on metacpan or  search on metacpan

lib/Bio/BPWrapper/TreeManipulations.pm  view on Meta::CPAN

sub edge2tree {
    my $edgeFile = shift;
    open EG, "<", $edgeFile || die "can't read parent-child edge file\n";
    $rootnode = Bio::Tree::Node->new(-id=>'root');
    my $tr = Bio::Tree::Tree->new();
    my @nds = ($rootnode);
    my %parent;
    my %seen_edge;
    while(<EG>) {
	next unless /^(\S+)\s+(\S+)/;
	my ($pa, $ch) = ($1, $2);
	$seen_edge{$pa}{$ch}++; # number of events
	$parent{$ch} = $pa unless $parent{$ch}; # seen before
    }
    close EG;

#    print Dumper(\%parent);

    my %seen_parent;
    my %add_node;
#    my @nds;
    foreach my $ch (keys %parent) {
	my $pa = $parent{$ch};
	$seen_parent{$pa}++; 
	push @nds, Bio::Tree::Node->new(-id=>$ch, -branch_length => $seen_edge{$pa}{$ch});
	$add_node{$ch}++;
    }

    # special treatment to set outgroup (which has no parent specified in the edge table) as root
    # add all as chid of root
    foreach my $pa (keys %seen_parent) {
	next if $add_node{$pa};
	my $orphan = Bio::Tree::Node->new(-id=>$pa, -branch_length => 1); # ST213 in test file "edges-pars.tsv"
	push @nds, $orphan;
	$parent{$pa} = 'root';
    }

    # attach descendant
#    my %attached;
    foreach my $node (@nds) {
	next if $node eq $rootnode;
	my $id = $node->id(); # print $id, "\t";
	my $p_id = $parent{$id}; # print $p_id, "\n";
#	next if $attached{$p_id}++;
	my @nds = grep { $_->id() eq $p_id } @nds;
	if (@nds) {
	    my $p_node = shift @nds;
	    $p_node->add_Descendent($node);
	} else {
	    die "no parent $id\n";
	}
    }    
    $tr->set_root_node($rootnode);
    return $tr;
}


sub reorder_by_ref {
    die "reference node id missing\n" unless $opts{'ref'};
    my $id = 0;
    &_flip_if_not_in_top_clade($rootnode, $opts{'ref'}, \$id);
    $print_tree = 1;
}

sub _flip_if_not_in_top_clade { # by resetting creation_id & sortby option of each_Descendent
    my ($nd, $ref, $refid) = @_;
    $nd->_creation_id($$refid++);
#    print STDERR $nd->internal_id(), ":\t";
    if ($nd->is_Leaf()) {
#	print STDERR "\n";
	return }
    my @des = $nd->each_Descendent();
    my @des_reordered;
    for (my $i=0; $i<=$#des; $i++) {
	my $in_des = 0;
	my $id = $des[$i]->internal_id();
	my @otus = &_each_leaf($des[$i]);
	foreach (@otus) { $in_des = 1 if $ref eq $_->id }
#	print STDERR join("|", map { $_->id } @otus), " => ", $in_des, ";";
	if ($in_des) {
	    unshift @des_reordered, $des[$i];
	} else {
	    push @des_reordered, $des[$i];
	}
    }
    foreach (@des_reordered) {
	$_->_creation_id($$refid++);
#	print STDERR $_->internal_id(), ";";
    }
#    print STDERR "\n";
    my @des_new = $nd->each_Descendent('creation'); # key sort function!!
    foreach my $de (@des_new) {
	&_flip_if_not_in_top_clade($de, $opts{'ref'}, $refid);
    }
}


# trim a node to a single OTU representative if all branch lengths of its descendant OTUs <= $cut
sub trim_tips {
    die "Usage: $0 --trim-tips <num>\n" unless $opts{'trim-tips'};
    my $cut = $opts{'trim-tips'};

    my @trim_nodes;
    my $group_ct = 0;
    &identify_nodes_to_trim_by_walk_from_root($rootnode, \$cut, \@trim_nodes, \$group_ct);
    my %otu_sets;
    my $set_ct = 1;
    foreach my $ref_trim (@trim_nodes) {
	foreach (@$ref_trim) {
	    $otu_sets{$_} = $set_ct;
	}
	$set_ct++;
#	print STDERR join "\t", sort @$ref_trim;
#	print STDERR "\n";
    }


    foreach (@otus) {
	next if $otu_sets{$_->id};
	$otu_sets{$_->id} = $set_ct++;
    }
    #    print Dumper(\%otu_sets);
    print STDERR "#Trim tree from tip  with a cutoff of d=$cut\n";
    print STDERR "otu\tnr_set_id\n";
    foreach (sort {$otu_sets{$a} <=> $otu_sets{$b}} keys %otu_sets) {
	print STDERR $_, "\t", $otu_sets{$_}, "\n";
    }

    $print_tree = 1;
}

sub identify_nodes_to_trim_by_walk_from_root {
    my $node = shift; # internal node only
    my $ref_cut = shift;
    my $ref_group = shift;
    my $ref_ct = shift;

    return if $node->is_Leaf;
    my %des_otus; # save distances
    my $trim = 1; # default to trim
    # trim a node if all OTU distance to it is <= cut
    foreach my $des ($node -> get_all_Descendents()) {
	next unless $des->is_Leaf;
	#push @des_otus, $des;
	# distance to a desc OTU
	my $dist = $tree->distance($node, $des);
	$des_otus{$des->id} = { 'otu' => $des, 'dist' => $dist };
	$trim = 0 if $dist > $$ref_cut; # don't trim is any distance to OTU > $cut
    }

    if ($trim) { # trim &  retain the first OTU; stop descending
	my @leafs = sort keys %des_otus; # make a copy
	my $pa = $node->ancestor();



( run in 0.690 second using v1.01-cache-2.11-cpan-39bf76dae61 )