Bio-MUST-Tools-TreeParsing
view release on metacpan or search on metacpan
bin/tree-clan-splitter.pl view on Meta::CPAN
my $ali = Ali->load($alifile);
$ali->degap_seqs;
### Storing clan in: $clanzerofile
$ali->store($clanzerofile);
}
### Moving to next tree
next TREE;
}
### Reading clans for: $stripped_intree
# my $clan_hash;
my $clan_hash = get_clan_hash($boot_hash);
### Done reading clans for: $stripped_intree
my @ps_clans;
my @sorted_clans = sort { $clan_hash->{size}{$a} <=> $clan_hash->{size}{$b} }
keys %{ $clan_hash->{size} };
#### $clan_hash
#### @sorted_clans
SPLIT_CLAN:
for my $clan (@sorted_clans) {
my @idxs = @{ $clan_hash->{idxs}{$clan} };
my @seq_ids = map { $seq_for{$_}{seq_id} } @idxs;
my @full_orgs = map { $_->full_org } @seq_ids;
##### $clan
##### @idxs
# skip clans with only one species
next SPLIT_CLAN if scalar uniq (@full_orgs) < 2;
my @groups = map { $seq_for{$_}{group} } @idxs;
my ($type) = uniq(@groups);
# keep only strictly targeted clans
next SPLIT_CLAN unless scalar uniq (@groups) == 1;
next SPLIT_CLAN unless $type eq 'target';
push @ps_clans, $clan;
##### $type
##### $clan
##### @idxs
##### @groups
}
#### @ps_clans
unless ( scalar @ps_clans ) {
### No targeted clan(s) found for: $stripped_intree
next TREE;
}
### Checking clans layout...
my ($is_included, $composition_for) = check_inclusions($clan_hash, \@ps_clans);
my @main_clans = grep { ! %$is_included{$_} } @ps_clans;
#### @main_clans
# fit ali to ali2phylip filtering
my $alifile = $stripped_intree . '.ali';
my $base_ali = Ali->load($alifile);
$base_ali->dont_guess;
my $alist = $tree->alphabetical_list;
#### $list
my $ali = $alist->filtered_ali($base_ali);
my $n_ali;
IDLIST:
for my $main_clan ( @main_clans ) {
$n_ali++;
#### $main_clan
my @clan_idxs = @{ $clan_hash->{idxs}{$main_clan} };
my @seq_ids = map { $seq_for{$_}{seq_id} } @clan_idxs;
my @full_ids = map { $_->full_id } @seq_ids;
# apply list to Ali
my $list = IdList->new( ids => \@full_ids );
my $pruned_ali = $list->filtered_ali($ali);
my $outfile = change_suffix($alifile, "-$n_ali.ali");
# build corresponding para files
my $nlist = $list->negative_list($ali);
my $para_ali = $nlist->filtered_ali($ali);
my $parafile = change_suffix($alifile, "-$n_ali.para");
### Output alignment in: $outfile
### Output para file in: $parafile
$pruned_ali->degap_seqs;
$pruned_ali->store($outfile);
$para_ali->degap_seqs;
$para_ali->store($parafile);
}
}
SUBROUTINES:
sub _build_classifier {
if ($ARGV_otu_file) {
my @categories;
open my $in, '<', file($ARGV_otu_file);
while ( my $line = <$in> ) {
chomp $line;
my ($label, $otu) = split ':', $line;
my $list = IdList->new( ids => [ split ',', $otu ] );
my $filter = $tax->tax_filter( $list );
my $criterion = Criterion->new( tax_filter => $filter );
my $category = Category->new(
label => $label,
criteria => [ $criterion ],
);
push @categories, $category;
}
close $in;
return Classifier->new( categories => \@categories );
}
if ($ARGV_taxa_list) {
( run in 1.158 second using v1.01-cache-2.11-cpan-39bf76dae61 )