Bio-MUST-Tools-TreeParsing

 view release on metacpan or  search on metacpan

bin/tree-clan-splitter.pl  view on Meta::CPAN

            my $ali = Ali->load($alifile);
               $ali->degap_seqs;
            ### Storing clan in: $clanzerofile
               $ali->store($clanzerofile);
        }
        ### Moving to next tree
        next TREE;
    }

    ### Reading clans for: $stripped_intree
#    my $clan_hash;
    my $clan_hash = get_clan_hash($boot_hash);
    ### Done reading clans for: $stripped_intree

    my @ps_clans;
    my @sorted_clans = sort { $clan_hash->{size}{$a} <=> $clan_hash->{size}{$b} }
        keys %{ $clan_hash->{size} };
    #### $clan_hash
    #### @sorted_clans

    SPLIT_CLAN:
    for my $clan (@sorted_clans) {

        my @idxs      = @{ $clan_hash->{idxs}{$clan} };
        my @seq_ids   = map { $seq_for{$_}{seq_id} } @idxs;
        my @full_orgs = map { $_->full_org         } @seq_ids;
        ##### $clan
        ##### @idxs

        # skip clans with only one species
        next SPLIT_CLAN if scalar uniq (@full_orgs) < 2;

        my @groups = map { $seq_for{$_}{group} } @idxs;
        my ($type) = uniq(@groups);

        # keep only strictly targeted clans
        next SPLIT_CLAN unless scalar uniq (@groups) == 1;
        next SPLIT_CLAN unless $type eq 'target';

        push @ps_clans, $clan;
        ##### $type
        ##### $clan
        ##### @idxs
        ##### @groups
    }
    #### @ps_clans

    unless ( scalar @ps_clans ) {
        ### No targeted clan(s) found for: $stripped_intree
        next TREE;
    }

    ### Checking clans layout...
    my ($is_included, $composition_for) = check_inclusions($clan_hash, \@ps_clans);
    my @main_clans = grep { ! %$is_included{$_} } @ps_clans;
    #### @main_clans

    # fit ali to ali2phylip filtering
    my $alifile  = $stripped_intree . '.ali';
    my $base_ali = Ali->load($alifile);
       $base_ali->dont_guess;
    my $alist = $tree->alphabetical_list;
    #### $list
    my $ali = $alist->filtered_ali($base_ali);

    my $n_ali;

    IDLIST:
    for my $main_clan ( @main_clans ) {

        $n_ali++;
        #### $main_clan
        my @clan_idxs = @{ $clan_hash->{idxs}{$main_clan} };
        my @seq_ids   = map { $seq_for{$_}{seq_id} } @clan_idxs;
        my @full_ids  = map { $_->full_id } @seq_ids;

        # apply list to Ali
        my $list       = IdList->new( ids => \@full_ids );
        my $pruned_ali = $list->filtered_ali($ali);
        my $outfile    = change_suffix($alifile, "-$n_ali.ali");

        # build corresponding para files
        my $nlist    = $list->negative_list($ali);
        my $para_ali = $nlist->filtered_ali($ali);
        my $parafile = change_suffix($alifile, "-$n_ali.para");

        ### Output alignment in: $outfile
        ### Output para file in: $parafile
        $pruned_ali->degap_seqs;
        $pruned_ali->store($outfile);
        $para_ali->degap_seqs;
        $para_ali->store($parafile);
    }
}


SUBROUTINES:

sub _build_classifier {
    if ($ARGV_otu_file) {
        my @categories;
        open my $in, '<', file($ARGV_otu_file);
        while ( my $line = <$in> ) {
            chomp $line;
            my ($label, $otu) = split ':', $line;

            my $list      = IdList->new( ids => [ split ',', $otu ] );
            my $filter    = $tax->tax_filter( $list );
            my $criterion = Criterion->new( tax_filter => $filter );
            my $category  = Category->new(
                label    => $label,
                criteria => [ $criterion ],
            );
            push @categories, $category;
        }
        close $in;

        return Classifier->new( categories => \@categories );
    }
    if ($ARGV_taxa_list) {



( run in 1.158 second using v1.01-cache-2.11-cpan-39bf76dae61 )