Bio-MUST-Core

 view release on metacpan or  search on metacpan

lib/Bio/MUST/Core/Tree/Splits.pm  view on Meta::CPAN

    my $bp_key = shift;

    return  1 if $self->bp_for(     $bp_key);       #      standard clan (*)
    return -1 if $self->comp_bp_for($bp_key);       # complementary clan (.)

    return;                                         # undef if not found
}


sub clan_support {
    my $self   = shift;
    my $bp_key = shift;

    my $support   = $self->bp_for(     $bp_key);    #      standard clan (*)
       $support //= $self->comp_bp_for($bp_key);    # complementary clan (.)

    return $support;                                # silent undef if not found
}


sub sub_clans {
    my $self   = shift;
    my $bp_key = shift;

    # TODO: use some bit masking approach instead of explicit indices?
    my %is_wanted = map { $_ => 1 } _indices($bp_key, q{*});

    my @sub_clans;
    for (my $size = keys(%is_wanted) - 1; $size > 1; $size--) {
        push @sub_clans,
            grep { List::AllUtils::all { $is_wanted{$_} } _indices($_, q{*}) }
            grep { tr/*// == $size }
                $self->all_bp_keys,                 #      standard clans (*)
                $self->all_comp_bp_keys             # complementary clans (.)
        ;
    }   # examine clans of target size and keep those including only wanted ids

    # TODO: handle wantarray?
    return @sub_clans;
}


my %xor_for = (
    '..' => '.',
    '.*' => '*',
    '*.' => '*',
    '**' => '.',
);

sub xor_clans {                             ## no critic (RequireArgUnpacking)
    return join q{},
        zip_by { $xor_for{"$_[0]$_[1]"} } map { [ split // ] } @_[1..2];
}


sub get_node_for_split {
    my $self   = shift;
    my $tree   = shift;
    my $bp_key = shift;

    # transparently fetch Bio::Phylo component object
    # TODO: avoid code repetition?
    $tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');

    my $comp_bp_key = $bp_key =~ tr/.*/*./r;

    NODE:
    for my $node ( @{ $tree->get_entities } ) {
        my $node_key = $self->node2key($node);
        next NODE unless $node_key;
        return $node if $node_key eq $bp_key || $node_key eq $comp_bp_key;
    }

    carp "[BMC] Warning: cannot find split with key: $bp_key; returning undef";

    return;
}


sub score_split {
    my $self   = shift;
    my $filter = shift;
    my $bp_key = shift;

    #### $bp_key
    my $comp_bp_key = $bp_key =~ tr/.*/*./r;
    #### 1: join qq{\n}, q{}, map { $_->full_id } @{ $self->key2ids(     $bp_key) }
    #### 2: join qq{\n}, q{}, map { $_->full_id } @{ $self->key2ids($comp_bp_key) }

    # compute score...
    my ($score1, $seen1) = $filter->score( @{ $self->key2ids(     $bp_key) } );
    my ($score2, $seen2) = $filter->score( @{ $self->key2ids($comp_bp_key) } );
    # ... considering both possible split keys (standard and complementary)
    my $score = List::AllUtils::max( $score1 - $score2, $score2 - $score1 );
    #### $score

    # Note: the following would be useful to decorate nodes with scores...
    # ... but it would not work for terminals!
    # $self->_bp_for->{$bp_key} = $score;

    # TODO: warn only once?
    carp '[BMC] Warning: filter could not match any seq id!'
        unless $seen1 || $seen2;

    return $score;
}


sub get_split_that_maximizes {
    my $self   = shift;
    my $filter = shift;

    # return split for which method yields the highest value
    my $split = max_by { $self->score_split($filter, $_) } $self->all_bp_keys,
        $self->_trivial_bp_keys;        # also consider single seqs
    # Note: scalar context to get only one node!
    # TODO: handle ties to allow for additional criteria

    return $split;
}

lib/Bio/MUST/Core/Tree/Splits.pm  view on Meta::CPAN

#
#             # handle optional bootstrap support values
#             my $bp_val = $1 // 1;       # defaults to 1 if no support
#             $chunk += length $1 if defined $1;
#
#             # throw error if not balanced parentheses
#             unless (@bp_keys) {
#                 carp '[BMC] Warning: unbalanced parentheses in tree string;'
#                     . ' returning undef!';
#                 return;
#             }
#
#             # store bipartition...
#             my $bp_key = pop @bp_keys;
#             my $star_n = $bp_key =~ tr/*//;
#             # ... skipping full tree and trivial bipartitions
#             $bp_for{$bp_key} = 0 + $bp_val if $star_n > 1 && $star_n < $id_n;
#             next CHUNK;
#         }
#
#         # process OTU id
#         # TODO: consider using a variation of $FULL_ID to match this?
#         if ($tpl =~ m/\A ([A-Za-z0-9_\@\-\.\ \#]+)/xms) {
#
#             # put stars for corresponding OTU in all open bipartitions
#             my $seq_id = SeqId->new( full_id => $1 );
#             my $offset = $lookup->index_for( $seq_id->full_id );
#             substr $bp_keys[$_], $offset, 1, q{*} for 0..$#bp_keys;
#             $chunk = length $1;
#             next CHUNK;
#         }
#
#         # throw error if anything else found than expected cases
#         carp "[BMC] Warning: unexpected char in tree string: '$char';"
#             . ' returning undef!';
#         return;
#     }
#
#     continue {
#         # advance in topology string
#         substr $tpl, 0, $chunk, q{};
#
#         # read next char
#         $char = substr $tpl, 0, 1;
#     }
#
#     my $splits = $class->new(
#         # no data about rep_n here
#         lookup  => $lookup,
#         _bp_for => \%bp_for,
#     );
#
#     return $splits;
# }


sub new_from_tree {
    my $class = shift;
    my $tree  = shift;

    # transparently fetch Bio::Phylo component object
    # TODO: avoid code repetition?
    $tree = $tree->tree if $tree->isa('Bio::MUST::Core::Tree');

    # build lookup as fast as possible (no tree visitor method)
    my $lookup = IdList->new(
        ids => [ map { $_->get_name } @{ $tree->get_terminals } ]
    );

    # instantiate Splits object to benefit from ids2key method
    my $splits = $class->new(
        # no data about rep_n here
        lookup  => $lookup,
    );

    # compute bipartitions and store node metadata
    my %bp_for;

    NODE:
    for my $node ( @{ $tree->get_internals } ) {
        my $bp_key = $splits->node2key($node);
        next NODE unless $bp_key;

        $bp_for{$bp_key} = $node->get_name || 1;    # defaults to "seen"
    }

    # TODO: handle rooted trees where the root has a BP value but not children
    # e.g., mullidae-well-rooted.tre

    # complete Splits object
    $splits->_set_bp_for( \%bp_for );

    return $splits;
}

__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

=head1 NAME

Bio::MUST::Core::Tree::Splits - Tree splits (bipartitions)

=head1 VERSION

version 0.252040

=head1 SYNOPSIS

    # TODO

=head1 DESCRIPTION

    # TODO

=head1 METHODS

=head2 ids2key



( run in 0.872 second using v1.01-cache-2.11-cpan-39bf76dae61 )