BioPerl
view release on metacpan or search on metacpan
Bio/SeqFeature/Tools/Unflattener.pm view on Meta::CPAN
Args : see below
Arguments
-group: reference to list of Bio::SeqFeatureI objects
-resolver_method: a CODE reference
see the documentation above for an example of
a subroutine that can be used to resolve hierarchies
within groups
this is optional - a default subroutine will be used
NOTE: You should not need to call this method, unless you want fine
grained control over how the unflattening process.
=cut
sub unflatten_group{
my ($self,@args) = @_;
my($group, $resolver_method, $resolver_tag) =
$self->_rearrange([qw(GROUP
RESOLVER_METHOD
RESOLVER_TAG
)],
@args);
if ($self->verbose > 0) {
printf STDERR "UNFLATTENING GROUP:\n";
$self->_write_group($group, $self->group_tag);
}
my @sfs = @$group;
# we can safely ignore singletons (e.g. [source])
return $sfs[0] if @sfs == 1;
my $partonomy = $self->partonomy;
# $resolver_method is a reference to a SUB that will resolve
# ambiguous parent/child containment; for example, determining
# which mRNAs go with which CDSs
$resolver_method = $resolver_method || \&_resolve_container_for_sf;
# TAG BASED RESOLVING OF HIERARCHIES
#
# if the user specifies $resolver_tag, then we use this tag
# to pair up ambiguous parents and children;
#
# for example, the CDS feature may have a resolver tag of /derives_from
# which is a 'foreign key' into the /label tag of the mRNA feature
#
# this kind of tag-based resolution is possible for a certain subset
# of genbank records
#
# if no resolver tag is specified, we revert to the normal
# resolver_method
if ($resolver_tag) {
my $backup_resolver_method = $resolver_method;
# closure: $resolver_tag is remembered by this sub
my $sub =
sub {
my ($self, $sf, @possible_container_sfs) = @_;
my @container_sfs = ();
if ($sf->has_tag($resolver_tag)) {
my ($resolver_tagval) = $sf->get_tag_values($resolver_tag);
# if a feature has a resolver_tag (e.g. /derives_from)
# this specifies the /product, /symbol or /label for the
# parent feature
@container_sfs =
grep {
my $match = 0;
$self->_write_sf($_) if $self->verbose > 0;
foreach my $tag (qw(product symbol label)) {
if ($_->has_tag($tag)) {
my @vals =
$_->get_tag_values($tag);
if (grep {$_ eq $resolver_tagval} @vals) {
$match = 1;
last;
}
}
}
$match;
} @possible_container_sfs;
}
else {
return $backup_resolver_method->($sf, @possible_container_sfs);
}
return map {$_=>0} @container_sfs;
};
$resolver_method = $sub;
}
else {
# CONDITION: $resolver_tag is NOT set
$self->throw("assertion error") if $resolver_tag;
}
# we have now set $resolver_method to a subroutine for
# disambiguatimng parent/child relationships. we will
# now build the whole containment hierarchy for this group
# FIND TOP/ROOT SEQFEATURES
#
# find all the features for which there is no
# containing feature type (eg genes)
my @top_sfs =
grep {
!$self->get_container_type($_->primary_tag);
} @sfs;
# CONDITION: there must be at most one root
if (@top_sfs > 1) {
$self->_write_group($group, $self->group_tag);
printf STDERR "TOP SFS:\n";
$self->_write_sf($_) foreach @top_sfs;
$self->throw("multiple top-sfs in group");
}
my $top_sf = $top_sfs[0];
# CREATE INDEX OF SEQFEATURES BY TYPE
my %sfs_by_type = ();
foreach my $sf (@sfs) {
push(@{$sfs_by_type{$sf->primary_tag}}, $sf);
}
# containment index; keyed by child; lookup parent
# note: this index uses the stringified object reference of
# the object as a surrogate lookup key
my %container = (); # child -> parent
# ALGORITHM: build containment graph
#
# find all possible containers for each SF;
# for instance, for a CDS, the possible containers are all
# the mRNAs in the same group. For a mRNA, the possible
# containers are any SFs of type 'gene' (should only be 1).
# (these container-type mappings can be overridden)
#
# contention is resolved by checking coordinates of splice sites
# (this is the default, but can be overridden)
#
# most of the time, there is no problem identifying a unique
# parent for every child; this can be ambiguous when constructing
# CDS to mRNA relationships with lots of alternate splicing
#
# a hash of child->parent relationships is constructed (%container)
( run in 2.824 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )