BioPerl

 view release on metacpan or  search on metacpan

Bio/SeqFeature/Tools/Unflattener.pm  view on Meta::CPAN

 Args    : see below

Arguments

  -group:           reference to list of Bio::SeqFeatureI objects

  -resolver_method: a CODE reference
                    see the documentation above for an example of
                    a subroutine that can be used to resolve hierarchies
                    within groups

                    this is optional - a default subroutine will be used


NOTE: You should not need to call this method, unless you want fine
grained control over how the unflattening process.

=cut

sub unflatten_group{
   my ($self,@args) = @_;

   my($group, $resolver_method, $resolver_tag) =
     $self->_rearrange([qw(GROUP
                           RESOLVER_METHOD
			   RESOLVER_TAG
                          )],
                          @args);

   if ($self->verbose > 0) {
       printf STDERR "UNFLATTENING GROUP:\n";
       $self->_write_group($group, $self->group_tag);
   }

   my @sfs = @$group;

   # we can safely ignore singletons (e.g. [source])
   return $sfs[0] if @sfs == 1;

   my $partonomy = $self->partonomy;

   # $resolver_method is a reference to a SUB that will resolve
   # ambiguous parent/child containment; for example, determining
   # which mRNAs go with which CDSs
   $resolver_method = $resolver_method || \&_resolve_container_for_sf;

   # TAG BASED RESOLVING OF HIERARCHIES
   #
   # if the user specifies $resolver_tag, then we use this tag
   # to pair up ambiguous parents and children;
   #
   # for example, the CDS feature may have a resolver tag of /derives_from
   # which is a 'foreign key' into the /label tag of the mRNA feature
   #
   # this kind of tag-based resolution is possible for a certain subset
   # of genbank records
   #
   # if no resolver tag is specified, we revert to the normal
   # resolver_method
   if ($resolver_tag) {
       my $backup_resolver_method = $resolver_method;
       # closure: $resolver_tag is remembered by this sub
       my $sub = 
	 sub {
	     my ($self, $sf, @possible_container_sfs) = @_;
	     my @container_sfs = ();
	     if ($sf->has_tag($resolver_tag)) {
		 my ($resolver_tagval) = $sf->get_tag_values($resolver_tag);
		 # if a feature has a resolver_tag (e.g. /derives_from)
		 # this specifies the /product, /symbol or /label for the
		 # parent feature
		 @container_sfs = 
		   grep {
		       my $match = 0;
		       $self->_write_sf($_) if $self->verbose > 0;
		       foreach my $tag (qw(product symbol label)) {
			   if ($_->has_tag($tag)) {
			       my @vals =
				 $_->get_tag_values($tag);
			       if (grep {$_ eq $resolver_tagval} @vals) {
				   $match = 1;
				   last;
			       }
			   }   
		       }
		       $match;
		   } @possible_container_sfs;
	     } 
	     else {
		 return $backup_resolver_method->($sf, @possible_container_sfs);
	     }
	     return map {$_=>0} @container_sfs;
	 };
       $resolver_method = $sub;
   }
   else {
       # CONDITION: $resolver_tag is NOT set
       $self->throw("assertion error") if $resolver_tag;
   }
   # we have now set $resolver_method to a subroutine for
   # disambiguatimng parent/child relationships. we will
   # now build the whole containment hierarchy for this group


   # FIND TOP/ROOT SEQFEATURES
   #
   # find all the features for which there is no
   # containing feature type (eg genes)
   my @top_sfs =
     grep { 
         !$self->get_container_type($_->primary_tag);
     } @sfs;

   # CONDITION: there must be at most one root
   if (@top_sfs > 1) {
       $self->_write_group($group, $self->group_tag);
       printf STDERR "TOP SFS:\n";
       $self->_write_sf($_) foreach @top_sfs;
       $self->throw("multiple top-sfs in group");
   }
   my $top_sf = $top_sfs[0];

   # CREATE INDEX OF SEQFEATURES BY TYPE
   my %sfs_by_type = ();
   foreach my $sf (@sfs) {
       push(@{$sfs_by_type{$sf->primary_tag}}, $sf);
   }

   # containment index; keyed by child; lookup parent
   # note: this index uses the stringified object reference of
   # the object as a surrogate lookup key

   my %container = ();   # child -> parent

   # ALGORITHM: build containment graph
   #
   # find all possible containers for each SF;
   # for instance, for a CDS, the possible containers are all
   # the mRNAs in the same group. For a mRNA, the possible
   # containers are any SFs of type 'gene' (should only be 1).
   # (these container-type mappings can be overridden)
   #
   # contention is resolved by checking coordinates of splice sites
   # (this is the default, but can be overridden)
   #
   # most of the time, there is no problem identifying a unique
   # parent for every child; this can be ambiguous when constructing
   # CDS to mRNA relationships with lots of alternate splicing
   #
   # a hash of child->parent relationships is constructed (%container)



( run in 2.824 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )