Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/External/ExternalFeatureAdaptor.pm  view on Meta::CPAN

  }

  my %features;
  my $from_coord_system;

  my $fetch_method;

  #
  # Get all of the features from whatever coord system they are computed in
  #
  if($self->_supported('CLONE')) {
    $fetch_method = sub {
      my $self = shift;
      my $name = shift;
      my ($acc, $ver) = split(/\./, $name);
      $self->fetch_all_by_clone_accession($acc,$ver,@_);
    };
    $from_coord_system = $csa->fetch_by_name('clone');
  } elsif($self->_supported('ASSEMBLY')) {
    $from_coord_system = $csa->fetch_by_name('chromosome');
    $fetch_method = $self->can('fetch_all_by_chr_start_end');
  } elsif($self->_supported('CONTIG')) {
    $from_coord_system = $csa->fetch_by_name('contig');
    $fetch_method = $self->can('fetch_all_by_contig_name');
  } elsif($self->_supported('SUPERCONTIG')) {
    $from_coord_system = $csa->fetch_by_name('supercontig');
    $fetch_method = $self->can('fetch_all_by_supercontig_name');
  } else {
    $self->_no_valid_coord_systems();
  }

  if($from_coord_system->equals($coord_system)) {
    $features{$slice_seq_region} = &$fetch_method($self, $slice_seq_region,
                                                  $slice_start,$slice_end);
  } else {
    foreach my $segment (@{$slice->project($from_coord_system->name,
                                           $from_coord_system->version)}) {
      my ($start,$end,$pslice) = @$segment;
      $features{$pslice->seq_region_name } ||= [];
      push @{$features{$pslice->seq_region_name }},
           @{&$fetch_method($self, $pslice->seq_region_name,
                            $pslice->start(),
                            $pslice->end())};
    }
  }

  my @out;

  if(!$coord_system->equals($from_coord_system)) {             
    my $asma = $self->ensembl_db->get_AssemblyMapperAdaptor();
    my $mapper = $asma->fetch_by_CoordSystems($from_coord_system,
                                              $coord_system);
    my %slice_cache;
    my $slice_adaptor = $self->ensembl_db->get_SliceAdaptor();
    my $slice_setter;

    #convert the coordinates of each of the features retrieved
    foreach my $fseq_region (keys %features) {
      my $feats = $features{$fseq_region};
      next if(!$feats);
      $slice_setter = _guess_slice_setter($feats) if(!$slice_setter);

      foreach my $f (@$feats) {
        my($sr_id, $start, $end, $strand) = 
          $mapper->fastmap($fseq_region,$f->start,$f->end,$f->strand,
                           $from_coord_system);
        
        #maps to gap
        next if(!defined($sr_id));

        #maps to unexpected seq region, probably error in the externally
        if($sr_id ne $slice_seq_region_id) {
          warning("Externally created Feature mapped to [$sr_id] " .
                  "which is not on requested seq_region_id [$slice_seq_region_id]");
          next;
        }

        #update the coordinates of the feature
        &$slice_setter($f,$slice);
        $f->start($start);
        $f->end($end);
        $f->strand($strand);
        push @out, $f;
      }
    }
  } else {
    #we already know the seqregion the featues are on, we just have
    #to put them on the slice
    @out = @{$features{$slice_seq_region}};
    my $slice_setter = _guess_slice_setter(\@out);

    foreach my $f (@out) {
      &$slice_setter($f,$slice);
    }
  }

  # convert from assembly coords to slice coords
  # handle the circular slice case
  my $seq_region_len = $slice->seq_region_length();
  foreach my $f (@out) {
    my($f_start, $f_end, $f_strand);

    if ($slice->strand == 1) { # Positive strand
      $f_start = $f->start - $slice_start + 1;
      $f_end   = $f->end - $slice_start + 1;
      $f_strand = $f->strand;

      if ($slice->is_circular()) { # Handle cicular chromosomes
	if ($f_start > $f_end) { # Looking at a feature overlapping the chromsome origin.
	  if ($f_end > $slice_start) {
	    # Looking at the region in the beginning of the chromosome.
	    $f_start -= $seq_region_len;
	  }

	  if ($f_end < 0) {
	    $f_end += $seq_region_len;
	  }
	} else {
	  if ($slice_start > $slice_end && $f_end < 0) {
	    # Looking at the region overlapping the chromosome origin and 
	    # a feature which is at the beginning of the chromosome.
	    $f_start += $seq_region_len;
	    $f_end   += $seq_region_len;
	  }
	}
      }
    } else { # Negative strand
      my ($seq_region_start, $seq_region_end) = ($f->start, $f->end);
      $f_start = $slice_end - $seq_region_end + 1;
      $f_end = $slice_end - $seq_region_start + 1;
      $f_strand = $f->strand * -1;

      if ($slice->is_circular()) {
	if ($slice_start > $slice_end) { # slice spans origin or replication
	  if ($seq_region_start >= $slice_start) {
	    $f_end += $seq_region_len;
	    $f_start += $seq_region_len 
	      if $seq_region_end > $slice_start;

	  } elsif ($seq_region_start <= $slice_end) {
	    # do nothing
	  } elsif ($seq_region_end >= $slice_start) {
	    $f_start += $seq_region_len;
	    $f_end += $seq_region_len;
	  } elsif ($seq_region_end <= $slice_end) {
	    $f_end += $seq_region_len
	      if $f_end < 0;
	  } elsif ($seq_region_start > $seq_region_end) {
	    $f_end += $seq_region_len;
	  } else { }
	} else {
	  if ($seq_region_start <= $slice_end and $seq_region_end >= $slice_start) {
	    # do nothing
	  } elsif ($seq_region_start > $seq_region_end) {
	    if ($seq_region_start <= $slice_end) {
	      $f_start -= $seq_region_len;
	    } elsif ($seq_region_end >= $slice_start) {
	      $f_end += $seq_region_len;
	    } else { }
	  }
	}
      }
    }

    $f->start($f_start);
    $f->end($f_end);
    $f->strand($f_strand);    
  }
  
  return \@out;
}
  

sub _guess_slice_setter {
  my $features = shift;

  #we do not know what type of features these are.  They might
  #be bioperl features or old ensembl features, hopefully they are new
  #style features.  Try to come up with a setter method for the
  #slice.

  return undef if(!@$features);

  my ($f) = @$features;

  my $slice_setter;
  foreach my $method (qw(slice contig attach_seq)) {
    last if($slice_setter = $f->can($method));
  }
    
  if(!$slice_setter) {
    if($f->can('seqname')) {
      $slice_setter = sub { $_[0]->seqname($_[1]->seq_region_name()); };
    } else {
      $slice_setter = sub{} if(!$slice_setter);
    }
  }

  return $slice_setter;
}


=head2 fetch_all_by_contig_name

  Arg [1]    : string $contig_name
  Arg [2]    : int $start (optional)
               The start of the region on the contig to retrieve features on
               if not specified the whole of the contig is used.
  Arg [3]    : int $end (optional) 
               The end of the region on the contig to retrieve features on
               if not specified the whole of the contig is used.
  Example    : @fs = @{$self->fetch_all_by_contig_name('AB00879.1.1.39436')};
  Description: Retrieves features on the contig defined by the name 
               $contig_name in contig coordinates.

               If this method is overridden then the coordinate_systems 
               method must return 'CONTIG' as one of its values. 

               This method will work as is (i.e. without being overridden) 
               providing at least one other fetch method has 
               been overridden.               
  Returntype : reference to a list of Bio::SeqFeature objects in the contig
               coordinate system.
  Exceptions : thrown if the input argument is incorrect
               thrown if the coordinate_systems method returns the value 
               'CONTIG' and this method has not been overridden.
  Caller     : general, fetch_all_by_Slice

=cut

sub fetch_all_by_contig_name {
  my ($self, $contig_name, $start, $end) = @_;

  unless($contig_name) {



( run in 0.577 second using v1.01-cache-2.11-cpan-39bf76dae61 )