Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/Slice.pm  view on Meta::CPAN


=head1 NAME

Bio::EnsEMBL::Slice - Arbitary Slice of a genome

=head1 SYNOPSIS

  $sa = $db->get_SliceAdaptor;

  $slice =
    $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );

  # get some attributes of the slice
  my $seqname = $slice->seq_region_name();
  my $start   = $slice->start();
  my $end     = $slice->end();

  # get the sequence from the slice
  my $seq = $slice->seq();

  # get some features from the slice
  foreach $gene ( @{ $slice->get_all_Genes } ) {
    # do something with a gene
  }

  foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
    # do something with dna-dna alignments
  }

=head1 DESCRIPTION

A Slice object represents a region of a genome.  It can be used to retrieve
sequence or features from an area of interest.

NOTE: The Slice is defined by its Strand, but normal behaviour for get_all_*
methods is to return Features on both Strands. 

=head1 METHODS

=cut

package Bio::EnsEMBL::Slice;
$Bio::EnsEMBL::Slice::VERSION = '114.0.0';
use vars qw(@ISA);
use strict;

use Bio::PrimarySeqI;


use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
use Bio::EnsEMBL::RepeatMaskedSlice;
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::ProjectionSegment;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Iterator;
use Bio::EnsEMBL::DBSQL::MergedAdaptor;

use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::SeqRegionSynonym;
use Scalar::Util qw(weaken isweak);

# use Data::Dumper;

my $registry = "Bio::EnsEMBL::Registry";

@ISA = qw(Bio::PrimarySeqI);


=head2 new

  Arg [...]  : List of named arguments
               Bio::EnsEMBL::CoordSystem COORD_SYSTEM
               string SEQ_REGION_NAME,
               int    START,
               int    END,
               int    SEQ_REGION_LENGTH, (optional)
               string SEQ (optional)
               int    STRAND, (optional, defaults to 1)
               Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
  Example    : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
                                                 -start => 1,
                                                 -end => 10000,
                                                 -strand => 1,
                                                 -seq_region_name => 'X',
                                                 -seq_region_length => 12e6,
                                                 -adaptor => $slice_adaptor);
  Description: Creates a new slice object.  A slice represents a region
               of sequence in a particular coordinate system.  Slices can be
               used to retrieve sequence and features from an area of
               interest in a genome.

               Coordinates start at 1 and are inclusive.  Negative
               coordinates or coordinates exceeding the length of the
               seq_region are permitted.  Start must be less than or equal.
               to end regardless of the strand.

               Slice objects are immutable. Once instantiated their attributes
               (with the exception of the adaptor) may not be altered.  To
               change the attributes a new slice must be created.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
  Caller     : general, Bio::EnsEMBL::SliceAdaptor
  Status     : Stable

=cut

sub new {
  my $caller = shift;

  #new can be called as a class or object method
  my $class = ref($caller) || $caller;

  my ($seq, $coord_system, $seq_region_name, $seq_region_length,
      $start, $end, $strand, $adaptor, $empty) =
        rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
                      START END STRAND ADAPTOR EMPTY)], @_);

  if ( !defined($seq_region_name) ) {
    throw('SEQ_REGION_NAME argument is required');
  }

lib/Bio/EnsEMBL/Slice.pm  view on Meta::CPAN

  if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
    throw('SEQ must be the same length as the defined LENGTH not '
        . CORE::length($seq)
        . ' compared to '
        . ( $end - $start + 1 ) );
  }

  if(defined($coord_system)) {
   if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
     throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
   }
   if($coord_system->is_top_level()) {
     throw('Cannot create slice on toplevel CoordSystem.');
   }
  } else {
   warning("Slice without coordinate system");
  }

  $strand ||= 1;

  if($strand != 1 && $strand != -1) {
    throw('STRAND argument must be -1 or 1');
  }

  if(defined($adaptor)) {
    if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
      throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
    }
  }

  my $self = bless {'coord_system'      => $coord_system,
                'seq'               => $seq,
                'seq_region_name'   => $seq_region_name,
                'seq_region_length' => $seq_region_length,
                'start'             => int($start),
                'end'               => int($end),
                'strand'            => $strand}, $class;

  $self->adaptor($adaptor);

  return $self;

}

=head2 new_fast

  Arg [1]    : hashref to be blessed
  Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
  Exceptions : none
  Returntype : Bio::EnsEMBL::Slice
  Caller     : general
  Status     : Stable

=cut


sub new_fast {
  my $class = shift;
  my $hashref = shift;
  my $self = bless $hashref, $class;
  weaken($self->{adaptor})  if ( ! isweak($self->{adaptor}) );
  return $self;
}

=head2 adaptor

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
  Example    : $adaptor = $slice->adaptor();
  Description: Getter/Setter for the slice object adaptor used
               by this slice for database interaction.
  Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor
  Exceptions : thorws if argument passed is not a SliceAdaptor
  Caller     : general
  Status     : Stable

=cut

sub adaptor{
   my $self = shift;

   if(@_) {
     my $ad = shift;
     if(defined($ad)) {
       if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
         throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
       }
     }
     weaken($self->{'adaptor'} = $ad);
   }

   return $self->{'adaptor'};
}



=head2 seq_region_name

  Arg [1]    : none
  Example    : $seq_region = $slice->seq_region_name();
  Description: Returns the name of the seq_region that this slice is on. For
               example if this slice is in chromosomal coordinates the
               seq_region_name might be 'X' or '10'.

               This function was formerly named chr_name, but since slices can
               now be on coordinate systems other than chromosomal it has been
               changed.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub seq_region_name {
  my $self = shift;
  return $self->{'seq_region_name'};
}

=head2 seq_region_start

  Example    : $seq_region_start = $slice->seq_region_start();
  Description: Returns the start position of this slice relative to the
               start of the sequence region that it was created on.
               Since slices are always in genomic coordinates this is
               an alias to start()
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub seq_region_start {
    my $self = shift;
    return $self->start();
}

=head2 seq_region_end

  Example    : $seq_region_end = $slice->seq_region_end();
  Description: Returns the end position of this slice relative to the
               start of the sequence region that it was created on.
               Since slices are always in genomic coordinates this is
               an alias to end()
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

lib/Bio/EnsEMBL/Slice.pm  view on Meta::CPAN

  Arg   1    : int $start, refers to the start of the subslice relative to the input slice
  Arg   2    : int $end, refers to the end of the subslice relative to the input slice
  Arge [3]   : int $strand
  Example    : none
  Description: Makes another Slice that covers only part of this Slice
               If a Slice is requested which lies outside of the boundaries
               of this function will return undef.  This means that
               behaviour will be consistant whether or not the slice is
               attached to the database (i.e. if there is attached sequence
               to the slice).  Alternatively the expand() method or the
               SliceAdaptor::fetch_by_region method can be used instead.
  Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub sub_Slice {
  my ( $self, $start, $end, $strand ) = @_;

  my $length = $self->length();

  if( $start < 1 || $start > $length ) {
    # throw( "start argument not valid" );
    return undef;
  }

  if( $end < $start || $end > $length ) {
    # throw( "end argument not valid" )
    return undef;
  }

  my ( $new_start, $new_end, $new_strand, $new_seq );
  if( ! defined $strand ) {
    $strand = 1;
  }

  if( $self->{'strand'} == 1 ) {
    $new_start = $self->{'start'} + $start - 1;
    $new_end = $self->{'start'} + $end - 1;
    $new_strand = $strand;
  } else {
    $new_start = $self->{'end'} - $end + 1;;
    $new_end = $self->{'end'} - $start + 1;
    $new_strand = -$strand;
  }

  if( defined $self->{'seq'} ) {
    $new_seq = $self->subseq( $start, $end, $strand );
  }

  #fastest way to copy a slice is to do a shallow hash copy
  my $new_slice = {%{$self}};
  $new_slice->{'start'} = int($new_start);
  $new_slice->{'end'}   = int($new_end);
  $new_slice->{'strand'} = $new_strand;
  if( $new_seq ) {
    $new_slice->{'seq'} = $new_seq;
  }
  weaken($new_slice->{adaptor});

  return bless $new_slice, ref($self);
}



=head2 seq_region_Slice

  Arg [1]    : none
  Example    : $slice = $slice->seq_region_Slice();
  Description: Returns a slice which spans the whole seq_region which this slice
               is on.  For example if this is a slice which spans a small region
               of chromosome X, this method will return a slice which covers the
               entire chromosome X. The returned slice will always have strand
               of 1 and start of 1.  This method cannot be used if the sequence
               of the slice has been set manually.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : warning if called when sequence of Slice has been set manually.
  Caller     : general
  Status     : Stable

=cut

sub seq_region_Slice {
  my $self = shift;

  if($self->{'seq'}){
    warning("Cannot get a seq_region_Slice of a slice which has manually ".
            "attached sequence ");
    return undef;
  }

  # quick shallow copy
  my $slice;
  %{$slice} = %{$self};
  bless $slice, ref($self);
  weaken($slice->{adaptor});

  $slice->{'start'}  = 1;
  $slice->{'end'}    = $slice->{'seq_region_length'};
  $slice->{'strand'} = 1;

  return $slice;
}


=head2 get_seq_region_id

  Arg [1]    : none
  Example    : my $seq_region_id = $slice->get_seq_region_id();
  Description: Gets the internal identifier of the seq_region that this slice
               is on. Note that this function will not work correctly if this
               slice does not have an attached adaptor. Also note that it may
               be better to go through the SliceAdaptor::get_seq_region_id
               method if you are working with multiple databases since is
               possible to work with slices from databases with different
               internal seq_region identifiers.
  Returntype : int or undef if slices does not have attached adaptor
  Exceptions : warning if slice is not associated with a SliceAdaptor
  Caller     : assembly loading scripts, general
  Status     : Stable

=cut

sub get_seq_region_id {
  my ($self) = @_;
  if($self->adaptor) {
    return $self->adaptor->get_seq_region_id($self);
  } 
  else {
    warning('Cannot retrieve seq_region_id without attached adaptor.');
    return undef;
  }
}

=head2 get_genome_component

  Arg []     : none
  Example    : my $genome_component = $slice->get_genome_component();
  Description: Returns the genome component of the slice
  Returntype : Scalar; the identifier of the genome component of the slice
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_genome_component {
  my $self = shift;
  return $self->adaptor->get_genome_component_for_slice($self);
}

=head2 get_all_Attributes

  Arg [1]    : optional string $attrib_code
               The code of the attribute type to retrieve values for.
  Example    : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};



( run in 2.946 seconds using v1.01-cache-2.11-cpan-5735350b133 )