Bio-EnsEMBL
view release on metacpan or search on metacpan
lib/Bio/EnsEMBL/Slice.pm view on Meta::CPAN
=head1 NAME
Bio::EnsEMBL::Slice - Arbitary Slice of a genome
=head1 SYNOPSIS
$sa = $db->get_SliceAdaptor;
$slice =
$sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
# get some attributes of the slice
my $seqname = $slice->seq_region_name();
my $start = $slice->start();
my $end = $slice->end();
# get the sequence from the slice
my $seq = $slice->seq();
# get some features from the slice
foreach $gene ( @{ $slice->get_all_Genes } ) {
# do something with a gene
}
foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
# do something with dna-dna alignments
}
=head1 DESCRIPTION
A Slice object represents a region of a genome. It can be used to retrieve
sequence or features from an area of interest.
NOTE: The Slice is defined by its Strand, but normal behaviour for get_all_*
methods is to return Features on both Strands.
=head1 METHODS
=cut
package Bio::EnsEMBL::Slice;
$Bio::EnsEMBL::Slice::VERSION = '114.0.0';
use vars qw(@ISA);
use strict;
use Bio::PrimarySeqI;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
use Bio::EnsEMBL::RepeatMaskedSlice;
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::ProjectionSegment;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Iterator;
use Bio::EnsEMBL::DBSQL::MergedAdaptor;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::SeqRegionSynonym;
use Scalar::Util qw(weaken isweak);
# use Data::Dumper;
my $registry = "Bio::EnsEMBL::Registry";
@ISA = qw(Bio::PrimarySeqI);
=head2 new
Arg [...] : List of named arguments
Bio::EnsEMBL::CoordSystem COORD_SYSTEM
string SEQ_REGION_NAME,
int START,
int END,
int SEQ_REGION_LENGTH, (optional)
string SEQ (optional)
int STRAND, (optional, defaults to 1)
Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
-start => 1,
-end => 10000,
-strand => 1,
-seq_region_name => 'X',
-seq_region_length => 12e6,
-adaptor => $slice_adaptor);
Description: Creates a new slice object. A slice represents a region
of sequence in a particular coordinate system. Slices can be
used to retrieve sequence and features from an area of
interest in a genome.
Coordinates start at 1 and are inclusive. Negative
coordinates or coordinates exceeding the length of the
seq_region are permitted. Start must be less than or equal.
to end regardless of the strand.
Slice objects are immutable. Once instantiated their attributes
(with the exception of the adaptor) may not be altered. To
change the attributes a new slice must be created.
Returntype : Bio::EnsEMBL::Slice
Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
Caller : general, Bio::EnsEMBL::SliceAdaptor
Status : Stable
=cut
sub new {
my $caller = shift;
#new can be called as a class or object method
my $class = ref($caller) || $caller;
my ($seq, $coord_system, $seq_region_name, $seq_region_length,
$start, $end, $strand, $adaptor, $empty) =
rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
START END STRAND ADAPTOR EMPTY)], @_);
if ( !defined($seq_region_name) ) {
throw('SEQ_REGION_NAME argument is required');
}
lib/Bio/EnsEMBL/Slice.pm view on Meta::CPAN
if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
throw('SEQ must be the same length as the defined LENGTH not '
. CORE::length($seq)
. ' compared to '
. ( $end - $start + 1 ) );
}
if(defined($coord_system)) {
if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
}
if($coord_system->is_top_level()) {
throw('Cannot create slice on toplevel CoordSystem.');
}
} else {
warning("Slice without coordinate system");
}
$strand ||= 1;
if($strand != 1 && $strand != -1) {
throw('STRAND argument must be -1 or 1');
}
if(defined($adaptor)) {
if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
}
}
my $self = bless {'coord_system' => $coord_system,
'seq' => $seq,
'seq_region_name' => $seq_region_name,
'seq_region_length' => $seq_region_length,
'start' => int($start),
'end' => int($end),
'strand' => $strand}, $class;
$self->adaptor($adaptor);
return $self;
}
=head2 new_fast
Arg [1] : hashref to be blessed
Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
Exceptions : none
Returntype : Bio::EnsEMBL::Slice
Caller : general
Status : Stable
=cut
sub new_fast {
my $class = shift;
my $hashref = shift;
my $self = bless $hashref, $class;
weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
return $self;
}
=head2 adaptor
Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
Example : $adaptor = $slice->adaptor();
Description: Getter/Setter for the slice object adaptor used
by this slice for database interaction.
Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor
Exceptions : thorws if argument passed is not a SliceAdaptor
Caller : general
Status : Stable
=cut
sub adaptor{
my $self = shift;
if(@_) {
my $ad = shift;
if(defined($ad)) {
if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
}
}
weaken($self->{'adaptor'} = $ad);
}
return $self->{'adaptor'};
}
=head2 seq_region_name
Arg [1] : none
Example : $seq_region = $slice->seq_region_name();
Description: Returns the name of the seq_region that this slice is on. For
example if this slice is in chromosomal coordinates the
seq_region_name might be 'X' or '10'.
This function was formerly named chr_name, but since slices can
now be on coordinate systems other than chromosomal it has been
changed.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub seq_region_name {
my $self = shift;
return $self->{'seq_region_name'};
}
=head2 seq_region_start
Example : $seq_region_start = $slice->seq_region_start();
Description: Returns the start position of this slice relative to the
start of the sequence region that it was created on.
Since slices are always in genomic coordinates this is
an alias to start()
Returntype : int
Exceptions : none
Caller : general
Status : Stable
=cut
sub seq_region_start {
my $self = shift;
return $self->start();
}
=head2 seq_region_end
Example : $seq_region_end = $slice->seq_region_end();
Description: Returns the end position of this slice relative to the
start of the sequence region that it was created on.
Since slices are always in genomic coordinates this is
an alias to end()
Returntype : int
Exceptions : none
Caller : general
Status : Stable
lib/Bio/EnsEMBL/Slice.pm view on Meta::CPAN
Arg 1 : int $start, refers to the start of the subslice relative to the input slice
Arg 2 : int $end, refers to the end of the subslice relative to the input slice
Arge [3] : int $strand
Example : none
Description: Makes another Slice that covers only part of this Slice
If a Slice is requested which lies outside of the boundaries
of this function will return undef. This means that
behaviour will be consistant whether or not the slice is
attached to the database (i.e. if there is attached sequence
to the slice). Alternatively the expand() method or the
SliceAdaptor::fetch_by_region method can be used instead.
Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
Exceptions : none
Caller : general
Status : Stable
=cut
sub sub_Slice {
my ( $self, $start, $end, $strand ) = @_;
my $length = $self->length();
if( $start < 1 || $start > $length ) {
# throw( "start argument not valid" );
return undef;
}
if( $end < $start || $end > $length ) {
# throw( "end argument not valid" )
return undef;
}
my ( $new_start, $new_end, $new_strand, $new_seq );
if( ! defined $strand ) {
$strand = 1;
}
if( $self->{'strand'} == 1 ) {
$new_start = $self->{'start'} + $start - 1;
$new_end = $self->{'start'} + $end - 1;
$new_strand = $strand;
} else {
$new_start = $self->{'end'} - $end + 1;;
$new_end = $self->{'end'} - $start + 1;
$new_strand = -$strand;
}
if( defined $self->{'seq'} ) {
$new_seq = $self->subseq( $start, $end, $strand );
}
#fastest way to copy a slice is to do a shallow hash copy
my $new_slice = {%{$self}};
$new_slice->{'start'} = int($new_start);
$new_slice->{'end'} = int($new_end);
$new_slice->{'strand'} = $new_strand;
if( $new_seq ) {
$new_slice->{'seq'} = $new_seq;
}
weaken($new_slice->{adaptor});
return bless $new_slice, ref($self);
}
=head2 seq_region_Slice
Arg [1] : none
Example : $slice = $slice->seq_region_Slice();
Description: Returns a slice which spans the whole seq_region which this slice
is on. For example if this is a slice which spans a small region
of chromosome X, this method will return a slice which covers the
entire chromosome X. The returned slice will always have strand
of 1 and start of 1. This method cannot be used if the sequence
of the slice has been set manually.
Returntype : Bio::EnsEMBL::Slice
Exceptions : warning if called when sequence of Slice has been set manually.
Caller : general
Status : Stable
=cut
sub seq_region_Slice {
my $self = shift;
if($self->{'seq'}){
warning("Cannot get a seq_region_Slice of a slice which has manually ".
"attached sequence ");
return undef;
}
# quick shallow copy
my $slice;
%{$slice} = %{$self};
bless $slice, ref($self);
weaken($slice->{adaptor});
$slice->{'start'} = 1;
$slice->{'end'} = $slice->{'seq_region_length'};
$slice->{'strand'} = 1;
return $slice;
}
=head2 get_seq_region_id
Arg [1] : none
Example : my $seq_region_id = $slice->get_seq_region_id();
Description: Gets the internal identifier of the seq_region that this slice
is on. Note that this function will not work correctly if this
slice does not have an attached adaptor. Also note that it may
be better to go through the SliceAdaptor::get_seq_region_id
method if you are working with multiple databases since is
possible to work with slices from databases with different
internal seq_region identifiers.
Returntype : int or undef if slices does not have attached adaptor
Exceptions : warning if slice is not associated with a SliceAdaptor
Caller : assembly loading scripts, general
Status : Stable
=cut
sub get_seq_region_id {
my ($self) = @_;
if($self->adaptor) {
return $self->adaptor->get_seq_region_id($self);
}
else {
warning('Cannot retrieve seq_region_id without attached adaptor.');
return undef;
}
}
=head2 get_genome_component
Arg [] : none
Example : my $genome_component = $slice->get_genome_component();
Description: Returns the genome component of the slice
Returntype : Scalar; the identifier of the genome component of the slice
Exceptions : none
Caller : general
Status : Stable
=cut
sub get_genome_component {
my $self = shift;
return $self->adaptor->get_genome_component_for_slice($self);
}
=head2 get_all_Attributes
Arg [1] : optional string $attrib_code
The code of the attribute type to retrieve values for.
Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
( run in 2.946 seconds using v1.01-cache-2.11-cpan-5735350b133 )