Bio-EnsEMBL
view release on metacpan or search on metacpan
lib/Bio/EnsEMBL/Slice.pm view on Meta::CPAN
Exceptions : none
Caller : contigview, snpview
Status : Stable
=cut
# Asks the 'LDFeatureContainer' variation adaptor for the data belonging to
# this slice (optionally narrowed by $population) and hands back whatever
# fetch_by_Slice() returns. When no such adaptor can be obtained an empty
# array reference is returned instead.
sub get_all_LD_values {
    my ($self, $population) = @_;

    my $ld_adaptor = $self->_get_VariationAdaptor('LDFeatureContainer');

    # No adaptor available: nothing can be fetched.
    return [] unless $ld_adaptor;

    return $ld_adaptor->fetch_by_Slice($self, $population);
}
=head2 _get_VariationFeatureAdaptor
Shortcut method here because VariationFeature is an often requested
adaptor type.
=cut
# Thin convenience wrapper: requests the 'VariationFeature' object type from
# _get_VariationAdaptor(), forwarding the optional $dbtype unchanged.
sub _get_VariationFeatureAdaptor {
    my $self   = shift;
    my $dbtype = shift;

    return $self->_get_VariationAdaptor('VariationFeature', $dbtype);
}
=head2 _get_StructuralVariationFeatureAdaptor
Shortcut method here because StructuralVariationFeature is an often requested
adaptor type.
=cut
# Thin convenience wrapper: requests the 'StructuralVariationFeature' object
# type from _get_VariationAdaptor(), forwarding the optional $dbtype unchanged.
sub _get_StructuralVariationFeatureAdaptor {
    my $self   = shift;
    my $dbtype = shift;

    return $self->_get_VariationAdaptor('StructuralVariationFeature', $dbtype);
}
=head2 _get_VariationAdaptor
Arg [1] : String object_type to retrieve an adaptor for
Arg [2] : String dbtype to search for the given adaptor in. Defaults to variation
Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
it will return nothing if the adaptor was not found
ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance (specific to variation)
Exceptions : none
=cut
# Looks up an adaptor of $object_type through _get_Adaptor(), using the
# database type 'variation' unless the caller supplied a (true) $dbtype.
sub _get_VariationAdaptor {
    my ($self, $object_type, $dbtype) = @_;

    # The default is essential: the variation database has to be assumed to be
    # called 'variation'. The group of the current adaptor is no use here as it
    # will most likely be 'core', or something like 'ensembl' or 'vega'.
    my $variation_dbtype = $dbtype || 'variation';

    # Registry->get_db() is of no interest for variation DBs, so the third
    # argument asks _get_Adaptor() to skip that check.
    my $skip_get_db_check = 1;

    return $self->_get_Adaptor($object_type, $variation_dbtype, $skip_get_db_check);
}
=head2 _get_CoreAdaptor
Arg [1] : String object_type to retrieve an adaptor for
Arg [2] : String dbtype to search for the given adaptor in. Defaults to core
Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
it will return nothing if the adaptor was not found
ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance (specific to core-like dbs)
Exceptions : none
=cut
# Plain delegate to _get_Adaptor(): forwards the object type and the optional
# database type, and never sets the "skip get_db() check" flag.
sub _get_CoreAdaptor {
    my $self = shift;
    my ($object_type, $dbtype) = @_;

    return $self->_get_Adaptor($object_type, $dbtype);
}
=head2 _get_Adaptor
Arg [1] : String object_type to retrieve an adaptor for
Arg [2] : String dbtype to search for the given adaptor in
Arg [3] : Boolean Turn off the checking of Registry->get_db() for your
adaptor.
Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
it will return nothing if the adaptor was not found. We consult the
"special" adaptors held by Bio::EnsEMBL::Registry::get_db() method and then
fall back to the normal methods of finding an adaptor.
This method will warn when adaptors are missing but will never throw an
exception. It is up to the calling code to decide how to handle the unavailability
of an adaptor.
ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance. Otherwise it returns nothing
Exceptions : none
=cut
# Resolves an adaptor for $object_type via the registry.
#
#   $object_type      - required; a warning is issued and nothing is returned
#                       when it is false
#   $dbtype           - optional database type/group to look in
#   $do_not_check_db  - when true, the registry's get_db() lookup is skipped
#
# Returns the adaptor when one is found. In every failure case (no object
# type, no adaptor attached to this object, nothing found in the registry) a
# warning is emitted and a bare return is performed.
sub _get_Adaptor {
    my ($self, $object_type, $dbtype, $do_not_check_db) = @_;

    unless ($object_type) {
        warning('Object type is a required parameter');
        return;
    }

    my $slice_adaptor = $self->adaptor();
    unless ($slice_adaptor) {
        warning("Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
        return;
    }

    my $local_db = $slice_adaptor->db();
    my $species  = $local_db->species();

    my $found;
    if ($do_not_check_db || !$dbtype) {
        # No get_db() consultation wanted (or possible): query directly,
        # falling back to the group of the local database when no dbtype
        # was given.
        $dbtype ||= $local_db->group();
        $found = $registry->get_adaptor($species, $dbtype, $object_type);
    }
    else {
        # get_db() is a deprecated call, but "special" adaptors can be
        # registered through it, so it has to be consulted first.
        my $special_db = $registry->get_db($local_db, $dbtype);
        if ($special_db) {
            # Special adaptors can carry a different species name, so take
            # species and group from the DBAdaptor that was handed back.
            $found = $registry->get_adaptor($special_db->species(), $special_db->group(), $object_type);
        }
        else {
            # Nothing special registered: current species, requested dbtype.
            $found = $registry->get_adaptor($species, $dbtype, $object_type);
        }
    }

    return $found if $found;

    warning("No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
    return;
}
=head2 get_all_VariationFeatures
Args [1] : (optional) ArrayRef $so_terms
SequenceOntology terms to limit the fetch to
Args [2] : (optional) boolean $without_children
Do not query using the children of the given SO terms
i.e. query using the given terms directly
Args [3] : (optional) ArrayRef $included_so
ArrayRef of SequenceOntology which should be queried for
without children. This argument allows you to combine SO terms with children
from argument 1 with extra non-child SO terms. e.g. you wish to query for
all protein_altering_variant (specified in argument 1) variations which
would be defined by child SO terms but also wanted stop_retained_variant linked variations
defined by this argument
Args [4] : (optional) string $dbtype
The dbtype of variation to obtain (i.e. can be different from the "variation" type).
This assumes that the extra db has been added to the DBAdaptor under this name (using the
DBConnection::add_db_adaptor method).
Description : Returns all germline variation features on this slice. This function will
only work correctly if the variation database has been attached to the core
database.
If $so_terms is specified, only variation features with a consequence type
that matches or is an ontological child of any of the supplied terms will
be returned
ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
Exceptions : none
Caller : contigview, snpview
Status : Stable
=cut
sub get_all_VariationFeatures{
my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
}
return [];
lib/Bio/EnsEMBL/Slice.pm view on Meta::CPAN
Exceptions : none
Caller : general
Status : Stable
=cut
# Delegates to get_all_Transcripts(), placing $load_exons in the first and
# $source in the fourth argument slot; the two slots in between are passed
# as undef.
sub get_all_Transcripts_by_source {
    my $self = shift;
    my ($source, $load_exons) = @_;

    my @transcript_args = ($load_exons, undef, undef, $source);

    return $self->get_all_Transcripts(@transcript_args);
}
=head2 get_all_Exons
Arg [1] : none
Example : @exons = @{$slice->get_all_Exons};
Description: Gets all exons which overlap this slice. Note that these exons
will not be associated with any transcripts, so this may not
be terribly useful.
Returntype : reference to a list of Bio::EnsEMBL::Exons
Exceptions : none
Caller : general
Status : Stable
=cut
# Returns what the ExonAdaptor's fetch_all_by_Slice() yields for this slice.
# Without an attached adaptor a warning is issued and an empty array
# reference is returned.
sub get_all_Exons {
    my ($self) = @_;

    unless ($self->adaptor()) {
        warning('Cannot get Exons without attached adaptor');
        return [];
    }

    # Walk from the attached adaptor to the ExonAdaptor step by step.
    my $db           = $self->adaptor->db;
    my $exon_adaptor = $db->get_ExonAdaptor;

    return $exon_adaptor->fetch_all_by_Slice($self);
}
=head2 get_all_KaryotypeBands
Arg [1] : none
Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
Description: Retrieves the karyotype bands which this slice overlaps.
Returntype : listref of Bio::EnsEMBL::KaryotypeBands
Exceptions : none
Caller : general, contigview
Status : Stable
=cut
# Fetches, through the core 'KaryotypeBand' adaptor, whatever its
# fetch_all_by_Slice() returns for this slice. An empty array reference is
# returned when that adaptor cannot be obtained.
sub get_all_KaryotypeBands {
    my $self = shift;

    my $band_adaptor = $self->_get_CoreAdaptor('KaryotypeBand');

    return $band_adaptor
        ? $band_adaptor->fetch_all_by_Slice($self)
        : [];
}
=head2 get_repeatmasked_seq
Arg [1] : listref of strings $logic_names (optional)
Arg [2] : int $soft_masking_enable (optional)
Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
The values are 0 or 1 for hard and soft masking respectively
The keys of the hash should be of 2 forms
"repeat_class_" . $repeat_consensus->repeat_class,
e.g. "repeat_class_SINE/MIR"
"repeat_name_" . $repeat_consensus->name
e.g. "repeat_name_MIR"
depending on which base you want to apply the not default
masking either the repeat_class or repeat_name. Both can be
specified in the same hash at the same time, but in that case,
repeat_name setting has priority over repeat_class. For example,
you may have hard masking as default, and you may want soft
masking of all repeat_class SINE/MIR, but repeat_name AluSp
(which are also from repeat_class SINE/MIR).
Your hash will be something like {"repeat_class_SINE/MIR" => 1,
"repeat_name_AluSp" => 0}
Example : $rm_slice = $slice->get_repeatmasked_seq();
$softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
masked sequence instead of the regular sequence.
Sequence returned by this new slice will have repeat regions
hardmasked by default (sequence replaced by N) or
soft-masked when arg[2] = 1 (sequence in lowercase)
Will only work with database connection to get repeat features.
Returntype : Bio::EnsEMBL::RepeatMaskedSlice
Exceptions : none
Caller : general
Status : Stable
=cut
# Constructs a Bio::EnsEMBL::RepeatMaskedSlice from this object's stored
# start/end/strand/seq/seq-region/coord-system values and its adaptor, adding
# the three masking options exactly as they were passed in.
sub get_repeatmasked_seq {
    my $self = shift;
    my ($logic_names, $soft_mask, $not_default_masking_cases) = @_;

    my @constructor_args = (
        # Copied straight from this object.
        -START                     => $self->{'start'},
        -END                       => $self->{'end'},
        -STRAND                    => $self->{'strand'},
        -ADAPTOR                   => $self->adaptor(),
        -SEQ                       => $self->{'seq'},
        -SEQ_REGION_NAME           => $self->{'seq_region_name'},
        -SEQ_REGION_LENGTH         => $self->{'seq_region_length'},
        -COORD_SYSTEM              => $self->{'coord_system'},

        # Masking configuration supplied by the caller.
        -REPEAT_MASK               => $logic_names,
        -SOFT_MASK                 => $soft_mask,
        -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases,
    );

    return Bio::EnsEMBL::RepeatMaskedSlice->new(@constructor_args);
}
=head2 _mask_features
Arg [1] : reference to a string $dnaref
Arg [2] : array_ref $repeats
reference to a list Bio::EnsEMBL::RepeatFeature
give the list of coordinates to replace with N or with
lower case
Arg [3] : int $soft_masking_enable (optional)
Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
The values are 0 or 1 for hard and soft masking respectively
The keys of the hash should be of 2 forms
"repeat_class_" . $repeat_consensus->repeat_class,
e.g. "repeat_class_SINE/MIR"
"repeat_name_" . $repeat_consensus->name
e.g. "repeat_name_MIR"
depending on which base you want to apply the not default masking either
the repeat_class or repeat_name. Both can be specified in the same hash
at the same time, but in that case, repeat_name setting has priority over
repeat_class. For example, you may have hard masking as default, and
you may want soft masking of all repeat_class SINE/MIR,
but repeat_name AluSp (which are also from repeat_class SINE/MIR).
Your hash will be something like {"repeat_class_SINE/MIR" => 1,
"repeat_name_AluSp" => 0}
Example : none
Description: replaces string positions described in the RepeatFeatures
with Ns (default setting), or with the lower case equivalent
(soft masking). The reference to a dna string which is passed
is changed in place.
Returntype : none
Exceptions : none
Caller : seq
Status : Stable
=cut
# Masks, in place, the stretches of a DNA string that are covered by the
# given features.
#
#   $dnaref                     - reference to the sequence string; the string
#                                 it points to is modified directly
#   $repeats                    - array reference of features; each one is
#                                 transferred onto $self before its start/end
#                                 are read
#   $soft_mask                  - default masking mode: true lower-cases the
#                                 covered bases, false (the default) replaces
#                                 them with 'N'
#   $not_default_masking_cases  - hash reference of per-repeat overrides keyed
#                                 "repeat_class_<class>" / "repeat_name_<name>"
#                                 with values 0 (hard) or 1 (soft); only
#                                 consulted for Bio::EnsEMBL::RepeatFeature
#                                 objects. Defaults to {}.
#
# Returns nothing.
sub _mask_features {
    my ($self, $dnaref, $repeats, $soft_mask, $not_default_masking_cases) = @_;

    $soft_mask                 = 0  unless defined $soft_mask;
    $not_default_masking_cases = {} unless defined $not_default_masking_cases;

    # explicit CORE::length call, to avoid any confusion with the Slice
    # length method
    my $dnalen = CORE::length($$dnaref);

    REP: foreach my $old_f (@{$repeats}) {
        my $f = $old_f->transfer($self);

        # A feature that could not be placed on this slice has no coordinates
        # to mask; skip it just like a feature lying outside the slice.
        next REP unless defined $f;

        my $start = $f->start;
        my $end   = $f->end;

        # repeat completely outside of the expected slice range
        next REP if $end < 1 || $start > $dnalen;

        # repeat partly outside the slice range: clip it to the sequence
        $start = 1       if $start < 1;
        $end   = $dnalen if $end > $dnalen;

        my $length = ($end - $start) + 1;

        # Nothing to mask; a negative length would also make substr()
        # count from the end of the string.
        next REP if $length < 1;

        # substr() is zero-based, feature coordinates are one-based
        my $offset = $start - 1;

        # Per-repeat overrides only exist for RepeatFeatures; other feature
        # types never build (undefined) lookup keys.
        my $masking_type;
        if ($f->isa('Bio::EnsEMBL::RepeatFeature')) {
            my $consensus = $f->repeat_consensus;
            my $rc_class  = "repeat_class_" . $consensus->repeat_class;
            my $rc_name   = "repeat_name_" . $consensus->name;

            # Checked in this order so that a repeat_name setting takes
            # priority over a repeat_class setting.
            foreach my $key ($rc_class, $rc_name) {
                my $override = $not_default_masking_cases->{$key};
                $masking_type = $override if defined $override;
            }
        }
        $masking_type = $soft_mask unless defined $masking_type;

        my $padstr = $masking_type
            ? lc(substr($$dnaref, $offset, $length))
            : 'N' x $length;

        substr($$dnaref, $offset, $length) = $padstr;
    }

    return;
}
=head2 get_all_SearchFeatures
Arg [1] : scalar $ticket_ids
Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
Description: Retrieves all search features for stored blast
results for the ticket that overlap this slice
Returntype : listref of Bio::EnsEMBL::SeqFeatures
Exceptions : none
Caller : general (webby!)
Status : Stable
=cut
# Retrieves the search features stored for $ticket that fall on this slice
# and converts their start/end into slice-relative coordinates.
#
#   $ticket - required; throw() is called when it is false
#
# Returns an array reference. It is empty when no adaptor is attached (a
# warning is issued in that case) or when the attached database has no
# 'blast' db adaptor registered.
sub get_all_SearchFeatures {
    my ($self, $ticket) = @_;

    unless ($ticket) {
        throw("ticket argument is required");
    }

    if (!$self->adaptor()) {
        warning("Cannot get SearchFeatures without an attached adaptor");
        return [];
    }

    my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');

    # No blast database attached: there is nothing to fetch or shift.
    return [] unless $sfa;

    my $features = $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end);

    # Shift the fetched coordinates so that the slice start becomes 1.
    my $offset = $self->start - 1;

    # A lexical loop variable is used on purpose: the accessors are called
    # while iterating, and with an implicit $_ any callee assigning to the
    # global $_ would overwrite the aliased list element.
    foreach my $feature (@$features) {
        $feature->start($feature->start - $offset);
        $feature->end($feature->end - $offset);
    }

    return $features;
}
=head2 get_all_AssemblyExceptionFeatures
Example : $slice->get_all_AssemblyExceptionFeatures();
Description: Retrieves all misc features which overlap this slice. If
a set code is provided only features which are members of
the requested set are returned.
Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
( run in 1.226 second using v1.01-cache-2.11-cpan-98e64b0badf )