Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/Slice.pm  view on Meta::CPAN

    Exceptions  : none
    Caller      : contigview, snpview
     Status     : Stable

=cut

sub get_all_LD_values {
  my ($self, $population) = @_;

  # The LD data lives behind the 'LDFeatureContainer' adaptor of the variation
  # database; when that adaptor cannot be obtained an empty listref is returned.
  my $container_adaptor = $self->_get_VariationAdaptor('LDFeatureContainer');
  return [] unless $container_adaptor;

  # Hand the slice itself plus the (optional) population to the adaptor.
  return $container_adaptor->fetch_by_Slice($self, $population);
}

=head2 _get_VariationFeatureAdaptor

Shortcut method here because VariationFeature is an often requested
adaptor type.

=cut

sub _get_VariationFeatureAdaptor {
  my $self   = shift;
  my $dbtype = shift;

  # Thin convenience wrapper: fixes the object type to 'VariationFeature' and
  # lets _get_VariationAdaptor deal with the (possibly undefined) dbtype.
  return $self->_get_VariationAdaptor('VariationFeature', $dbtype);
}

=head2 _get_StructuralVariationFeatureAdaptor

Shortcut method here because StructuralVariationFeature is an often requested
adaptor type.

=cut

sub _get_StructuralVariationFeatureAdaptor {
  my $self   = shift;
  my $dbtype = shift;

  # Thin convenience wrapper: fixes the object type to
  # 'StructuralVariationFeature' and lets _get_VariationAdaptor deal with the
  # (possibly undefined) dbtype.
  return $self->_get_VariationAdaptor('StructuralVariationFeature', $dbtype);
}

=head2 _get_VariationAdaptor

  Arg  [1]    : String object_type to retrieve an adaptor for
  Arg  [2]    : String dbtype to search for the given adaptor in. Defaults to variation
  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
                it will return nothing if the adaptor was not found
  ReturnType  : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance (specific to variation)
  Exceptions  : none

=cut

sub _get_VariationAdaptor {
  my $self = shift;
  my ($object_type, $dbtype) = @_;

  # The variation database has to be assumed to be registered under the group
  # name 'variation'. Falling back to the group of the current adaptor would
  # not work: that is most likely 'core' (or 'ensembl', 'vega', ...).
  $dbtype = 'variation' unless $dbtype;

  # The third argument switches off the Registry->get_db() consultation, which
  # is of no interest for variation databases.
  return $self->_get_Adaptor($object_type, $dbtype, 1);
}

=head2 _get_CoreAdaptor

  Arg  [1]    : String object_type to retrieve an adaptor for
  Arg  [2]    : String dbtype to search for the given adaptor in. Defaults to core
  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
                it will return nothing if the adaptor was not found
  ReturnType  : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance (specific to core-like dbs)
  Exceptions  : none

=cut

sub _get_CoreAdaptor {
  my $self = shift;
  my ($object_type, $dbtype) = @_;

  # Plain delegation. No third argument is supplied, so _get_Adaptor keeps its
  # Registry->get_db() consultation enabled.
  return $self->_get_Adaptor($object_type, $dbtype);
}

=head2 _get_Adaptor

  Arg  [1]    : String object_type to retrieve an adaptor for
  Arg  [2]    : String dbtype to search for the given adaptor in
  Arg  [3]    : Boolean Turn off the checking of Registry->get_db() for your 
                adaptor.
  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
                it will return nothing if the adaptor was not found. We consult the 
                "special" adaptors held by Bio::EnsEMBL::Registry::get_db() method and then
                fall back to the normal methods of finding an adaptor.

                This method will warn when adaptors are missing but will never throw an
                exception. It is up to the calling code to decide how to handle the unavailability
                of an adaptor.
  ReturnType  : Bio::EnsEMBL::DBSQL::BaseAdaptor derived instance. Otherwise it returns nothing
  Exceptions  : none

=cut

sub _get_Adaptor {
  my ($self, $object_type, $dbtype, $do_not_check_db) = @_;

  # Nothing can be looked up without knowing which kind of adaptor is wanted.
  unless ($object_type) {
    warning('Object type is a required parameter');
    return;
  }

  # The attached SliceAdaptor supplies the local DBAdaptor and, through it,
  # the species used for the Registry queries below.
  my $slice_adaptor = $self->adaptor();
  unless ($slice_adaptor) {
    warning("Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
    return;
  }

  my $local_db = $slice_adaptor->db();
  my $species  = $local_db->species();

  my $found;
  if ($dbtype && !$do_not_check_db) {
    # get_db() is a deprecated call, but "special" adaptors can be registered
    # through it, so it has to be consulted first.
    my $special_db = $registry->get_db($local_db, $dbtype);

    # A special DBAdaptor may carry a different species name, so its own
    # species and group are used for the query. Without one, the current
    # species and the requested dbtype are used.
    $found = $special_db
      ? $registry->get_adaptor($special_db->species(), $special_db->group(), $object_type)
      : $registry->get_adaptor($species, $dbtype, $object_type);
  }
  else {
    # No get_db() consultation: query with the given dbtype or, when none was
    # given, with the group of the local DBAdaptor.
    $dbtype ||= $local_db->group();
    $found = $registry->get_adaptor($species, $dbtype, $object_type);
  }

  # Missing adaptors are reported with a warning only; the caller receives
  # nothing and decides how to proceed.
  unless ($found) {
    warning("No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
    return;
  }

  return $found;
}

=head2 get_all_VariationFeatures

    Args [1]    : (optional) ArrayRef $so_terms
                  SequenceOntology terms to limit the fetch to
    Args [2]    : (optional) boolean $without_children
                  Do not query using the children of the given SO terms 
                  i.e. query using the given terms directly
    Args [3]    : (optional) ArrayRef $included_so 
                  ArrayRef of SequenceOntology which should be queried for
                  without children. This argument allows you to combine SO terms with children
                  from argument 1 with extra non-child SO terms. e.g. you wish to query for
                  all protein_altering_variant (specified in argument 1) variations which 
                  would be defined by child SO terms but also wanted stop_retained_variant linked variations
                  defined by this argument
    Args [4]    : (optional) string $dbtype
                  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
                  This assumes that the extra db has been added to the DBAdaptor under this name (using the
                  DBConnection::add_db_adaptor method).
    Description : Returns all germline variation features on this slice. This function will 
                  only work correctly if the variation database has been attached to the core 
                  database.
                  If $so_terms is specified, only variation features with a consequence type
                  that matches or is an ontological child of any of the supplied terms will
                  be returned
    ReturnType  : listref of Bio::EnsEMBL::Variation::VariationFeature
    Exceptions  : none
    Caller      : contigview, snpview
    Status      : Stable

=cut

sub get_all_VariationFeatures{
  my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
    return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
  }
  return [];

lib/Bio/EnsEMBL/Slice.pm  view on Meta::CPAN

  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_Transcripts_by_source {
  my $self = shift;
  my ($source, $load_exons) = @_;

  # Delegates to get_all_Transcripts with the source as fourth positional
  # argument; the second and third positional arguments are deliberately
  # passed as undef.
  my @forwarded = ($load_exons, undef, undef, $source);
  return $self->get_all_Transcripts(@forwarded);
}


=head2 get_all_Exons

  Arg [1]    : none
  Example    : @exons = @{$slice->get_all_Exons};
  Description: Gets all exons which overlap this slice.  Note that these exons
               will not be associated with any transcripts, so this may not
               be terribly useful.
  Returntype : reference to a list of Bio::EnsEMBL::Exons
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_Exons {
  my ($self) = @_;

  # With an adaptor attached, the ExonAdaptor of its database fetches every
  # exon on this slice.
  if ($self->adaptor()) {
    my $exon_adaptor = $self->adaptor->db->get_ExonAdaptor;
    return $exon_adaptor->fetch_all_by_Slice($self);
  }

  # Without an adaptor there is no database to ask: warn and return an empty
  # listref.
  warning('Cannot get Exons without attached adaptor');
  return [];
}

=head2 get_all_KaryotypeBands

  Arg [1]    : none
  Example    : @kary_bands = @{$slice->get_all_KaryotypeBands};
  Description: Retrieves the karyotype bands which this slice overlaps.
  Returntype : listref of Bio::EnsEMBL::KaryotypeBands
  Exceptions : none
  Caller     : general, contigview
  Status     : Stable

=cut

sub get_all_KaryotypeBands {
  my $self = shift;

  # Look the KaryotypeBand adaptor up through the core-adaptor helper; when it
  # is unavailable the slice simply reports no bands.
  my $band_adaptor = $self->_get_CoreAdaptor('KaryotypeBand');
  return [] unless $band_adaptor;

  return $band_adaptor->fetch_all_by_Slice($self);
}

=head2 get_repeatmasked_seq

  Arg [1]    : listref of strings $logic_names (optional)
  Arg [2]    : int $soft_masking_enable (optional)
  Arg [3]    : hash reference $not_default_masking_cases (optional, default is {})
               The values are 0 or 1 for hard and soft masking respectively
               The keys of the hash should be of 2 forms
               "repeat_class_" . $repeat_consensus->repeat_class,
                e.g. "repeat_class_SINE/MIR"
               "repeat_name_" . $repeat_consensus->name
                e.g. "repeat_name_MIR"
               depending on which base you want to apply the not default
               masking either the repeat_class or repeat_name. Both can be
               specified in the same hash at the same time, but in that case,
               repeat_name setting has priority over repeat_class. For example,
               you may have hard masking as default, and you may want soft
               masking of all repeat_class SINE/MIR, but repeat_name AluSp
               (which are also from repeat_class SINE/MIR).
               Your hash will be something like {"repeat_class_SINE/MIR" => 1,
                                                 "repeat_name_AluSp" => 0}
  Example    : $rm_slice = $slice->get_repeatmasked_seq();
               $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
  Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
               masked sequence instead of the regular sequence.
               Sequence returned by this new slice will have repeat regions
               hardmasked by default (sequence replaced by N), or
               soft-masked when arg[2] = 1 (sequence in lowercase).
               Will only work with database connection to get repeat features.
  Returntype : Bio::EnsEMBL::RepeatMaskedSlice
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_repeatmasked_seq {
    my $self = shift;
    my ($logic_names, $soft_mask, $not_default_masking_cases) = @_;

    # The new RepeatMaskedSlice covers exactly the same region as this slice:
    # the positional fields are copied straight from this object, and the
    # three masking options are passed through untouched.
    my @constructor_args = (
        -START                     => $self->{'start'},
        -END                       => $self->{'end'},
        -STRAND                    => $self->{'strand'},
        -ADAPTOR                   => $self->adaptor(),
        -SEQ                       => $self->{'seq'},
        -SEQ_REGION_NAME           => $self->{'seq_region_name'},
        -SEQ_REGION_LENGTH         => $self->{'seq_region_length'},
        -COORD_SYSTEM              => $self->{'coord_system'},
        -REPEAT_MASK               => $logic_names,
        -SOFT_MASK                 => $soft_mask,
        -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases,
    );

    return Bio::EnsEMBL::RepeatMaskedSlice->new(@constructor_args);
}



=head2 _mask_features

  Arg [1]    : reference to a string $dnaref
  Arg [2]    : array_ref $repeats
               reference to a list Bio::EnsEMBL::RepeatFeature
               give the list of coordinates to replace with N or with
               lower case
  Arg [3]    : int $soft_masking_enable (optional)
  Arg [4]    : hash reference $not_default_masking_cases (optional, default is {})
               The values are 0 or 1 for hard and soft masking respectively
               The keys of the hash should be of 2 forms
               "repeat_class_" . $repeat_consensus->repeat_class,
                e.g. "repeat_class_SINE/MIR"
               "repeat_name_" . $repeat_consensus->name
                e.g. "repeat_name_MIR"
               depending on which base you want to apply the not default masking either
               the repeat_class or repeat_name. Both can be specified in the same hash
               at the same time, but in that case, repeat_name setting has priority over
               repeat_class. For example, you may have hard masking as default, and
               you may want soft masking of all repeat_class SINE/MIR,
               but repeat_name AluSp (which are also from repeat_class SINE/MIR).
               Your hash will be something like {"repeat_class_SINE/MIR" => 1,
                                                 "repeat_name_AluSp" => 0}
  Example    : none
  Description: replaces string positions described in the RepeatFeatures
               with Ns (default setting), or with the lower case equivalent
               (soft masking).  The reference to a dna string which is passed
               is changed in place.
  Returntype : none
  Exceptions : none
  Caller     : seq
  Status     : Stable

=cut

sub _mask_features {
  my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;

  # Masks, in place, the parts of the string behind $dnaref that are covered
  # by the features in $repeats. Every feature is first transferred onto this
  # slice; the covered bases are then replaced by 'N' (hard masking) or by
  # their lower-case form (soft masking).
  #
  #   $dnaref                    - reference to the sequence string to modify
  #   $repeats                   - listref of features to mask
  #   $soft_mask                 - default masking type: true = soft,
  #                                false/undef = hard
  #   $not_default_masking_cases - hashref of per-repeat overrides keyed by
  #                                "repeat_class_<class>" or
  #                                "repeat_name_<name>"; a name entry wins
  #                                over a class entry
  #
  # Nothing meaningful is returned.

  $soft_mask = 0 unless (defined $soft_mask);
  $not_default_masking_cases = {} unless (defined $not_default_masking_cases);

  # explicit CORE::length call, to avoid any confusion with the Slice
  # length method
  my $dnalen = CORE::length($$dnaref);

 REP:foreach my $old_f (@{$repeats}) {
    my $f = $old_f->transfer( $self );

    # transfer() hands back nothing when the feature cannot be placed on this
    # slice; such a feature covers none of our bases, so there is nothing to
    # mask (and calling methods on it would be fatal).
    next REP unless (defined $f);

    my $start = $f->start;
    my $end   = $f->end;

    # repeat completely outside of the slice range
    next REP if ($end < 1 || $start > $dnalen);

    # repeat partly outside of the slice range: clip it to the sequence
    $start = 1       if ($start < 1);
    $end   = $dnalen if ($end > $dnalen);

    # A non-positive length means no base is covered. It must never reach
    # substr(), where a negative length would mean "leave that many
    # characters off the end of the string".
    my $length = ($end - $start) + 1;
    next REP if ($length < 1);

    # substr() offsets are 0-based, feature coordinates are 1-based
    my $offset = $start - 1;

    # Per-repeat overrides only exist for RepeatFeatures; any other feature
    # type always gets the default masking. Looking the keys up only inside
    # this branch avoids using undefined values as hash keys.
    my $masking_type;
    if ($f->isa('Bio::EnsEMBL::RepeatFeature')) {
      my $consensus = $f->repeat_consensus;
      my $rc_class  = "repeat_class_" . $consensus->repeat_class;
      my $rc_name   = "repeat_name_" . $consensus->name;

      # the name entry is checked last so that it overrides the class entry
      $masking_type = $not_default_masking_cases->{$rc_class}
        if (defined $not_default_masking_cases->{$rc_class});
      $masking_type = $not_default_masking_cases->{$rc_name}
        if (defined $not_default_masking_cases->{$rc_name});
    }

    $masking_type = $soft_mask unless (defined $masking_type);

    my $padstr;
    if ($masking_type) {
      $padstr = lc substr ($$dnaref,$offset,$length);
    } else {
      $padstr = 'N' x $length;
    }
    substr ($$dnaref,$offset,$length) = $padstr;
  }

  return;
}


=head2 get_all_SearchFeatures

  Arg [1]    : scalar $ticket_ids
  Example    : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
  Description: Retrieves all search features for stored blast
               results for the ticket that overlap this slice
  Returntype : listref of Bio::EnsEMBL::SeqFeatures
  Exceptions : none
  Caller     : general (webby!)
  Status     : Stable

=cut

sub get_all_SearchFeatures {
  my ($self, $ticket) = @_;
  local $_;

  # A ticket identifying the stored search results is mandatory.
  throw("ticket argument is required") if !$ticket;

  # Without an adaptor there is no database to ask.
  unless ($self->adaptor()) {
    warning("Cannot get SearchFeatures without an attached adaptor");
    return [];
  }

  # The search results are served by the DBAdaptor registered as 'blast'.
  my $blast_adaptor = $self->adaptor()->db()->get_db_adaptor('blast');

  # Amount to subtract from the coordinates reported by the blast adaptor so
  # that they become relative to the start of this slice.
  my $slice_offset = $self->start - 1;

  # No blast adaptor simply means there are no hits to report.
  my $hits = [];
  if ($blast_adaptor) {
    $hits = $blast_adaptor->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end);
  }

  # Shift each hit, in place, into slice coordinates.
  for (@{$hits}) {
    $_->start( $_->start - $slice_offset );
    $_->end(   $_->end   - $slice_offset );
  }

  return $hits;
}

=head2 get_all_AssemblyExceptionFeatures

  Example    : $slice->get_all_AssemblyExceptionFeatures();
  Description: Retrieves all misc features which overlap this slice. If
               a set code is provided only features which are members of
               the requested set are returned.
  Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures



( run in 1.226 second using v1.01-cache-2.11-cpan-98e64b0badf )