Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/ChainedAssemblyMapper.pm  view on Meta::CPAN


Bio::EnsEMBL::ChainedAssemblyMapper -
Handles mapping between two coordinate systems using the information
stored in the assembly table

=head1 SYNOPSIS

    $db   = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
    $asma = $db->get_AssemblyMapperAdaptor();
    $csa  = $db->get_CoordSystemAdaptor();

    my $chr_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' );
    my $cln_cs = $csa->fetch_by_name('clone');

    $asm_mapper = $asma->fetch_by_CoordSystems( $chr_cs, $cln_cs );

    # Map to clone coordinate system from chromosomal
    @cln_coords =
      $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, $chr_cs );

    # Map to chromosome coordinate system from clone
    @chr_coords =
      $asm_mapper->map( 'AL30421.1', 100, 10000, -1, $cln_cs );

    # List clone names for a region of chromosome
    @cln_ids = $asm_mapper->list_ids( '13', 1_000_000, 1, $chr_cs );

    # List chromosome names for a clone region
    @chr_ids =
      $asm_mapper->list_ids( 'AL30421.1', 1, 1000, -1, $cln_cs );

=head1 DESCRIPTION

The ChainedAssemblyMapper is an extension of the regular AssemblyMapper
that allows for mappings between coordinate systems that require
multi-step mapping.  For example if explicit mappings are defined
between the following coordinate systems,

  chromosome <-> contig
  contig     <-> clone

the ChainedAssemblyMapper would be able to perform implicit mapping
between the chromosome and clone coordinate systems.  This should be
transparent to the user of this module, and users should not even
realise that they are using a chained assembly mapper as opposed to a
normal assembly mapper.

=head1 METHODS

=cut

package Bio::EnsEMBL::ChainedAssemblyMapper;
$Bio::EnsEMBL::ChainedAssemblyMapper::VERSION = '114.0.0';
use strict;
use warnings;
use integer; #use proper arithmetic bitshifts

use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::Utils::Exception qw(throw);
use Scalar::Util qw(weaken);
use Bio::EnsEMBL::Utils::Scalar qw( check_ref);

# Names for the first, middle and last positions in the mapping chain.
# The constructor passes them as the first two arguments to
# Bio::EnsEMBL::Mapper->new() when it builds its three mappers.
my $FIRST = 'first';
my $MIDDLE = 'middle';
my $LAST  = 'last';

# Power-of-two exponent: 2^20 = 1,048,576, i.e. approximately 10^6.
# NOTE(review): presumably used as a bit-shift amount for chunking elsewhere
# in this file (not visible in this excerpt) - confirm.
my $CHUNKFACTOR = 20;

# Default maximum size of the pair cache in the mappers.
my $DEFAULT_MAX_PAIR_COUNT = 6000;

=head2 new

  Arg [1]    : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
  Arg [2]    : Bio::EnsEMBL::CoordSystem $src_cs
  Arg [3]    : Bio::EnsEMBL::CoordSystem $int_cs
  Arg [4]    : Bio::EnsEMBL::CoordSystem $dst_cs
  Example    : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems
  Description: Creates a new ChainedAssemblyMapper
  Returntype : Bio::EnsEMBL::ChainedAssemblyMapper
  Exceptions : thrown if wrong number of coord_systems are provided
  Caller     : AssemblyMapperAdaptor
  Status     : Stable

=cut

sub new {
  my ($caller,$adaptor,@coord_systems) = @_;

  my $class = ref($caller) || $caller;

  my $self = {};
  bless $self, $class;

  $self->adaptor($adaptor);

  if(@coord_systems != 3) {
    throw('ChainedMapper can only map between 3 coordinate systems. ' .
          scalar(@coord_systems) . ' were provided');
  }

  $adaptor->cache_seq_ids_with_mult_assemblys();

  # Set the component, intermediate and assembled coordinate systems
  $self->{'first_cs'}   = $coord_systems[0];
  $self->{'mid_cs'}   = $coord_systems[1];
  $self->{'last_cs'}   = $coord_systems[2];

  #maps between first and intermediate coord systems
  $self->{'first_mid_mapper'} = Bio::EnsEMBL::Mapper->new($FIRST, $MIDDLE);

  #maps between last and intermediate
  $self->{'last_mid_mapper'} = Bio::EnsEMBL::Mapper->new($LAST, $MIDDLE);

  #mapper that is actually used and is loaded by the mappings generated
  #by the other two mappers
  $self->{'first_last_mapper'} = Bio::EnsEMBL::Mapper->new($FIRST, $LAST,
                                                           $coord_systems[0],
                                                           $coord_systems[2]);

lib/Bio/EnsEMBL/ChainedAssemblyMapper.pm  view on Meta::CPAN

  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub mapper {
  my ($self) = @_;

  # Pure delegation: hand back whatever first_last_mapper() returns, called
  # in the same (scalar or list) context as this method was called in.
  return $self->first_last_mapper();
}

=head2 assembled_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->assembled_CoordSystem();
  Description: Returns the first of the three CoordSystems given to the constructor.
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut


sub assembled_CoordSystem {
  my ($self) = @_;

  # Read-only accessor for the coordinate system stored under 'first_cs'.
  return $self->{first_cs};
}

=head2 component_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->component_CoordSystem();
  Description: Returns the last of the three CoordSystems given to the constructor.
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub component_CoordSystem {
  my ($self) = @_;

  # Read-only accessor for the coordinate system stored under 'last_cs'.
  return $self->{last_cs};
}


=head2 adaptor

  Arg [1]    : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor
  Description: Getter/setter for this object's database adaptor; the adaptor is held as a weak reference
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub adaptor {
  my $self = shift;

  # Setter path: only taken when an argument was supplied.
  if (@_) {
    $self->{'adaptor'} = shift;

    # Hold the adaptor weakly so this object does not keep it alive.
    weaken($self->{'adaptor'});
  }

  return $self->{'adaptor'};
}


1;



( run in 2.097 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )