AcePerl
view release on metacpan or search on metacpan
Ace/Sequence.pm view on Meta::CPAN
last BLOCK unless defined ($refseq);
if (ref($refseq) && ref($refseq) eq 'ARRAY') {
$arrayref = $refseq;
last BLOCK;
}
if (ref($refseq) && ($refseq->can('smapped'))) {
croak "Reference sequence has no common ancestor with sequence"
unless $self->parent eq $refseq->parent;
my ($a,$b,$c) = @{$refseq->{refseq}};
# $b += $refseq->offset;
$b += $refseq->offset;
$arrayref = [$refseq,$b,$refseq->strand];
last BLOCK;
}
# look up reference sequence in database if we aren't given
# database object already
$refseq = $self->db->fetch('Sequence' => $refseq)
unless $refseq->isa('Ace::Object');
croak "Invalid reference sequence" unless $refseq;
# find position of ref sequence in parent strand
my ($r_parent,$r_offset,$r_length,$r_strand) = find_parent($refseq);
croak "Reference sequence has no common ancestor with sequence"
unless $r_parent eq $self->{parent};
# set to array reference containing this information
$arrayref = [$refseq,$r_offset,$r_strand];
}
$self->{refseq} = $arrayref;
}
return unless $prev;
return $self->parent if $self->absolute;
return wantarray ? @{$prev} : $prev->[0];
}
# return strand
sub strand { return $_[0]->{strand} }
# return reference strand
sub r_strand {
my $self = shift;
return "+1" if $self->absolute;
if (my ($ref,$r_offset,$r_strand) = $self->refseq) {
return $r_strand;
} else {
return $self->{strand}
}
}
sub offset { $_[0]->{offset} }
sub p_offset { $_[0]->{p_offset} }
sub smapped { 1; }
sub type { 'Sequence' }
sub subtype { }
sub debug {
my $self = shift;
my $d = $self->{_debug};
$self->{_debug} = shift if @_;
$d;
}
# return the database this sequence is associated with
sub db {
return Ace->name2db($_[0]->{db} ||= $_[0]->source->db);
}
sub start {
my ($self,$abs) = @_;
$abs = $self->absolute unless defined $abs;
return $self->{p_offset} + $self->{offset} + 1 if $abs;
if ($self->refseq) {
my ($ref,$r_offset,$r_strand) = $self->refseq;
return $r_strand < 0 ? 1 + $r_offset - ($self->{p_offset} + $self->{offset})
: 1 + $self->{p_offset} + $self->{offset} - $r_offset;
}
else {
return $self->{offset} +1;
}
}
sub end {
my ($self,$abs) = @_;
my $start = $self->start($abs);
my $f = $self->{length} > 0 ? 1 : -1; # for stupid 1-based adjustments
if ($abs && $self->refseq ne $self->parent) {
my $r_strand = $self->r_strand;
return $start - $self->{length} + $f
if $r_strand < 0 or $self->{strand} < 0 or $self->{length} < 0;
return $start + $self->{length} - $f
}
return $start + $self->{length} - $f if $self->r_strand eq $self->{strand};
return $start - $self->{length} + $f;
}
# turn on absolute coordinates (relative to reference sequence)
sub absolute {
my $self = shift;
my $prev = $self->{absolute};
$self->{absolute} = $_[0] if defined $_[0];
return $prev;
}
# human readable string (for debugging)
sub asString {
my $self = shift;
if ($self->absolute) {
return join '',$self->parent,'/',$self->start,',',$self->end;
} elsif (my $ref = $self->refseq){
my $label = $ref->isa('Ace::Sequence::Feature') ? $ref->info : "$ref";
return join '',$label,'/',$self->start,',',$self->end;
} else {
join '',$self->source,'/',$self->start,',',$self->end;
}
}
sub cmp {
my ($self,$arg,$reversed) = @_;
if (ref($arg) and $arg->isa('Ace::Sequence')) {
my $cmp = $self->parent cmp $arg->parent
|| $self->start <=> $arg->start;
return $reversed ? -$cmp : $cmp;
}
my $name = $self->asString;
return $reversed ? $arg cmp $name : $name cmp $arg;
}
# Return the DNA
sub dna {
my $self = shift;
return $self->{dna} if $self->{dna};
my $raw = $self->_query('seqdna');
$raw=~s/^>.*\n//m;
$raw=~s/^\/\/.*//mg;
$raw=~s/\n//g;
$raw =~ s/\0+\Z//; # blasted nulls!
my $effective_strand = $self->end >= $self->start ? '+1' : '-1';
_complement(\$raw) if $self->r_strand ne $effective_strand;
return $self->{dna} = $raw;
}
# return a gff file
sub gff {
my $self = shift;
my ($abs,$features) = rearrange([['ABS','ABSOLUTE'],'FEATURES'],@_);
$abs = $self->absolute unless defined $abs;
# can provide list of feature names, such as 'similarity', or 'all' to get 'em all
# !THIS IS BROKEN; IT SHOULD LOOK LIKE FEATURE()!
my $opt = $self->_feature_filter($features);
my $gff = $self->_gff($opt);
warn $gff if $self->debug;
$self->transformGFF(\$gff) unless $abs;
return $gff;
}
# return a GFF object using the optional GFF.pm module
sub GFF {
my $self = shift;
my ($filter,$converter) = @_; # anonymous subs
croak "GFF module not installed" unless require GFF;
require GFF::Filehandle;
my @lines = grep !/^\/\//,split "\n",$self->gff(@_);
local *IN;
local ($^W) = 0; # prevent complaint by GFF module
tie *IN,'GFF::Filehandle',\@lines;
my $gff = GFF::GeneFeatureSet->new;
$gff->read(\*IN,$filter,$converter) if $gff;
return $gff;
}
# Get the features table. Can filter by type/subtype this way:
# features('similarity:EST','annotation:assembly_tag')
sub features {
my $self = shift;
my ($filter,$opt) = $self->_make_filter(@_);
# get raw gff file
my $gff = $self->gff(-features=>$opt);
# turn it into a list of features
my @features = $self->_make_features($gff,$filter);
if ($self->automerge) { # automatic merging
# fetch out constructed transcripts and clones
my %types = map {lc($_)=>1} (@$opt,@_);
if ($types{'transcript'}) {
push @features,$self->_make_transcripts(\@features);
@features = grep {$_->type !~ /^(intron|exon)$/ } @features;
}
push @features,$self->_make_clones(\@features) if $types{'clone'};
if ($types{'similarity'}) {
my @f = $self->_make_alignments(\@features);
@features = grep {$_->type ne 'similarity'} @features;
push @features,@f;
}
}
return wantarray ? @features : \@features;
}
# A little bit more complex - assemble a list of "transcripts"
# consisting of Ace::Sequence::Transcript objects. These objects
# contain a list of exons and introns.
sub transcripts {
my $self = shift;
my $curated = shift;
my $ef = $curated ? "exon:curated" : "exon";
my $if = $curated ? "intron:curated" : "intron";
my $sf = $curated ? "Sequence:curated" : "Sequence";
Ace/Sequence.pm view on Meta::CPAN
}
}
$s .= $expr;
}
$s .= "return;\n }";
$sub = eval $s;
croak $@ if $@;
} else {
$sub = sub { 1; }
}
return ($sub,$promiscuous ? [] : [keys %filter]);
}
# turn a GFF file and a filter into a list of Ace::Sequence::Feature objects
sub _make_features {
my $self = shift;
my ($gff,$filter) = @_;
my ($r,$r_offset,$r_strand) = $self->refseq;
my $parent = $self->parent;
my $abs = $self->absolute;
if ($abs) {
$r_offset = 0;
$r = $parent;
$r_strand = '+1';
}
my @features = map {Ace::Sequence::Feature->new($parent,$r,$r_offset,$r_strand,$abs,$_)}
grep !m@^(?:\#|//)@ && $filter->($_),split("\n",$gff);
}
# low level GFF call, no changing absolute to relative coordinates
sub _gff {
my $self = shift;
my ($opt,$db) = @_;
my $data = $self->_query("seqfeatures -version 2 $opt",$db);
$data =~ s/\0+\Z//;
return $data; #blasted nulls!
}
# shortcut for running a gif query
sub _query {
my $self = shift;
my $command = shift;
my $db = shift || $self->db;
my $parent = $self->parent;
my $start = $self->start(1);
my $end = $self->end(1);
($start,$end) = ($end,$start) if $start > $end; #flippity floppity
my $coord = "-coords $start $end";
# BAD BAD HACK ALERT - CHECKS THE QUERY THAT IS PASSED DOWN
# ALSO MAKES THINGS INCOMPATIBLE WITH PRIOR 4.9 servers.
# my $opt = $command =~ /seqfeatures/ ? '-nodna' : '';
my $opt = '-noclip';
my $query = "gif seqget $parent $opt $coord ; $command";
warn $query if $self->debug;
return $db->raw_query("gif seqget $parent $opt $coord ; $command");
}
# utility function -- reverse complement
sub _complement {
my $dna = shift;
$$dna =~ tr/GATCgatc/CTAGctag/;
$$dna = scalar reverse $$dna;
}
sub _feature_filter {
my $self = shift;
my $features = shift;
return '' unless $features;
my $opt = '';
$opt = '+feature ' . join('|',@$features) if ref($features) eq 'ARRAY' && @$features;
$opt = "+feature $features" unless ref $features;
$opt;
}
1;
=head1 NAME
Ace::Sequence - Examine ACeDB Sequence Objects
=head1 SYNOPSIS
# open database connection and get an Ace::Object sequence
use Ace::Sequence;
$db = Ace->connect(-host => 'stein.cshl.org',-port => 200005);
$obj = $db->fetch(Predicted_gene => 'ZK154.3');
# Wrap it in an Ace::Sequence object
$seq = Ace::Sequence->new($obj);
# Find all the exons
@exons = $seq->features('exon');
# Find all the exons predicted by various versions of "genefinder"
@exons = $seq->features('exon:genefinder.*');
# Iterate through the exons, printing their start, end and DNA
for my $exon (@exons) {
print join "\t",$exon->start,$exon->end,$exon->dna,"\n";
}
# Find the region 1000 kb upstream of the first exon
$sub = Ace::Sequence->new(-seq=>$exons[0],
-offset=>-1000,-length=>1000);
# Find all features in that area
@features = $sub->features;
# Print its DNA
print $sub->dna;
# Create a new Sequence object from the first 500 kb of chromosome 1
( run in 0.692 second using v1.01-cache-2.11-cpan-39bf76dae61 )