BioPerl
view release on metacpan or search on metacpan
Bio/DB/GFF/Adaptor/biofetch_oracle.pm view on Meta::CPAN
$biofetch = $self->{_biofetch}{$db} = Bio::DB::BioFetch->new(-db=>$db);
$biofetch->retrieval_type('tempfile');
$biofetch->proxy(@{$self->{_proxy}}) if $self->{_proxy};
}
my $seq = eval {$biofetch->get_Seq_by_id($acc)} or return;
$self->_load_embl($acc,$seq);
1;
}
sub load_from_file {
my $self = shift;
my $file = shift;
my $format = $file =~ /\.(gb|genbank|gbk)$/i ? 'genbank' : 'embl';
my $seqio = Bio::SeqIO->new( '-format' => $format, -file => $file);
my $seq = $seqio->next_seq;
$self->_load_embl($seq->accession,$seq);
1;
}
sub _load_embl {
my $self = shift;
my $acc = shift;
my $seq = shift;
my $refclass = $self->refclass;
my $locus = $seq->id;
# begin loading
$self->setup_load();
# first synthesize the entry for the top-level feature
my @aliases;
foreach ($seq->accession,$seq->get_secondary_accessions) {
next if lc($_) eq lc($acc);
push @aliases,[Alias => $_];
}
$self->load_gff_line(
{
ref => $acc,
class => $refclass,
source => 'EMBL',
method => 'origin',
start => 1,
stop => $seq->length,
score => undef,
strand => '.',
phase => '.',
gclass => $self->refclass,
gname => $acc,
tstart => undef,
tstop => undef,
attributes => [[Note => $seq->desc],@aliases],
}
);
# now load each feature in turn
for my $feat ($seq->all_SeqFeatures) {
my $attributes = $self->get_attributes($feat);
my $name = $self->guess_name($attributes);
my $location = $feat->location;
my @segments = map {[$_->start,$_->end,$_->seq_id]}
$location->can('sub_Location') ? $location->sub_Location : $location;
my $type = $feat->primary_tag eq 'CDS' ? 'mRNA' : $feat->primary_tag;
my $parttype = $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;
if ($feat->primary_tag =~ /^(gene|CDS)$/) {
$self->load_gff_line( {
ref => $acc,
class => $refclass,
source => 'EMBL',
method => $type,
start => $location->start,
stop => $location->end,
score => $feat->score || undef,
strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
phase => $feat->frame || '.',
gclass => $name->[0],
gname => $name->[1],
tstart => undef,
tstop => undef,
attributes => $attributes,
}
);
@$attributes = ();
}
for my $segment (@segments) {
$self->load_gff_line( {
ref => $segment->[2] eq $locus ? $acc : $segment->[2],
class => $refclass,
source => 'EMBL',
method => $parttype,
start => $segment->[0],
stop => $segment->[1],
score => $feat->score || undef,
strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
phase => $feat->frame || '.',
gclass => $name->[0],
gname => $name->[1],
tstart => undef,
tstop => undef,
attributes => $attributes,
}
);
}
}
# finish loading
$self->finish_load();
# now load the DNA
$self->load_sequence_string($acc,$seq->seq);
1;
}
sub get_attributes {
my $self = shift;
my $seq = shift;
my @tags = $seq->all_tags or return;
my @result;
foreach my $tag (@tags) {
foreach my $value ($seq->each_tag_value($tag)) {
push @result,[$tag=>$value];
}
}
\@result;
}
sub guess_name {
my $self = shift;
my $attributes = shift;
# remove this fix when Lincoln fixes it properly
return ["Misc" => "Misc"] unless ($attributes); # these are arbitrary, and possibly destructive defaults
my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
my $best = pop @ordered_attributes;
@$attributes = @ordered_attributes;
return $best;
}
1;
( run in 1.054 second using v1.01-cache-2.11-cpan-39bf76dae61 )