BioPerl

 view release on metacpan or  search on metacpan

Bio/DB/GFF/Adaptor/biofetch_oracle.pm  view on Meta::CPAN

    $biofetch = $self->{_biofetch}{$db} = Bio::DB::BioFetch->new(-db=>$db);
    $biofetch->retrieval_type('tempfile');
    $biofetch->proxy(@{$self->{_proxy}}) if $self->{_proxy};
  }

  my $seq  = eval {$biofetch->get_Seq_by_id($acc)} or return;
  $self->_load_embl($acc,$seq);
  1;
}

sub load_from_file {
  my $self = shift;
  my $file = shift;

  my $format = $file =~ /\.(gb|genbank|gbk)$/i ? 'genbank' : 'embl';

  my $seqio = Bio::SeqIO->new( '-format' => $format, -file => $file);
  my $seq   = $seqio->next_seq;

  $self->_load_embl($seq->accession,$seq);
  1;
}

sub _load_embl {
  my $self = shift;
  my $acc  = shift;
  my $seq  = shift;
  my $refclass = $self->refclass;
  my $locus    = $seq->id;

  # begin loading
  $self->setup_load();

  # first synthesize the entry for the top-level feature
  my @aliases;
  foreach ($seq->accession,$seq->get_secondary_accessions) {
    next if lc($_) eq lc($acc);
    push @aliases,[Alias => $_];
  }
  $self->load_gff_line(
		       {
			ref    => $acc,
			class  => $refclass,
			source => 'EMBL',
			method => 'origin',
			start  => 1,
			stop   => $seq->length,
			score  => undef,
			strand => '.',
			phase  => '.',
			gclass => $self->refclass,
			gname  => $acc,
			tstart => undef,
			tstop  => undef,
			attributes  => [[Note => $seq->desc],@aliases],
		       }
		      );
  # now load each feature in turn
  for my $feat ($seq->all_SeqFeatures) {
    my $attributes = $self->get_attributes($feat);
    my $name       = $self->guess_name($attributes);

    my $location = $feat->location;
    my @segments = map {[$_->start,$_->end,$_->seq_id]}
      $location->can('sub_Location') ? $location->sub_Location : $location;

    my $type     =  $feat->primary_tag eq 'CDS' ? 'mRNA'  : $feat->primary_tag;
    my $parttype =  $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;

    if ($feat->primary_tag =~ /^(gene|CDS)$/) {
      $self->load_gff_line( {
			     ref    => $acc,
			     class  => $refclass,
			     source => 'EMBL',
			     method => $type,
			     start  => $location->start,
			     stop   => $location->end,
			     score  => $feat->score || undef,
			     strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
			     phase  => $feat->frame || '.',
			     gclass => $name->[0],
			     gname  => $name->[1],
			     tstart => undef,
			     tstop  => undef,
			     attributes  => $attributes,
			    }
			  );
      @$attributes = ();
    }

    for my $segment (@segments) {

      $self->load_gff_line( {
			     ref    => $segment->[2] eq $locus ? $acc : $segment->[2],
			     class  => $refclass,
			     source => 'EMBL',
			     method => $parttype,
			     start  => $segment->[0],
			     stop   => $segment->[1],
			     score  => $feat->score || undef,
			     strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'),
			     phase  => $feat->frame || '.',
			     gclass => $name->[0],
			     gname  => $name->[1],
			     tstart => undef,
			     tstop  => undef,
			     attributes  => $attributes,
			    }
			  );
    }

  }

  # finish loading
  $self->finish_load();

  # now load the DNA
  $self->load_sequence_string($acc,$seq->seq);

  1;
}

sub get_attributes {
  my $self = shift;
  my $seq  = shift;

  my @tags = $seq->all_tags or return;
  my @result;
  foreach my $tag (@tags) {
    foreach my $value ($seq->each_tag_value($tag)) {
      push @result,[$tag=>$value];
    }
  }
  \@result;
}

sub guess_name {
  my $self = shift;
  my $attributes = shift;
# remove this fix when Lincoln fixes it properly
  return ["Misc" => "Misc"] unless ($attributes);  # these are arbitrary, and possibly destructive defaults
  my @ordered_attributes = sort {($self->_preferred_tags->{$a->[0]} || 0) <=> ($self->_preferred_tags->{$b->[0]} || 0)} @$attributes;
  my $best = pop @ordered_attributes;
  @$attributes = @ordered_attributes;
  return $best;
}



1;



( run in 1.054 second using v1.01-cache-2.11-cpan-39bf76dae61 )