start-end results from the CPAN

start-end
BioPerl
view release on metacpan or search on metacpan
Bio/LiveSeq/IO/Loader.pm view on Meta::CPAN
  my ($swissacc,$swisshash); my @swisshashes;

  # for translation_tables
  my @ttables;

  # to create labels
  my ($name,$exon);
  my @range; my @cdsexons; my @labels;

  # maybe here also could be added special case when there is no CDS feature
  # in the entry (e.g. tRNA entry -> TOCHECK).
  # let's deal with the special case in which there is just one gene per entry
  # usually without /gene qualifier
  my @cds=@{$entry->{'CDS'}};

  my $skipgenematch=0;
  if (scalar(@cds) == 1) {
    #carp "Note: only one CDS in this entry. Treating all features found in entry as Gene features.";
    $skipgenematch=1;
  }

  my ($cds_begin,$cds_end,$proximity);
  if ($cds_position) { # if a position has been requested
    my @cds_exons=@{$cds[$cds_position-1]->{'range'}};
    ($cds_begin,$cds_end)=($cds_exons[0]->[0],$cds_exons[-1]->[1]); # begin and end of CDS
    $gene_name=$cds[$cds_position-1]->{'qualifiers'}->{'gene'};
    # DEBUG
    unless ($skipgenematch) {
      carp "--DEBUG-- cdsbegin $cds_begin cdsend $cds_end--------";
    }
    $proximity=100; # proximity CONSTANT to decide whether a feature "belongs" to the CDS
  }

  for $entryfeature (@entryfeatures) { # get only features for the desired gene
    if (($skipgenematch)||(($cds_position)&&($self->_checkfeatureproximity($entryfeature->{'range'},$cds_begin,$cds_end,$proximity)))||(!($cds_position)&&($entryfeature->{'qualifiers'}->{'gene'} eq "$gene_name"))) {
      push(@genefeatures,$entryfeature);

      my @range=@{$entryfeature->{'range'}};
      $name=$entryfeature->{'name'};
      my %qualifierhash=%{$entryfeature->{'qualifiers'}};
      if ($name eq "CDS") { # that has range containing array of exons

	# swissprot crossindexing (if without SRS support it will fill array
	# with zeros and do nothing
	if ($getswissprotinfo) {
	  $swissacc=$entryfeature->{'qualifiers'}->{'db_xref'};
	  $swisshash=$self->get_swisshash($swissacc);
	  #$self->printswissprot($swisshash); # DEBUG
	  push (@swisshashes,$swisshash);
	}

	push (@ttables,$entryfeature->{'qualifiers'}->{'transl_table'}); # undef if not specified
	
	# create labels array
	for $exon (@range) {
	  push(@labels,$exon->[0],$exon->[1]); # start and end of every exon of the CDS
	}
	push (@transcripts,$entryfeature->{'range'});
      } else {
	# "simplifying" the joinedlocation features. I.e. changing them from
	# multijoined ones to simple plain start-end features, taking only
	# the start of the first "exon" and the end of the last "exon" as
	# start and end of the entire feature
	if ($entryfeature->{'locationtype'} && $entryfeature->{'locationtype'} eq "joined") { # joined location
	  @range=($range[0]->[0],$range[-1]->[1]);
	}
	push(@labels,$range[0],$range[1]); # start and end of every feature
	if ($name eq "exon") {
	  $desc=$entryfeature->{'qualifiers'}->{'number'};
	  if ($entryfeature->{'qualifiers'}->{'note'}) {
	    if ($desc) {
	      $desc .= "|" . $entryfeature->{'qualifiers'}->{'note'};
	    } else {
	      $desc = $entryfeature->{'qualifiers'}->{'note'};
	    }
	  }
	  push (@exondescs,$desc || "unknown");
	  push(@exons,\@range);
	}
	if ($name eq "intron") {
 	  $desc=$entryfeature->{'qualifiers'}->{'number'};
	  if ($desc) {
	    $desc .= "|" . $entryfeature->{'qualifiers'}->{'note'};
	  } else {
	    $desc = $entryfeature->{'qualifiers'}->{'note'};
	  }
	  push (@introndescs,$desc || "unknown"); 
	  push(@introns,\@range);
	}
	if (($name eq "prim_transcript")||($name eq "mRNA")) { push(@prim_transcripts,\@range); }
	if ($name eq "repeat_unit") { push(@repeat_units,\@range);
	  $rpt_family=$entryfeature->{'qualifiers'}->{'rpt_family'};
	  push (@repeat_units_family,$rpt_family || "unknown");
	}
	if ($name eq "repeat_region") { push(@repeat_regions,\@range);
	  $rpt_family=$entryfeature->{'qualifiers'}->{'rpt_family'};
	  push (@repeat_regions_family,$rpt_family || "unknown");
	}
      }
    }
  }
  unless ($gene_name) { $gene_name="cds-position:".$cds_position; }
  my %genefeatureshash;
  $genefeatureshash{gene_name}=$gene_name;
  $genefeatureshash{genefeatures}=\@genefeatures;
  $genefeatureshash{labels}=\@labels;
  $genefeatureshash{ttables}=\@ttables;
  $genefeatureshash{swisshashes}=\@swisshashes;
  $genefeatureshash{transcripts}=\@transcripts;
  $genefeatureshash{exons}=\@exons;
  $genefeatureshash{exondescs}=\@exondescs;
  $genefeatureshash{introns}=\@introns;
  $genefeatureshash{introndescs}=\@introndescs;
  $genefeatureshash{prim_transcripts}=\@prim_transcripts;
  $genefeatureshash{repeat_units}=\@repeat_units;
  $genefeatureshash{repeat_regions}=\@repeat_regions;
  $genefeatureshash{repeat_units_family}=\@repeat_units_family;
  $genefeatureshash{repeat_regions_family}=\@repeat_regions_family;
  return (\%genefeatureshash);
}
( run in 0.239 second using v1.01-cache-2.11-cpan-eab888a1d7d )