BioPerl

 view release on metacpan or  search on metacpan

Bio/SeqFeature/Tools/TypeMapper.pm  view on Meta::CPAN

dgg: separated out FT_SO_map for caller changes. Update with:

  open(FTSO,"curl -s http://sequenceontology.org/resources/mapping/FT_SO.txt|");
  while(<FTSO>){
    chomp; ($ft,$so,$sid,$ftdef,$sodef)= split"\t";
    print "     '$ft' => '$so',\n" if($ft && $so && $ftdef);             
  }

=cut

# FT_SO_map
#
# Returns a reference to a freshly built hash that maps feature-table (FT)
# type names to Sequence Ontology (SO) terms.  Every value is a two-element
# array reference:
#
#     [ SO term name, SO identifier ]
#
# Entries whose term name is "undefined" carry an empty identifier
# (presumably FT types with no SO equivalent -- confirm against the FT_SO
# mapping file named in the POD above).
#
# The sub ignores its arguments, so it can be called as a method or as a
# plain function.  A new hash is constructed on each call, so a caller may
# modify the returned structure without affecting later calls.
#
# NOTE(review): several keys, term names and identifiers below are
# lower-cased ("cds", "3'utr", "so:0000316") while others keep mixed case
# ("mRNA", "oriT", "SO:0000149"); the legacy table kept in comments at the
# end of this sub uses "CDS", "3'UTR", etc.  Whether the lookup code is
# case-sensitive is not visible from here -- confirm before normalising.
sub FT_SO_map  {
  # $self= shift;
  # note : some of the ft_so mappings are commented out and overridden
  # by the manually added entries near the end of the hash
  # (misc_RNA and source).
    return {
	"-" => ["located_sequence_feature", "so:0000110"],
	"-10_signal" => ["minus_10_signal", "so:0000175"],
	"-35_signal" => ["minus_35_signal", "so:0000176"],
	"3'utr" => ["three_prime_utr", "so:0000205"],
	"3'clip" => ["three_prime_clip", "so:0000557"],
	"5'utr" => ["five_prime_utr", "so:0000204"],
	"5'clip" => ["five_prime_clip", "so:0000555"],
	"caat_signal" => ["caat_signal", "so:0000172"],
	"cds" => ["cds", "so:0000316"],
	"c_region" => ["undefined", ""],
	"d-loop" => ["d_loop", "so:0000297"],
	"d_segment" => ["d_gene", "so:0000458"],
	"gc_signal" => ["gc_rich_region", "so:0000173"],
	"j_segment" => ["undefined", ""],
	"ltr" => ["long_terminal_repeat", "so:0000286"],
	"n_region" => ["undefined", ""],
	"rbs" => ["ribosome_entry_site", "so:0000139"],
	"sts" => ["sts", "so:0000331"],
	"s_region" => ["undefined", ""],
	"tata_signal" => ["tata_box", "so:0000174"],
	"v_region" => ["undefined", ""],
	"v_segment" => ["undefined", ""],
	"attenuator" => ["attenuator", "so:0000140"],
	"conflict" => ["undefined", ""],
	"enhancer" => ["enhancer", "so:0000165"],
	"exon" => ["exon", "so:0000147"],
	"gap" => ["gap", "so:0000730"],
	"gene" => ["gene", "so:0000704"],
	"idna" => ["idna", "so:0000723"],
	"intron" => ["intron", "so:0000188"],
	"mRNA" => ["mRNA", "so:0000234"],
	"mat_peptide" => ["mature_protein_region", "so:0000419"],
	"mature_peptide" => ["mature_protein_region", "so:0000419"],
	# overridden by the manual misc_RNA entry below
	#"misc_RNA" => ["transcript", "so:0000673"],
	"misc_binding" => ["binding_site", "so:0000409"],
	"misc_difference" => ["sequence_difference", "so:0000413"],
	# no identifier recorded for this entry (undef rather than "")
	"misc_feature" => ["region", undef],
	"misc_recomb" => ["recombination_feature", "so:0000298"],
	"misc_signal" => ["regulatory_region", "so:0005836"],
	"misc_structure" => ["sequence_secondary_structure", "so:0000002"],
	"modified_base" => ["modified_base_site", "so:0000305"],
	"old_sequence" => ["undefined", ""],
	"operon" => ["operon", "so:0000178"],
	"oriT" => ["origin_of_transfer", "so:0000724"],
	"polya_signal" => ["polyA_signal_sequence", "so:0000551"],
	"polya_site" => ["polyA_site", "so:0000553"],
	"precursor_RNA" => ["primary_transcript", "so:0000185"],
	"prim_transcript" => ["primary_transcript", "so:0000185"],
	"primer_bind" => ["primer_binding_site", "so:0005850"],
	"promoter" => ["promoter", "so:0000167"],
	"protein_bind" => ["protein_binding_site", "so:0000410"],
	"rRNA" => ["rRNA", "so:0000252"],
	"repeat_region" => ["repeat_region", "so:0000657"],
	"repeat_unit" => ["repeat_unit", "so:0000726"],
	"satellite" => ["satellite_dna", "so:0000005"],
	"scRNA" => ["scRNA", "so:0000013"],
	"sig_peptide" => ["signal_peptide", "so:0000418"],
	"snRNA" => ["snRNA", "so:0000274"],
	"snoRNA" => ["snoRNA", "so:0000275"],
	# overridden by the manual source entry below
	#"source" => ["databank_entry", "so:2000061"],
	"stem_loop" => ["stem_loop", "so:0000313"],
	"tRNA" => ["tRNA", "so:0000253"],
	"terminator" => ["terminator", "so:0000141"],
	"transit_peptide" => ["transit_peptide", "so:0000725"],
	# was a bare string ("undefined"); now a [term, id] pair like every
	# other entry so callers can dereference all values uniformly
	"unsure" => ["undefined", ""],
	"variation" => ["sequence_variant", "so:0000109"],

	# manually added 
	## has parent = pseudogene ; dgg
	"pseudomRNA" => ["pseudogenic_transcript", "so:0000516"],
	## from unflattener misc_rna ; dgg
	"pseudotranscript" => ["pseudogenic_transcript", "so:0000516"],
	"pseudoexon" => ["pseudogenic_exon", "so:0000507"],
	"pseudoCDS" => ["pseudogenic_exon", "so:0000507"],
	"pseudomisc_feature" => ["pseudogenic_region", "so:0000462"],
	"pseudointron" => ["pseudogenic_region", "so:0000462"],


	## "undefined" => "region",

	# this is the most generic form for rnas;
	# we always represent the processed form of
	# the transcript
	misc_RNA => ['mature_transcript',"so:0000233"],

	# not sure about this one...
	source=>['contig', "SO:0000149"],

	rep_origin=>['origin_of_replication',"SO:0000296"],

	Protein=>['polypeptide',"SO:0000104"],
    };
# Legacy form of the table (FT term => SO term name only), kept for reference:
#  return {
     #"FT term" => "SO term",
     #"-" => "located_sequence_feature",
     #"-10_signal" => "minus_10_signal",
     #"-35_signal" => "minus_35_signal",
     #"3'UTR" => "three_prime_UTR",
     #"3'clip" => "three_prime_clip",
     #"5'UTR" => "five_prime_UTR",
     #"5'clip" => "five_prime_clip",
     #"CAAT_signal" => "CAAT_signal",
     #"CDS" => "CDS",
     #"C_region" => "undefined",
     #"D-loop" => "D_loop",
     #"D_segment" => "D_gene",
     #"GC_signal" => "GC_rich_region",
     #"J_segment" => "undefined",
     #"LTR" => "long_terminal_repeat",
     #"N_region" => "undefined",
     #"RBS" => "ribosome_entry_site",
     #"STS" => "STS",
     #"S_region" => "undefined",
     #"TATA_signal" => "TATA_box",
     #"V_region" => "undefined",
     #"V_segment" => "undefined",
     #"attenuator" => "attenuator",
     #"conflict" => "undefined",
     #"enhancer" => "enhancer",
     #"exon" => "exon",
     #"gap" => "gap",
     #"gene" => "gene",
     #"iDNA" => "iDNA",
     #"intron" => "intron",
     #"mRNA" => "mRNA",
     #"mat_peptide" => "mature_protein_region",
     #"mature_peptide" => "mature_protein_region",
##                     "misc_RNA" => "transcript",
     #"misc_binding" => "binding_site",
     #"misc_difference" => "sequence_difference",
     #"misc_feature" => "region",
     #"misc_recomb" => "recombination_feature",
     #"misc_signal" => "regulatory_region",
     #"misc_structure" => "sequence_secondary_structure",
     #"modified_base" => "modified_base_site",
     #"old_sequence" => "undefined",
     #"operon" => "operon",
     #"oriT" => "origin_of_transfer",
     #"polyA_signal" => "polyA_signal_sequence",
     #"polyA_site" => "polyA_site",
     #"precursor_RNA" => "primary_transcript",
     #"prim_transcript" => "primary_transcript",
     #"primer_bind" => "primer_binding_site",
     #"promoter" => "promoter",
     #"protein_bind" => "protein_binding_site",
     #"rRNA" => "rRNA",
     #"repeat_region" => "repeat_region",
     #"repeat_unit" => "repeat_unit",
     #"satellite" => "satellite_DNA",
     #"scRNA" => "scRNA",
     #"sig_peptide" => "signal_peptide",
     #"snRNA" => "snRNA",
     #"snoRNA" => "snoRNA",
##                     "source" => "databank_entry",
     #"stem_loop" => "stem_loop",
     #"tRNA" => "tRNA",
     #"terminator" => "terminator",
     #"transit_peptide" => "transit_peptide",
     #"unsure" => "undefined",
     #"variation" => "sequence_variant",

      #"pseudomRNA" => "pseudogenic_transcript", ## has parent = pseudogene ; dgg
      #"pseudotranscript" => "pseudogenic_transcript", ## from Unflattener misc_RNA ; dgg
      #"pseudoexon" => "pseudogenic_exon",
      #"pseudoCDS"  => "pseudogenic_exon",
      #"pseudomisc_feature" => "pseudogenic_region",
      #"pseudointron" => "pseudogenic_region",
      
      ### "undefined" => "region",

      ## this is the most generic form for RNAs;
      ## we always represent the processed form of
      ## the transcript
      #misc_RNA=>'processed_transcript',
      
      ## not sure about this one...
      #source=>'contig',
      
      #rep_origin=>'origin_of_replication',
      
      #Protein=>'protein',
      #};
}

# map_types_to_SO
#
# Convenience wrapper around map_types(): forwards the caller's arguments
# unchanged and appends a -type_map option whose value is whatever
# FT_SO_map() returns.  Returns whatever map_types() returns.
sub map_types_to_SO {
    my ($self, @caller_args) = @_;

    # -type_map goes last, after everything the caller supplied.
    return $self->map_types(
        @caller_args,
        -type_map => $self->FT_SO_map(),
    );
}

=head2 get_relationship_type_by_parent_child

 Title   : get_relationship_type_by_parent_child
 Usage   : $type = $tm->get_relationship_type_by_parent_child($parent_sf, $child_sf);
 Usage   : $type = $tm->get_relationship_type_by_parent_child('mRNA', 'protein');
 Function: given two features where the parent contains the child,
           determines the type of relationship between them
 Example :
 Returns : 
 Args    : parent SeqFeature, child SeqFeature OR



( run in 0.513 second using v1.01-cache-2.11-cpan-39bf76dae61 )