BioPerl
view release on metacpan or search on metacpan
Bio/SeqFeature/Tools/TypeMapper.pm view on Meta::CPAN
dgg: separated out FT_SO_map for caller changes. Update with:
open(FTSO,"curl -s http://sequenceontology.org/resources/mapping/FT_SO.txt|");
while(<FTSO>){
chomp; ($ft,$so,$sid,$ftdef,$sodef)= split"\t";
print " '$ft' => '$so',\n" if($ft && $so && $ftdef);
}
=cut
# FT_SO_map
#
# Purpose : Return the default mapping from GenBank/EMBL/DDBJ feature-table
#           (FT) keys to Sequence Ontology (SO) terms.
# Usage   : my $map = $self->FT_SO_map();   # also callable as a plain function
# Args    : none (any invocant or arguments are ignored)
# Returns : a reference to a freshly built hash.  Each key is an FT key,
#           spelled with the exact case used in the feature table (e.g.
#           "CDS", "3'UTR", "polyA_signal").  Each value is a two-element
#           array reference: [ SO term name, SO identifier ].
#             * term name "undefined" with identifier "" marks an FT key that
#               has no SO equivalent;
#             * identifier undef (only "misc_feature") marks a term that is
#               given by name alone.
#
# Every value is an array reference so that callers can dereference entries
# uniformly under "use strict".  Identifiers consistently carry the upper-case
# "SO:" prefix.
#
# The older name-only table (FT key => SO term name, no identifiers) that used
# to follow the return statement as commented-out code has been dropped; the
# table below supersedes it.
sub FT_SO_map {
    # $self = shift;    # not needed: the table does not depend on the object

    return {
        # --- generated from the FT_SO mapping file --------------------------
        "-"               => ["located_sequence_feature",     "SO:0000110"],
        "-10_signal"      => ["minus_10_signal",              "SO:0000175"],
        "-35_signal"      => ["minus_35_signal",              "SO:0000176"],
        "3'UTR"           => ["three_prime_UTR",              "SO:0000205"],
        "3'clip"          => ["three_prime_clip",             "SO:0000557"],
        "5'UTR"           => ["five_prime_UTR",               "SO:0000204"],
        "5'clip"          => ["five_prime_clip",              "SO:0000555"],
        "CAAT_signal"     => ["CAAT_signal",                  "SO:0000172"],
        "CDS"             => ["CDS",                          "SO:0000316"],
        "C_region"        => ["undefined",                    ""],
        "D-loop"          => ["D_loop",                       "SO:0000297"],
        "D_segment"       => ["D_gene",                       "SO:0000458"],
        "GC_signal"       => ["GC_rich_region",               "SO:0000173"],
        "J_segment"       => ["undefined",                    ""],
        "LTR"             => ["long_terminal_repeat",         "SO:0000286"],
        "N_region"        => ["undefined",                    ""],
        "RBS"             => ["ribosome_entry_site",          "SO:0000139"],
        "STS"             => ["STS",                          "SO:0000331"],
        "S_region"        => ["undefined",                    ""],
        "TATA_signal"     => ["TATA_box",                     "SO:0000174"],
        "V_region"        => ["undefined",                    ""],
        "V_segment"       => ["undefined",                    ""],
        "attenuator"      => ["attenuator",                   "SO:0000140"],
        "conflict"        => ["undefined",                    ""],
        "enhancer"        => ["enhancer",                     "SO:0000165"],
        "exon"            => ["exon",                         "SO:0000147"],
        "gap"             => ["gap",                          "SO:0000730"],
        "gene"            => ["gene",                         "SO:0000704"],
        "iDNA"            => ["iDNA",                         "SO:0000723"],
        "intron"          => ["intron",                       "SO:0000188"],
        "mRNA"            => ["mRNA",                         "SO:0000234"],
        "mat_peptide"     => ["mature_protein_region",        "SO:0000419"],
        "mature_peptide"  => ["mature_protein_region",        "SO:0000419"],
        # overridden by the manual misc_RNA entry further down:
        #"misc_RNA"       => ["transcript",                   "SO:0000673"],
        "misc_binding"    => ["binding_site",                 "SO:0000409"],
        "misc_difference" => ["sequence_difference",          "SO:0000413"],
        # deliberately no identifier for the generic "region" term
        "misc_feature"    => ["region",                       undef],
        "misc_recomb"     => ["recombination_feature",        "SO:0000298"],
        "misc_signal"     => ["regulatory_region",            "SO:0005836"],
        "misc_structure"  => ["sequence_secondary_structure", "SO:0000002"],
        "modified_base"   => ["modified_base_site",           "SO:0000305"],
        "old_sequence"    => ["undefined",                    ""],
        "operon"          => ["operon",                       "SO:0000178"],
        "oriT"            => ["origin_of_transfer",           "SO:0000724"],
        "polyA_signal"    => ["polyA_signal_sequence",        "SO:0000551"],
        "polyA_site"      => ["polyA_site",                   "SO:0000553"],
        "precursor_RNA"   => ["primary_transcript",           "SO:0000185"],
        "prim_transcript" => ["primary_transcript",           "SO:0000185"],
        "primer_bind"     => ["primer_binding_site",          "SO:0005850"],
        "promoter"        => ["promoter",                     "SO:0000167"],
        "protein_bind"    => ["protein_binding_site",         "SO:0000410"],
        "rRNA"            => ["rRNA",                         "SO:0000252"],
        "repeat_region"   => ["repeat_region",                "SO:0000657"],
        "repeat_unit"     => ["repeat_unit",                  "SO:0000726"],
        "satellite"       => ["satellite_DNA",                "SO:0000005"],
        "scRNA"           => ["scRNA",                        "SO:0000013"],
        "sig_peptide"     => ["signal_peptide",               "SO:0000418"],
        "snRNA"           => ["snRNA",                        "SO:0000274"],
        "snoRNA"          => ["snoRNA",                       "SO:0000275"],
        # overridden by the manual source entry further down:
        #"source"         => ["databank_entry",               "SO:2000061"],
        "stem_loop"       => ["stem_loop",                    "SO:0000313"],
        "tRNA"            => ["tRNA",                         "SO:0000253"],
        "terminator"      => ["terminator",                   "SO:0000141"],
        "transit_peptide" => ["transit_peptide",              "SO:0000725"],
        # was a bare string; now the same [name, id] shape as every other
        # entry without an SO equivalent
        "unsure"          => ["undefined",                    ""],
        "variation"       => ["sequence_variant",             "SO:0000109"],

        # --- manually added -------------------------------------------------
        ## has parent = pseudogene ; dgg
        "pseudomRNA"         => ["pseudogenic_transcript", "SO:0000516"],
        ## from Unflattener misc_RNA ; dgg
        "pseudotranscript"   => ["pseudogenic_transcript", "SO:0000516"],
        "pseudoexon"         => ["pseudogenic_exon",       "SO:0000507"],
        "pseudoCDS"          => ["pseudogenic_exon",       "SO:0000507"],
        "pseudomisc_feature" => ["pseudogenic_region",     "SO:0000462"],
        "pseudointron"       => ["pseudogenic_region",     "SO:0000462"],

        ## "undefined" => "region",

        # this is the most generic form for RNAs;
        # we always represent the processed form of
        # the transcript
        misc_RNA   => ['mature_transcript',     "SO:0000233"],

        # not sure about this one...
        source     => ['contig',                "SO:0000149"],

        rep_origin => ['origin_of_replication', "SO:0000296"],
        Protein    => ['polypeptide',           "SO:0000104"],
    };
}
# map_types_to_SO
#
# Convenience wrapper around map_types(): forwards every argument it was
# given and appends "-type_map => <table from FT_SO_map()>" at the end of
# the argument list.  Whatever map_types() returns is returned unchanged.
sub map_types_to_SO {
    my $self = shift;

    # Copy the caller's arguments, then tack the FT -> SO table on last.
    my @params = ( @_, -type_map => $self->FT_SO_map() );

    return $self->map_types(@params);
}
=head2 get_relationship_type_by_parent_child
Title : get_relationship_type_by_parent_child
Usage : $type = $tm->get_relationship_type_by_parent_child($parent_sf, $child_sf);
Usage : $type = $tm->get_relationship_type_by_parent_child('mRNA', 'protein');
Function: given two features where the parent contains the child,
will determine what the relationship between them is
Example :
Returns :
Args : parent SeqFeature, child SeqFeature OR
( run in 0.513 second using v1.01-cache-2.11-cpan-39bf76dae61 )