Bio-ASN1-EntrezGene
view release on metacpan or search on metacpan
examples/parse_entrez_gene_example.pl view on Meta::CPAN
{
if($t->{genomic} eq $genomeacc) # only process when the trans is on same contig as first one
{
$genestart = $t->{exons}->[0]->{from} if(!$genestart || $t->{exons}->[0]->{from} < $genestart);
$geneend = $t->{exons}->[$#{$t->{exons}}]->{to} if($t->{exons}->[$#{$t->{exons}}]->{to} > $geneend);
}
foreach my $exon (@{$t->{exons}})
{
$trans->{strand} = $exon->{strand} unless $trans->{strand};
my $start = $exon->{from}; # follow Entrez Gene style, start is always smaller than end
my $end = $exon->{to};
my $coordSys = "$t->{genomic}";
}
# finally protein
if($t->{protein})
{
foreach my $pacc (keys %{$t->{protein}})
{
my $p = $t->{protein}->{$pacc};
my $deal_with_prot_xref if($p->{acc}); # db should be refseq
my $add_ccds_xref if($p->{ccds}); # db should be CCDS
my $protname = $p->{name} if($p->{name});
if($p->{from} || $p->{to}) # sometimes Entrez Gene forgets to annotate CDS start/end like for gene 574, NP_001178
{
my $protcoordSys = "$t->{genomic}";
my $protstart = $p->{from} if $p->{from};
my $protend = $p->{to} if $p->{to};
}
# domains
if($p->{dom})
{
foreach my $dom (@{$p->{dom}})
{
my $desc = $dom->{desc};
my $score = $dom->{score} if(defined $dom->{score});
my $loc = { start => $dom->{start}, end=> $dom->{end}, coordSys => "$p->{acc}" };
my $xref = $dom->{xref};
}
}
}
}
}
# put transcript into variant if annotated, otherwise into transcript
if($variant)
{
# user can decide how to store
}
else
{
# user can decide how to store
}
}
if($genestart && $geneend)
{
# user can decide how to store
}
# tracking info about how this gene has changed
if(safeval($seq, '{track-info}->[0]->{current-id}'))
{
my (@ids, $newegid, $newllid);
foreach my $id (@{$seq->{'track-info'}->[0]->{'current-id'}})
{
my $tmpid = safeval($id, '{tag}->[0]->{id}');
push(@ids, "$id->{db}:$tmpid");
$newegid = $tmpid if($id->{db} =~ /^GeneID$/i);
$newllid = $tmpid if($id->{db} =~ /^LocusID$/i);
}
my $comment = "Gene moved: current IDs are: " . join(' ; ', @ids);
}
return $llgene;
}
# safely assign a value to $data->{$key} ($data must be hash)
sub safeassign
{
my ($data, $key, $ds, $str) = @_;
my $tmp = safeval($ds, $str);
$data->{$key} = $tmp if $tmp;
return (defined $tmp)? 1 : 0;
}
# safely extracts a value, another choice is to simply use
# eval in-line, if it fails, it fails. Probably faster, but can't
# give feedback in-line (always has to add a couple lines dealing with
# $@ for error reporting), might still be worth it though because
# of the speed. User can make his/her own choice here.
sub safeval
{
my ($ds, $str) = @_; # data structure and string (we need $ds passed in because we use strict)
my @items = split('->', $str);
foreach (@items)
{
my $tmp;
if(($tmp) = /\[(\d+)\]/)
{
return undef unless(ref($ds) eq 'ARRAY' && @$ds > $tmp);
$ds = $ds->[$tmp];
}
elsif(($tmp) = /^{(.*?)}$/)
{
return undef unless(ref($ds) eq 'HASH' && $ds->{$tmp}); # this is not ideal (since one might want to return '' instead of undef when this hash value is defined as ''), but correct for our situations
$ds = $ds->{$tmp};
}
else
{
die "wrong syntax for string:$str\n";
}
}
return $ds;
}
sub addxrefs
{
# left for user implementation since it's project-specific
}
sub addxref
{
# left for user implementation since it's project-specific
}
# used for making xrefs listed under comments
sub makexref
( run in 1.294 second using v1.01-cache-2.11-cpan-5735350b133 )