Bio-ASN1-EntrezGene

 view release on metacpan or  search on metacpan

examples/parse_entrez_gene_example.pl  view on Meta::CPAN

    {
      if($t->{genomic} eq $genomeacc) # only process when the trans is on same contig as first one
      {
        $genestart = $t->{exons}->[0]->{from} if(!$genestart || $t->{exons}->[0]->{from} < $genestart);
        $geneend = $t->{exons}->[$#{$t->{exons}}]->{to} if($t->{exons}->[$#{$t->{exons}}]->{to} > $geneend);
      }
      foreach my $exon (@{$t->{exons}})
      {
        $trans->{strand} = $exon->{strand} unless $trans->{strand};
        my $start = $exon->{from}; # follow Entrez Gene style, start is always smaller than end
        my $end = $exon->{to};
        my $coordSys = "$t->{genomic}";
      }
      # finally protein
      if($t->{protein})
      {
        foreach my $pacc (keys %{$t->{protein}})
        {
          my $p = $t->{protein}->{$pacc};
          my $deal_with_prot_xref if($p->{acc}); # db should be refseq
          my $add_ccds_xref if($p->{ccds}); # db should be CCDS
          my $protname = $p->{name} if($p->{name});
          if($p->{from} || $p->{to}) # sometimes Entrez Gene forgets to annotate CDS start/end like for gene 574, NP_001178
          {
            my $protcoordSys = "$t->{genomic}";
            my $protstart = $p->{from} if $p->{from};
            my $protend = $p->{to} if $p->{to};
          }
          # domains
          if($p->{dom})
          {
            foreach my $dom (@{$p->{dom}})
            {
              my $desc = $dom->{desc};
              my $score = $dom->{score} if(defined $dom->{score});
              my $loc = { start => $dom->{start}, end=> $dom->{end}, coordSys => "$p->{acc}" };
              my $xref = $dom->{xref};
            }
          }
        }
      }
    }
    # put transcript into variant if annotated, otherwise into transcript
    if($variant)
    {
      # user can decide how to store
    }
    else
    {
      # user can decide how to store
    }
  }
  if($genestart && $geneend)
  {
    # user can decide how to store
  }

  # tracking info about how this gene has changed
  if(safeval($seq, '{track-info}->[0]->{current-id}'))
  {
    my (@ids, $newegid, $newllid);
    foreach my $id (@{$seq->{'track-info'}->[0]->{'current-id'}})
    {
      my $tmpid = safeval($id, '{tag}->[0]->{id}');
      push(@ids, "$id->{db}:$tmpid");
      $newegid = $tmpid if($id->{db} =~ /^GeneID$/i);
      $newllid = $tmpid if($id->{db} =~ /^LocusID$/i);
    }
    my $comment = "Gene moved: current IDs are: " . join(' ; ', @ids);
  }

  return $llgene;
}

# safely assign a value to $data->{$key} ($data must be hash)
sub safeassign
{
  my ($data, $key, $ds, $str) = @_;
  my $tmp = safeval($ds, $str);
  $data->{$key} = $tmp if $tmp;
  return (defined $tmp)? 1 : 0;
}

# safely extracts a value, another choice is to simply use
# eval in-line, if it fails, it fails.  Probably faster, but can't
# give feedback in-line (always has to add a couple lines dealing with
# $@ for error reporting), might still be worth it though because
# of the speed.  User can make his/her own choice here.

sub safeval
{
  my ($ds, $str) = @_; # data structure and string (we need $ds passed in because we use strict)
  my @items = split('->', $str);
  foreach (@items)
  {
    my $tmp;
    if(($tmp) = /\[(\d+)\]/)
    {
      return undef unless(ref($ds) eq 'ARRAY' && @$ds > $tmp);
      $ds = $ds->[$tmp];
    }
    elsif(($tmp) = /^{(.*?)}$/)
    {
      return undef unless(ref($ds) eq 'HASH' && $ds->{$tmp}); # this is not ideal (since one might want to return '' instead of undef when this hash value is defined as ''), but correct for our situations
      $ds = $ds->{$tmp};
    }
    else
    {
      die "wrong syntax for string:$str\n";
    }
  }
  return $ds;
}

sub addxrefs
{
  # left for user implementation since it's project-specific
}

sub addxref
{
  # left for user implementation since it's project-specific
}

# used for making xrefs listed under comments
sub makexref



( run in 1.294 second using v1.01-cache-2.11-cpan-5735350b133 )