InSilicoSpectro

 view release on metacpan or  search on metacpan

scripts/importResid.pl  view on Meta::CPAN


use strict;
use Getopt::Long;
use XML::Parser;
use File::Temp qw(tempfile);
use LWP::Simple;
use Carp;
use Pod::Usage;

use InSilicoSpectro::InSilico::ModRes;
use InSilicoSpectro::InSilico::CleavEnzyme;
use InSilicoSpectro;

# Command-line options:
#   --dest=FILE   destination handed to InSilicoSpectro::saveInSilicoDef()
#   --user=NAME   Phenyx user name; when given, --dest is replaced by that
#                 user's "insilicodef.xml" file
#   --help        print the POD documentation and exit
#   --verbose     report the download on STDERR
my ($help, $verbose, $dest, $username);
my $optionsOk = GetOptions('dest=s'  => \$dest,
                           'user=s'  => \$username,
                           'help'    => \$help,
                           'verbose' => \$verbose);
if (!$optionsOk || defined($help)){
  # Exit status is 0 when help was explicitly requested, 1 on an option error.
  pod2usage(-verbose=>2, -exitval=>($help ? 0 : 1), -output=>\*STDOUT);
}

# The single positional argument names the RESID XML source: a URL, a local
# file path, or '-' to read the document from STDIN.
my $src = $ARGV[0] || CORE::die "must provide a resid_xml source (url or file)";

my $residFile;
if($src=~/^(?:ftp|https?):/i){
  # Remote source: fetch it into a temporary file that is removed at exit.
  my (undef, $ftmp)=tempfile(UNLINK=>1, SUFFIX=>".resid.xml");
  print STDERR "downloading $src to $ftmp\n" if $verbose;
  # Keep the status code itself (not the is_success() boolean) so that the
  # error message can describe what actually went wrong.
  my $rc=getstore($src, $ftmp);
  unless(is_success($rc)){
    InSilicoSpectro::Utils::io::croakIt("could not download $src: ".status_message($rc));
  }
  $residFile=$ftmp;
}else{
  $residFile=$src;
}

if($username){
  # Phenyx modules are loaded lazily: they are only needed with --user.
  require Phenyx::Config::GlobalParam;
  Phenyx::Config::GlobalParam::readParam();
  require Phenyx::Manage::User;
  my $user=Phenyx::Manage::User->new(name=>$username);
  $dest=$user->getFile("insilicodef.xml");
  #Phenyx::InSilicoSpectro::init($dest);
}

# The 'Stream' style dispatches to the StartTag/EndTag/Text subs defined
# further down in this file.
my $parser = XML::Parser->new(Style => 'Stream');
if($src eq '-'){
  $parser->parse(\*STDIN);
}else{
  # Three-argument open with a lexical handle: the file name is never
  # interpreted as an open mode or a pipe.
  open(my $residFh, '<', $residFile) || CORE::die ("cannot open [$residFile]: $!");
  $parser->parse($residFh);
  close($residFh);
}

InSilicoSpectro::saveInSilicoDef($dest);

# ------------------------------ XML --------------------------

# State shared by the Text/StartTag/EndTag handlers below.
my $curChar;          # text accumulated since the most recent start tag
my $eNum;             # running count of <Entry> elements seen
my $id;               # 'id' attribute of the current <Entry>
my $myId;             # "$id-$eNum" for the current <Entry>
my $correction;       # defined while inside a <CorrectionBlock>
my $weightType;       # 'type' attribute of the current <Weight>
my $isSpFeature;      # true while inside a <Feature> flagged by StartTag
my $name;             # text of the last <Name>
my $alternateName;    # text of the last <AlternateName>
my $description;      # text of the last <Description>; reset on each <Entry>
my $formula;          # whitespace-stripped <Formula> text from a <CorrectionBlock>
my $avgDelta;         # <Weight type="chemical"> text from a <CorrectionBlock>
my $monoDelta;        # text of any other <Weight> from a <CorrectionBlock>
my $seqSpecificity;   # text of the last <SequenceSpec>
my @feature;          # collected <Feature> texts; reset on each <Entry>

# Character-data handler for the XML::Parser 'Stream' style.
# The text of the current run is delivered in $_; it is appended to the
# $curChar buffer, which StartTag clears and EndTag consumes.
sub Text
{
  my $chunk = $_;
  $curChar .= $chunk;

} # Text


# Start-tag handler for the XML::Parser 'Stream' style.
#
# Arguments:
#   $p  - the parser object (unused here)
#   $el - name of the element being opened
# The element's attributes are read from the global hash %_.
#
# Effects on the handler state:
#   Entry           - records the entry id, bumps the entry counter and
#                     resets the per-entry feature list and description
#   CorrectionBlock - raises the $correction flag (cleared by EndTag)
#   Weight          - remembers the weight 'type' attribute
#   Feature         - flags whether the feature is of type 'SWISS-PROT'
# In every case the text buffer $curChar is cleared, so that EndTag only
# sees text found after this start tag.
sub StartTag
{
  my($p, $el) = @_;

  if ($el eq 'Entry'){
    $id = $_{id};
    $eNum++;
    $myId = "$id-$eNum";
    undef(@feature);
    undef $description;
    # NOTE(review): the other per-entry values (name, formula, deltas,
    # sequence specificity, ...) are not reset here, so an entry lacking
    # one of those elements inherits the previous entry's value - confirm
    # whether that is intended.
  }
  elsif ($el eq 'CorrectionBlock'){
    $correction = 1;
  }
  elsif ($el eq 'Weight'){
    $weightType = $_{type};
  }
  elsif ($el eq 'Feature'){
    # String comparison is required: with '==' both operands are numified
    # (to 0 for non-numeric text) and every feature type would match.
    my $featureType = $_{type};
    $isSpFeature = defined($featureType) && $featureType eq 'SWISS-PROT';
  }
  undef($curChar);

} # StartTag


# End-tag handler for the XML::Parser 'Stream' style.
#
# Arguments:
#   $parser - the parser object (unused here)
#   $tag    - name of the element being closed
#
# The text gathered in $curChar is filed according to the element name.
# When an <Entry> closes, an InSilicoSpectro::InSilico::ModRes object is
# created from the values collected for that entry.
sub EndTag
{
  my ($parser, $tag) = @_;

  # Elements whose text is copied unchanged into a state variable.
  my %plainTextSlot = (
    Name          => \$name,
    AlternateName => \$alternateName,
    Description   => \$description,
    SequenceSpec  => \$seqSpecificity,
  );

  if (my $slot = $plainTextSlot{$tag}){
    ${$slot} = $curChar;
  }
  elsif ($tag eq 'CorrectionBlock'){
    undef($correction);
  }
  elsif ($tag eq 'Formula'){
    # Only formulas found inside a <CorrectionBlock> are kept.
    if (defined($correction)){
      ($formula = $curChar) =~ s/\s//g;
    }
  }
  elsif ($tag eq 'Weight'){
    # Only weights found inside a <CorrectionBlock> are kept; 'chemical'
    # weights go to the average delta, all others to the monoisotopic one.
    if (defined($correction)){
      if ($weightType eq 'chemical'){
        $avgDelta = $curChar;
      }
      else{
        $monoDelta = $curChar;
      }
    }
  }
  elsif ($tag eq 'Feature'){
    if ($isSpFeature){
      push(@feature, $curChar);
      undef($isSpFeature);
    }
  }
  elsif ($tag eq 'Entry'){
    my $modRes = InSilicoSpectro::InSilico::ModRes->new(name=>$name);
    $modRes->{description}   = $description;
    $modRes->{alternateName} = $alternateName;
    $modRes->{residId}       = $id;

    # The specificity is normalised in place, as the shared variable is
    # not reset between entries.
    $seqSpecificity =~ s/\s//g;
    if (index($seqSpecificity, ',') >= 0){
      # Comma-separated residues become a lazy "anything in between" regexp.
      $seqSpecificity =~ s/,/.*?/g;
      $modRes->{regexpStr} = $seqSpecificity;
    }
    elsif (length($seqSpecificity) > 1){
      # First residue is the site; the remainder becomes a look-ahead.
      my $firstResidue = substr($seqSpecificity, 0, 1);
      my $following    = substr($seqSpecificity, 1);
      $modRes->{regexpStr} = "$firstResidue(?=$following)";
    }
    else{
      $modRes->{site}{residue} = $seqSpecificity;
    }

    $modRes->{delta_monoisotopic} = $monoDelta;
    $modRes->{delta_average}      = $avgDelta;
    $modRes->{sprotFT}            = sprintf('(%s).*', join('|', @feature));
    $modRes->{formula}            = $formula;
  }

} # EndTag



( run in 1.082 second using v1.01-cache-2.11-cpan-39bf76dae61 )