InSilicoSpectro

 view release on metacpan or  search on metacpan

scripts/peptSpectra/mascot2pept.pl  view on Meta::CPAN

		'minnumpept=i' => \$minNumPept,
		'minprotscore=f' => \$minProtScore,
		'minlen=i' => \$minLen,
		'instrument=s' => \$instrument,
                'verbose' => \$verbose) || defined($help) || (defined($outputScore) && (($basicScore > $outputScore) || ($outputScore > $minScore))) || (!defined($outputScore) && ($basicScore > $minScore)) || ($minScore > $saveScore))
{
  print STDERR "Usage: xml2pept.pl [options] idJobs
\t-help
\t-h
\t-verbose
\t--fasta=fname
\t--imposedcharge=int
\t--minscore=float     [minimum ion score to count the peptide, default=$minScore]
\t--savescore=float    [save ion score, default=$saveScore]
\t--outputscore=float  [minimum ion score to output the peptide, default=$outputScore]
\t--minprotscore=float [minimum protein score, default=$minProtScore]
\t--minnumpept=int     [minimum number of distinct peptides for one protein, default=$minNumPept]
\t--minlen=int         [minimum peptide length, default=$minLen]
\t--basicscore=float   [minimum ion score to read a peptide from the file, default=$basicScore]
\t--instrument=string  [instrument used, default='$instrument']

Note: It is mandatory that basicscore <= outputscore <= minscore <= savescore\n";
  exit(0);
}

# Without an explicit output threshold, fall back to the peptide counting threshold
unless (defined($outputScore)){
  $outputScore = $minScore;
}

# Library initialisation
InSilicoSpectro::init();

# Concatenation of the reference protein sequences read from the fasta file,
# each sequence being followed by a '|' separator. Stays undefined when no
# fasta file was given.
my $correctPeptide;
if ($fasta){
  # Loads a series of correct protein sequences as a fasta file
  my $protein;
  # Three-argument open with a lexical handle: the file name is never
  # interpreted as an open mode or a pipe
  open(my $fastaFh, '<', $fasta) || CORE::die("Cannot open [$fasta]: $!");
  while (my $line = <$fastaFh>){
    if (index($line, '>') == 0){
      # A header line closes the previous entry (nothing yet before the first
      # header, which still yields the leading '|' separator)
      $correctPeptide .= (defined($protein) ? $protein : '').'|';
      undef($protein);
    }
    else{
      # Sequence line: strip end-of-line characters and accumulate
      $line =~ s/[\n\r]//g;
      $protein .= $line;
    }
  }
  close($fastaFh);

  # The last entry is not followed by a header line, so it must be flushed
  # here, otherwise the final protein of the file would be silently dropped
  $correctPeptide .= $protein.'|' if (defined($protein) && length($protein));
}

# Charge format conversion: textual charge-state labels (keys, written without
# any whitespace) to comma-separated lists of charge values
my %charge = (
  # single charge states
  '1+'          => '1',
  '2+'          => '2',
  '3+'          => '3',
  '4+'          => '4',
  # combined charge states, with and without the comma before "and"
  '2+and3+'     => '2,3',
  '2+,and3+'    => '2,3',
  '1+,2+and3+'  => '1,2,3',
  '1+,2+,and3+' => '1,2,3',
);

# Modifications conversion (Mascot mod_file to InSilicoSpectro insilicodef.xml):
# keys are the modification names on the Mascot side, values are the matching
# InSilicoSpectro modification names.
# NOTE(review): the 'Title:' prefix of the two Biotin keys and the spelling
# 'Ubiquination I (K)' look like paste artefacts. They are left untouched
# because the exact Mascot-side spelling cannot be confirmed from this file;
# verify against the mod_file actually in use.
my %modifConv = (
  'Acetyl (K)'            => 'ACET_core',
  'Acetyl (N-term)'       => 'ACET_nterm',
  'Amide (C-term)'        => 'AMID',
  'Title:Biotin (K)'      => 'BIOT',
  'Title:Biotin (N-term)' => 'BIOT_nterm',
  'Carbamidomethyl (C)'   => 'Cys_CAM',
  'Carbamyl (K)'          => 'CAM_core',
  'Carbamyl (N-term)'     => 'CAM_nterm',
  'Carboxymethyl (C)'     => 'Cys_CM',
  'Deamidation (NQ)'      => 'DEAMID',
  'Guanidination (K)'     => 'Guanidination',
  'ICAT_light'            => 'ICAT_light',
  'ICAT_heavy'            => 'ICAT_heavy',
  'iTRAQ (K)'             => 'iTRAQ_KY',
  'iTRAQ (N-term)'        => 'iTRAQ_nterm',
  'iTRAQ (Y)'             => 'iTRAQ_KY',
  'N-Acetyl (Protein)'    => 'ACET_nterm',
  'N-Formyl (Protein)'    => 'FORM',
  'O18 (C-term)'          => 'O18',
  'Oxidation (M)'         => 'Oxidation_M',
  'Oxidation (HW)'        => 'Oxidation_HW',
  'Phospho (STY)'         => 'PHOS',
  'Phospho (Y)'           => 'PHOS',
  'Propionamide (C)'      => 'Cys_PAM',
  'Pyro-glu (N-term Q)'   => 'PYRR',
  'S-pyridylethyl (C)'    => 'Cys_PE',
  'Sulfation (Y)'         => 'SULF_core',
  'Sulfation (S)'         => 'SULF_core',
  'Sulfation (T)'         => 'SULF_core',
  'Ubiquination I (K)'    => 'Ubiquitin_0mc',
  # The value used to carry a stray leading double quote ('"Ubiquitin_1mc'),
  # which could never match a modification name
  'Ubiquitination II (K)' => 'Ubiquitin_1mc'
);

my $cmdLine = "mascot2pept.pl".(defined($verbose)?' -verbose':'').(defined($imposedCharge)?" --imposedcharge=$imposedCharge":'').(defined($fasta)?" --fasta=$fasta":'')." --basicscore=$basicScore --minscore=$minScore --savescore=$saveScore --minnumpep...
# Read the local clock a single time so that the date and time stamps agree
my @time = localtime();
# Date as YYYY-MM-DD: localtime counts years from 1900 and months from 0
my $date = join('-', 1900 + $time[5], sprintf('%02d', 1 + $time[4]), sprintf('%02d', $time[3]));
# Time as HH:MM:SS: localtime returns seconds, minutes, hours in that order
my $time = join(':', map { sprintf('%02d', $_) } @time[2, 1, 0]);
# Prints the beginning of the XML output on the default output handle: the XML
# declaration, the idi:header element (instrument, spectrum type, date, time,
# the command line wrapped in CDATA and the peak item order), then the opening
# idi:Identifications tag. $instrument, $date, $time and $cmdLine are
# interpolated as is, without XML escaping. The idi:PeptSpectraIdentifications,
# idi:OneSample and idi:Identifications elements are opened but not closed here.
# Everything up to the end_of_xml marker is emitted verbatim, so no comment can
# be placed inside the text.
print <<end_of_xml;
<?xml version="1.0" encoding="ISO-8859-1"?>
  <idi:PeptSpectraIdentifications  xmlns:idi="namespace/PeptSpectra.html">
    <idi:OneSample>
      <idi:header>
        <idi:instrument>$instrument</idi:instrument>
        <idi:spectrumType>msms</idi:spectrumType>
        <idi:date>$date</idi:date>
        <idi:time>$time</idi:time>
        <idi:autoExtraction><![CDATA[$cmdLine]]></idi:autoExtraction>
	<ple:ItemOrder xmlns:ple="namespace/PeakListExport.html">
	  <ple:item type="mass"/>
	  <ple:item type="intensity"/>
	  <ple:item type="charge"/>
	</ple:ItemOrder>
      </idi:header>
    <idi:Identifications>
end_of_xml

# Parses files
use XML::Parser;
# Name of the input file being processed; used as the loop variable of the
# foreach over @ARGV that follows.
# NOTE(review): presumably a package variable so that code outside this lexical
# scope (e.g. the parsing routines) can read it -- confirm.
our $file;
# Per-file parsing state; each of these is reset with undef() at the start of
# every iteration of the per-file loop.
my (%cmpd, %prot, %query, @fixedModif, @variableModif);
foreach $file (@ARGV){
  print STDERR "Parsing $file\n" if ($verbose);
  undef(%cmpd);
  undef(%prot);
  undef(%query);
  undef(@fixedModif);
  undef(@variableModif);

  if ($file =~ /\.gz$/){
    open(F, "gunzip -c $file |") || print STDERR "Warning, cannot open [$file]: $!";
  }
  else{
    open(F, $file) || print STDERR "Warning, cannot open [$file]: $!";
  }
  mascotParse(\*F);
  close(F);



( run in 1.691 second using v1.01-cache-2.11-cpan-5837b0d9d2c )