MS
view release on metacpan or search on metacpan
lib/MS/PepInfo.pm view on Meta::CPAN
die "Unsupported enzyme specified\n"
if (! defined $re{$_});
while ($seq =~ /$re{$_}/ig) {
push @cut_sites, $+[0];
}
}
push @cut_sites, length($seq);
@cut_sites = sort {$a <=> $b} uniq @cut_sites;
my @pieces;
for my $i (0..$#cut_sites) {
A:
for my $a (1..$missed+1) {
$a = $i + $a;
last A if ($a > $#cut_sites);
push @pieces, substr($seq,$cut_sites[$i],$cut_sites[$a]-$cut_sites[$i]);
}
}
return @pieces;
}
sub calc_gravy {
return sum( map {$kyte_doolittle{$_}} split( '', $_[0]) )/length($_[0]);
}
sub calc_parker {
return sum( map {$parker_guo_hodges{$_}} split( '', $_[0]) )/length($_[0]);
}
sub calc_aliphatic {
my @aa = split '', $_[0];
my $mf_A = grep {$_ eq 'A'} @aa;
my $mf_V = grep {$_ eq 'V'} @aa;
my $mf_IL = grep {$_ =~ /[IL]/} @aa;
return ($mf_A + 2.9*$mf_V + 3.9*$mf_IL) * 100 / scalar(@aa);
}
sub calc_mw {
return sum map {$ave_mass{$_}} (split('',$_[0]),'H2O');
}
sub calc_fragments {
my ($peptide,$mod_ref,$max_charge,$incl_nloss) = @_;
my $PROTON = elem_mass('H');
$max_charge = $max_charge // 1;
my @masses = map {aa_mass($_)} split('',$peptide);
unshift @masses, 0;
push @masses, formula_mass('HOH');
# check that mod array length equals mass array length
my $n_mass = scalar(@masses);
my $n_mods = scalar(@$mod_ref);
die "mod array len mismatch ($n_mass mass vs $n_mods mods)"
if ($n_mass ne $n_mods);
for (0..$#{$mod_ref}) {
my $mod = $mod_ref->[$_];
if (defined $mod) {
if ($mod =~ /^[\d\-\.e]+$/) { # is a float, use directly
$masses[$_] += $mod;
}
else {
my $mass = mod_mass($mod);
die "Mass for $mod not found" if (! defined $mass);
$masses[$_] += $mass;
}
}
}
my @pre_groups = ([@masses]);
my @cuts = ();
# calculate neutral loss peptides if peptide is not non-mobile
my $R_count = $peptide =~ tr/R/R/;
if ($incl_nloss && $R_count < $max_charge) {
while ($peptide =~ /P/g) {
push @cuts, $-[0] if (
$-[0] > 0
&& $-[0] < 6
&& $-[0] < $#masses-1);
}
pos($peptide) = 0;
while ($peptide =~ /H/g) {
push @cuts, $-[0] if (
$-[0] > 1
&& $-[0] < 4
&& $-[0] < $#masses-1);
}
pos($peptide) = 0;
while ($peptide =~ /D/g) {
push @cuts, $-[0]+1 if (
$-[0] < 6
&& $-[0] < $#masses-2);
}
pos($peptide) = 0;
@cuts = uniq @cuts;
for (@cuts) {
my @sub = @masses[$_+1..$#masses];
# put on front so more common frags will overwrite
unshift @pre_groups, [@sub];
}
}
my @series = ();
for my $g (0..$#pre_groups) {
my @m = @{$pre_groups[$g]};
my $tag = '';
if ($g > 0) {
my $lost = scalar(@masses) - scalar(@m) - 1;
$tag = "-$lost<sub>N</sub>";
}
#calculate [M]
my $pre_mz = (sum(@m) + $max_charge*$PROTON)/$max_charge;
push @series, [$pre_mz, '[M]', '', $max_charge, $tag];
for (1..$max_charge) {
#push @series, _series('a', $peptide, \@m, 1);
push @series, _series('b', $peptide, \@m, $_, $tag);
push @series, _series('y', $peptide, \@m, $_, $tag)
if ($g == 0);
}
}
@series = sort {$a->[0] <=> $b->[0]} @series;
#@series = grep {$_->[0] > 100} @series;
return @series;
}
sub _series {
my ($type, $peptide, $mass_ref, $charge, $tag) = @_;
my $PROTON = elem_mass('H');
my @masses = @{$mass_ref};
my @series;
my $term = $type =~ /^[y]$/ ? 'C' : 'N';
# calculate standard series
for my $i (1..$#masses-1) {
#for my $i ($charge..$#masses-1) {
#for my $i ($charge..$#masses-2) {
my ($start,$end)
= $term eq 'N' ? (0, $i) : ($#masses-$i, $#masses);
my $adjust
= $type eq 'a' ? - formula_mass('CO')
: 0;
my $mz = (sum(@masses[$start..$end]) + $adjust
+ $PROTON*$charge)/$charge;
push @series, [$mz,$type,$i,$charge, $tag];
}
# neutral ammonia loss
while ($peptide =~ /[RKQN]/g) {
my $i = $-[0]+1;
$i = $#masses - $i if ($term eq 'C');
next if ($i < $charge);
next if ($i >= $#masses -1);
my ($start,$end)
= $term eq 'N' ? (0, $i) : ($#masses-$i, $#masses);
my $adjust
= $type eq 'a' ? - formula_mass('CO')
: 0;
my $mz = (sum(@masses[$start..$end]) + $adjust - formula_mass('NH3')
+ $PROTON*$charge)/$charge;
push @series, [$mz,$type,$i,$charge,"-NH3$tag"];
}
pos($peptide) = 0;
# neutral water loss
if ($type ne 'a') {
while ($peptide =~ /[STED]/g) {
my $i = $-[0]+1;
$i = $#masses - $i if ($term eq 'C');
next if ($i < $charge);
next if ($i >= $#masses -1);
my ($start,$end)
= $term eq 'N' ? (0, $i) : ($#masses-$i, $#masses);
my $adjust
= $type eq 'a' ? - formula_mass('CO')
: 0;
my $mz = (sum(@masses[$start..$end]) + $adjust - formula_mass('H2O')
+ $PROTON*$charge)/$charge + $adjust;
push @series, [$mz,$type,$i,$charge,"-H2O$tag"];
}
}
return @series;
}
1;
__END__
=head1 NAME
MS::PepUtils - utility functions for proteomics calculations
=head1 SYNOPSIS
use MS::PepUtils qw/calc_mw calc_gravy calc_parker calc_aliphatic calc_fragments digest/;
my $mw = calc_mw( 'ACDEF' );
my $gravy = calc_gravy( 'ACDEF' );
my $hydro = calc_parker( 'ACDEF' );
my $ai = calc_aliphatic( 'ACDEF' );
my @peps = digest(
'ACDEF',
['trypsin'],
0,
);
my @frags = calc_fragments(
'ACDEF',
[0, 0, 0, 0, 0, 0, 0],
3,
1,
);
=head1 DESCRIPTION
B<WARNING:> This module is deprecated. See below.
C<MS::PepUtils> was a set of utility functions for common proteomics
calculations. It's use has been superceded by the L<MS::Peptide> and
L<MS::Protein> classes, which implement many of the same functions here in
both OO and functional interfaces and are generally more useful. This module has been retained for
backward-compatibility only.
=head1 FUNCTIONS
=head2 calc_mw
my $mw = calc_mw( 'ACDEF' );
Returns the average molecular weight of a protein.
=head2 calc_gravy
( run in 0.681 second using v1.01-cache-2.11-cpan-99c4e6809bf )