Chemistry-MolecularMass
view release on metacpan or search on metacpan
MolecularMass/MolecularMass.pm view on Meta::CPAN
package Chemistry::MolecularMass;
=pod
=head1 NAME
Chemistry::MolecularMass - Perl extension for calculating
molecular mass of a chemical compound given its chemical formula.
=head1 VERSION
0.1
=head1 AUTHOR AND COPYRIGHT
Copyright (C) 2000, Maksim A. Khrapov
maksim@recursivemind.com
http://www.recursivemind.com
MolecularMass/MolecularMass.pm view on Meta::CPAN
also allows a programmer to change the default masses of elements
for work with isotopes. It also includes some of the more common
chemical abbreviations as macros and allows adding new macros and
changing the values of existing macros. A hash of all macros and a hash
of all elements can be returned.
Arbitrary element names can be added; they are expected, however, to
start with an upper case letter followed by zero or more lower case
letters. Macros can be any string of characters. Macros are substituted
only once, so a macro should not evaluate to another macro. Legal
characters in a formula are: A-Za-z0-9<>{}[]()
Spaces are not allowed. Parentheses can be nested arbitrarily deep.
Each MolecularMass object has its own hashes of macros and element
masses, so modifications made to one MolecularMass object do NOT
affect another. The whole thing was programmed with reentrancy
in mind, so it should be thread safe as well.
=cut
use strict;
MolecularMass/MolecularMass.pm view on Meta::CPAN
bless $hr, $class;
return $hr;
}
#####################################################################
# calc_mass($formula)
#
# Calculate the molecular mass of the compound described by $formula.
#
# Steps:
#   1. reject an undefined formula or one containing characters outside
#      the documented legal set  A-Za-z0-9<>{}[]()
#   2. reject a formula whose brackets do not match (verify_parens)
#   3. normalise <>, {} and [] to plain parentheses
#   4. expand macros, parse the result into a symbol table and sum the
#      element masses
#
# Returns the value produced by sum_el_masses, or undef (after a warning)
# when the formula is rejected in step 1 or 2.  An explicit "return undef"
# is kept, rather than a bare return, so that callers using the result in
# list context keep receiving exactly one value as before.
sub calc_mass
{
    my $self    = shift;
    my $formula = shift;

    # Nothing sensible can be validated or parsed without a formula.
    unless (defined $formula)
    {
        warn "No formula given\n";
        return undef;    ### No formula
    }

    # An explicit 0-9 range is used instead of \d: on Unicode strings \d
    # also matches non-ASCII digits, which are not legal formula characters.
    if ($formula =~ /[^a-zA-Z0-9()<>{}\[\]]/)
    {
        warn "Forbidden chars\n";
        return undef;    ### Forbidden characters
    }

    unless (verify_parens($formula))
    {
        warn "Parentheses don't match\n";
        return undef;    ### Parentheses do not match
    }

    # From here on every bracket style is treated as a plain parenthesis.
    $formula =~ tr/<>{}[]/()()()/;

    my $exp_formula  = $self->expand_macros($formula);
    my %symbol_table = parse_formula($exp_formula);
    my $weight       = $self->sum_el_masses(\%symbol_table);

    return $weight;
}
#####################################################################
# expand_macros($formula)
#
# Replace every occurrence of a macro name in $formula with the macro's
# value wrapped in parentheses, and return the expanded formula.
#
# The substitution is done in ONE pass over the formula:
#   * text inserted for one macro is never scanned again, so a macro is
#     substituted only once even if its value contains another macro's name;
#   * macro names are matched literally (quotemeta), not as regular
#     expressions;
#   * when several macro names could match at the same position the longest
#     one wins, so the result does not depend on hash ordering.
#
# Macros are read from $self->{macros} (name => value).  With no usable
# macros the formula is returned unchanged.
sub expand_macros
{
    my $self    = shift;
    my $formula = shift;

    my $macros = $self->{macros} || {};

    # An empty name would match at every position, so it is ignored.
    my @names = grep { defined($_) && length($_) } keys %{$macros};
    return $formula unless @names;

    # Longest name first; ties broken alphabetically for a stable pattern.
    my @ordered = sort { length($b) <=> length($a) || $a cmp $b } @names;
    my $pattern = join '|', map { quotemeta($_) } @ordered;

    # An undefined value expands to "()", as plain interpolation would give.
    my %replacement =
        map { $_ => (defined $macros->{$_} ? $macros->{$_} : '') } @names;

    $formula =~ s/($pattern)/'(' . $replacement{$1} . ')'/ge;

    return $formula;
}
#####################################################################
sub sum_el_masses
{
my $self = shift;
my $symtab = shift;
my $weight = 0;
MolecularMass/MolecularMass.xs view on Meta::CPAN
/* Forward declarations of the C helpers used by this XS module. */
/* Bracket checking. verify_brackets() is the routine the verify_parens
   XSUB calls and whose int result it returns to Perl.
   NOTE(review): the roles of the other helpers in this group are inferred
   from their names only -- confirm against their definitions. */
int verify_brackets(char *);
int check_brackets(char *, char *);
int is_bracket(char);
int is_left_bracket(char);
char other_bracket(char);
int only_alnum(char *, char *);
int not_even(char *, char *);
char *matching_bracket(char *, char *);
/* Formula parsing. parse_formula_c() takes the formula string and returns
   an Atom_count pointer, or NULL when an error was flagged while parsing. */
Atom_count *parse_formula_c(char *formula);
void print_atom_count(Atom_count *i);
/* parse_formula_c() builds its result as combine(flatten(...)) and then
   releases the symbol tables with free_symtab(). */
Atom_count *flatten(Symtab *n);
Atom_count *combine(Atom_count *n);
Atom_count *add_atom(Atom_count *i, Atom_count *j);
void free_symtab(Symtab *n);
/* tokenize() reads the next token starting at *f into *t and advances *f
   past it; it returns 1 when a token was produced and 0 at the end of the
   string. */
int tokenize(Token *t, int *error, char **f);
char *make_str_copy(char *s);
void multiply(Atom_count *i, int n);
/* Constructors. NOTE(review): presumably heap-allocating -- confirm and
   check ownership at the call sites. */
Atom_count *new_element(char *element_symbol);
Symtab *new_symtab(void);
/***********************************************/
/* */
/* Functions (for parsing) */
/* */
/***********************************************/
Atom_count *parse_formula_c(char *formula)
{
Token tok;
Token *t = &tok;
int error = 0;
Stack *temp_stack;
Atom_count *ac;
Symtab *st;
Stack *stack = (Stack *) malloc(sizeof(Stack));
stack->first_tab = NULL;
stack->last_tab = NULL;
stack->prev = NULL;
t->type = 4; /* Wrong!!! */
t->element_symbol = NULL;
t->count = 0;
while(tokenize(&tok, &error, &formula))
{
if(t->type == 0) /* left parenthesis */
{
temp_stack = (Stack *) malloc(sizeof(Stack));
temp_stack->first_tab = NULL;
temp_stack->last_tab = NULL;
temp_stack->prev = stack;
stack = temp_stack;
}
else if(t->type == 1) /* element name */
MolecularMass/MolecularMass.xs view on Meta::CPAN
}
if(error) return(NULL);
ac = combine(flatten(stack->first_tab));
free_symtab(stack->first_tab);
free(stack);
return(ac);
}
/************************************************/
int tokenize(Token *t, int *error, char **formula)
{
char *formula_offset = *formula;
char *i = formula_offset;
char *j;
char *k;
if(*i == '(')
{
t->type = 0;
++formula_offset;
*formula = formula_offset;
return(1);
}
else if(*i == ')')
{
t->type = 3;
++formula_offset;
*formula = formula_offset;
return(1);
}
else if(isupper(*i))
{
t->type = 1;
++i;
while(islower(*i))
{
++i;
}
j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
k = j;
while(formula_offset != i)
{
*k = *formula_offset;
++formula_offset;
++k;
}
*k = '\0';
free(t->element_symbol);
t->element_symbol = j;
*formula = formula_offset;
return(1);
}
else if(isdigit(*i))
{
t->type = 2;
++i;
while(isdigit(*i))
{
++i;
}
j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
k = j;
while(formula_offset != i)
{
*k = *formula_offset;
++formula_offset;
++k;
}
*k = '\0';
t->count = atoi(j);
free(j);
*formula = formula_offset;
return(1);
}
else if(*i == '\0')
{
free(t->element_symbol);
return(0);
}
else
{
free(t->element_symbol);
MolecularMass/MolecularMass.xs view on Meta::CPAN
int
verify_parens(s)
char *s;
CODE:
/* Expose the C helper verify_brackets() to Perl: its int result is
   returned unchanged. The Perl-side calc_mass() treats a false value
   as "Parentheses don't match". */
RETVAL = verify_brackets(s);
OUTPUT:
RETVAL
void
parse_formula(s)
char *s;
PREINIT:
Atom_count *i;
PPCODE:
i = parse_formula_c(s);
if(i == NULL)
{
/* do push nothing on the stack --
an empty list is implicitly returned */
}
else
{
while(i != NULL)
{
MolecularMass/README view on Meta::CPAN
NAME
Chemistry::MolecularMass - Perl extension for calculating molecular mass of a chemical compound given its chemical formula.
VERSION
0.1
AUTHOR AND COPYRIGHT
Copyright (C) 2000, Maksim A. Khrapov maksim@recursivemind.com http://www.recursivemind.com
This program is distributed under Perl Artistic License. No warranty. Use at your own risk.
SYNOPSIS
MolecularMass/README view on Meta::CPAN
also allows a programmer to change the default masses of elements
for work with isotopes. It also includes some of the more common
chemical abbreviations as macros and allows adding new macros and
changing the values of existing macros. A hash of all macros and a hash
of all elements can be returned.
Arbitrary element names can be added; they are expected, however, to
start with an upper case letter followed by zero or more lower case
letters. Macros can be any string of characters. Macros are substituted
only once, so a macro should not evaluate to another macro. Legal
characters in a formula are: A-Za-z0-9<>{}[]()
Spaces are not allowed. Parentheses can be nested arbitrarily deep.
Each MolecularMass object has its own hashes of macros and element
masses, so modifications made to one MolecularMass object do NOT
affect another. The whole thing was programmed with reentrancy
in mind, so it should be thread safe as well.
( run in 0.636 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )