Chemistry-MolecularMass

 view release on metacpan or  search on metacpan

MolecularMass/MolecularMass.pm  view on Meta::CPAN

package Chemistry::MolecularMass;

=pod

=head1 NAME

Chemistry::MolecularMass - Perl extension for calculating
molecular mass of a chemical compound given its chemical formula.

=head1 VERSION

0.1

=head1 AUTHOR AND COPYRIGHT

Copyright (C) 2000, Maksim A. Khrapov
maksim@recursivemind.com
http://www.recursivemind.com

MolecularMass/MolecularMass.pm  view on Meta::CPAN

   also allows a programmer to change the default masses of elements
   for work with isotopes. It also includes some of the more common
   chemical abbreviations as macros and allows adding new macros and
   changing the values of old macros. A hash of all macros and a hash
   of all elements can be returned.

   Arbitrary element names can be added; they are expected, however, to
   start with an upper case letter followed by zero or more lower case
   letters. Macros can be any string of characters. Macros are substituted
   only once, so a macro should not evaluate to another macro. Legal
   characters in a formula are: A-Za-z0-9<>{}[]()
   Spaces are not allowed. Parentheses can be nested arbitrarily deep.

   Each MolecularMass object has its own hashes of macros and element 
   masses, so modifications made to one MolecularMass object do NOT
   affect another. The whole thing was programmed with reentrancy
   in mind, so it should be thread safe as well.

=cut

use strict;

MolecularMass/MolecularMass.pm  view on Meta::CPAN


   bless $hr, $class;
   return $hr;
}

#####################################################################

# Compute the mass of the compound described by a formula string.
#
# Arguments: the object, followed by the formula. The formula may use
#            letters, digits and the bracket pairs (), <>, {} and [].
# Returns:   whatever sum_el_masses() yields for the parsed formula, or
#            undef (after a warning) when the formula contains a forbidden
#            character or verify_parens() rejects it.
sub calc_mass
{
   my ($self, $formula) = @_;

   # Anything outside letters, digits and the bracket characters is refused.
   if($formula =~ /[^a-zA-Z\d()<>{}\[\]]/)
   {
      warn "Forbidden chars\n";
      return undef;
   }

   # Bracket matching is checked on the formula as given, i.e. before the
   # different bracket styles are folded together below.
   if(!verify_parens($formula))
   {
      warn "Parentheses don't match\n";
      return undef;
   }

   # Fold every bracket style into plain parentheses.
   (my $normalized = $formula) =~ tr/<>{}[]/()()()/;

   my $expanded = $self->expand_macros($normalized);
   my %atom_counts = parse_formula($expanded);
   my $mass = $self->sum_el_masses(\%atom_counts);

   return $mass;
}

#####################################################################

# Replace every macro name found in a formula by its value, wrapped in
# parentheses so that a following count applies to the whole group.
#
# Arguments: the object (its {macros} hash maps macro name => expansion),
#            followed by the formula string.
# Returns:   the formula with macros expanded; it is returned unchanged
#            when it is undefined or when there are no usable macros.
#
# The substitution is done in ONE pass over the formula, so text produced
# by an expansion is never expanded again and the result does not depend
# on hash ordering. Macro names are matched literally (regex
# metacharacters are escaped), and when several names could match at the
# same position the longest one wins.
sub expand_macros
{
   my $self = shift;
   my $formula = shift;

   return $formula unless defined $formula;

   my $macros = $self->{macros} || {};

   # Empty names are ignored: an empty alternative would match everywhere.
   # Longest first, then alphabetical, to keep the pattern deterministic.
   my @names = sort { length($b) <=> length($a) || $a cmp $b }
               grep { length } keys %$macros;
   return $formula unless @names;

   my $pattern = join '|', map { quotemeta } @names;
   $formula =~ s/($pattern)/($macros->{$1})/g;

   return $formula;
}

#####################################################################

sub sum_el_masses
{
   my $self = shift;
   my $symtab = shift;

   my $weight = 0;

MolecularMass/MolecularMass.xs  view on Meta::CPAN


/* Forward declarations for the bracket-checking routines.
   verify_brackets() is the one the verify_parens XSUB calls.
   NOTE(review): the roles of the other helpers are inferred from their
   names only -- confirm against their definitions. */
int verify_brackets(char *);
int check_brackets(char *, char *);
int is_bracket(char);
int is_left_bracket(char);
char other_bracket(char);
int only_alnum(char *, char *);
int not_even(char *, char *);
char *matching_bracket(char *, char *);

/* Forward declarations for the formula parser (parse_formula_c) and the
   related Token / Symtab / Atom_count routines. */
Atom_count *parse_formula_c(char *formula);
void print_atom_count(Atom_count *i);
Atom_count *flatten(Symtab *n);
Atom_count *combine(Atom_count *n);
Atom_count *add_atom(Atom_count *i, Atom_count *j);
void free_symtab(Symtab *n);
int tokenize(Token *t, int *error, char **f);
char *make_str_copy(char *s);
void multiply(Atom_count *i, int n);
Atom_count *new_element(char *element_symbol);
Symtab *new_symtab(void);

/***********************************************/
/*                                             */
/*      Functions (for parsing)                */
/*                                             */
/***********************************************/

Atom_count *parse_formula_c(char *formula)
{
   Token tok;
   Token *t = &tok;
   int error = 0;
   Stack *temp_stack;
   Atom_count *ac;
   Symtab *st;
   Stack *stack = (Stack *) malloc(sizeof(Stack));
   stack->first_tab = NULL;
   stack->last_tab = NULL;
   stack->prev = NULL;

   t->type = 4; /* Wrong!!! */
   t->element_symbol = NULL;
   t->count = 0;

   while(tokenize(&tok, &error, &formula))
   {
      if(t->type == 0) /* left parenthesis */
      {
         temp_stack = (Stack *) malloc(sizeof(Stack));
	 temp_stack->first_tab = NULL;
	 temp_stack->last_tab = NULL;
	 temp_stack->prev = stack;
	 stack = temp_stack;
      }
      else if(t->type == 1) /* element name */

MolecularMass/MolecularMass.xs  view on Meta::CPAN

   }
   if(error) return(NULL);
   ac = combine(flatten(stack->first_tab));
   free_symtab(stack->first_tab);
   free(stack);
   return(ac);
}

/************************************************/

int tokenize(Token *t, int *error, char **formula)
{
   char *formula_offset = *formula;
   char *i = formula_offset;
   char *j;
   char *k;

   if(*i == '(')
   {
      t->type = 0;
      ++formula_offset;
      *formula = formula_offset;
      return(1);
   }
   else if(*i == ')')
   {
      t->type = 3;
      ++formula_offset;
      *formula = formula_offset;
      return(1);
   }
   else if(isupper(*i))
   {
      t->type = 1;
      ++i;
      while(islower(*i))
      {
         ++i;
      }
      j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
      k = j;
      while(formula_offset != i)
      {
         *k = *formula_offset;
	 ++formula_offset;
	 ++k;
      }
      *k = '\0';
      free(t->element_symbol);
      t->element_symbol = j;
      *formula = formula_offset;
      return(1);
   }
   else if(isdigit(*i))
   {
      t->type = 2;
      ++i;
      while(isdigit(*i))
      {
         ++i;
      }
      j = (char *) malloc(sizeof(char) * (i - formula_offset + 1));
      k = j;
      while(formula_offset != i)
      {
         *k = *formula_offset;
	 ++formula_offset;
	 ++k;
      }
      *k = '\0';
      t->count = atoi(j);
      free(j);
      *formula = formula_offset;
      return(1);
   }
   else if(*i == '\0')
   {
      free(t->element_symbol);
      return(0);
   }
   else
   {
      free(t->element_symbol);

MolecularMass/MolecularMass.xs  view on Meta::CPAN


int
verify_parens(s)
	char *s;
	CODE:
		/* Thin wrapper: pass the string to the C routine
		   verify_brackets() and hand its int result back to Perl
		   unchanged. */
		RETVAL = verify_brackets(s);
	OUTPUT:
		RETVAL

void
parse_formula(s)
	char *s;
	PREINIT:
		Atom_count *i;
	PPCODE:
		i = parse_formula_c(s);

		if(i == NULL)
		{
		   /* do push nothing on the stack --
		      an empty list is implicitly returned */
		}
		else
		{
		   while(i != NULL)
		   {

MolecularMass/README  view on Meta::CPAN

NAME
    Chemistry::MolecularMass - Perl extension for calculating molecular mass of a chemical compound given its chemical formula.

VERSION
    0.1

AUTHOR AND COPYRIGHT
    Copyright (C) 2000, Maksim A. Khrapov maksim@recursivemind.com http://www.recursivemind.com

    This program is distributed under Perl Artistic License. No warranty. Use at your own risk.

SYNOPSIS

MolecularMass/README  view on Meta::CPAN

       also allows a programmer to change the default masses of elements
       for work with isotopes. It also includes some of the more common
       chemical abbreviations as macros and allows adding new macros and
       changing the values of old macros. A hash of all macros and a hash
       of all elements can be returned.

       Arbitrary element names can be added; they are expected, however, to
       start with an upper case letter followed by zero or more lower case
       letters. Macros can be any string of characters. Macros are substituted
       only once, so a macro should not evaluate to another macro. Legal
       characters in a formula are: A-Za-z0-9<>{}[]()
       Spaces are not allowed. Parentheses can be nested arbitrarily deep.

       Each MolecularMass object has its own hashes of macros and element 
       masses, so modifications made to one MolecularMass object do NOT
       affect another. The whole thing was programmed with reentrancy
       in mind, so it should be thread safe as well.



( run in 0.821 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )