Chemistry-File-InChI
view release on metacpan or search on metacpan
lib/Chemistry/File/InChI/Parser.yp view on Meta::CPAN
# Footer section
# Parses an InChI string and returns the molecule built from it.
#
# Parameters:
#   $string - text to be parsed; it is stored as the pending lexer input.
#
# A fresh Chemistry::Mol object is created for every call and the
# component cursor is reset to 0 before the generated parser is run with
# _Lexer as the lexer callback and _Error as the error callback.
#
# Returns the object stored in $self->{USER}{MOL}.
sub parse
{
    my( $self, $string ) = @_;

    # Pending input consumed piece by piece by _Lexer
    $self->YYData->{INPUT} = $string;

    # Start from an empty molecule with the cursor at the first component
    @{ $self->{USER} }{ qw( MOL CURSOR ) } = ( Chemistry::Mol->new, 0 );

    my @options = (
        yylex   => \&_Lexer,
        yyerror => \&_Error,
        yydebug => 0,
    );
    $self->YYParse( @options );

    return $self->{USER}{MOL};
}
# Lexer callback passed to YYParse as 'yylex'.
#
# Removes the next token from the front of the pending input
# ( $self->YYData->{INPUT} ) and returns it as a ( type, value ) pair.
# ( '', '' ) is returned when nothing more can be consumed.
#
# Tokens are tried in this order:
#   'prefix'               - 'InChI=1' or 'InChI=1S'
#   'formula_first'        - formula part introduced by '/'
#   'formula_continuation' - formula part introduced by '.'
#   'h', 'q', 't'          - returned as themselves; the cursor is reset to 0
#   any other character    - returned as itself (a newline is not matched)
# Layers starting with /p, /b, /m, /s, /i, /f or /o are removed without
# producing a token.
#
# Side effects of a formula token: every non-hydrogen atom is added to
# $self->{USER}{MOL}; a map from 1-based atom index to atom object is stored
# in $self->{USER}{ATOM_MAPS}; the optional leading multiplier (default 1) is
# recorded in the molecule's 'inchi/counts' attribute.
sub _Lexer
{
    my( $self ) = @_;

    my $data = $self->YYData;

    # If the line is used up and the input is originating from the file,
    # another line is read. "Used up" means undefined or zero-length: a
    # plain truth test would also throw away a pending '0' character.
    my $is_exhausted = !defined $data->{INPUT} || $data->{INPUT} eq '';
    if( $is_exhausted && $self->{USER}{FILEIN} ) {
        my $filein = $self->{USER}{FILEIN};
        $data->{INPUT} = <$filein>;
        $self->{USER}{CHARNO} = 0;
    }

    # Missing input or end of file: normalise to an empty string so that
    # the substitutions below never operate on an undefined value.
    $data->{INPUT} = '' unless defined $data->{INPUT};

    # Prefix
    if( $data->{INPUT} =~ s/^(InChI=1S?)// ) {
        return ( 'prefix', $1 );
    }

    # Formula parts
    # TODO: Check Hill order, require counts > 1
    if( $data->{INPUT} =~ s/^([\/\.])([2-9]|[1-9][0-9]+)?(([A-Z][a-z]?\d*)+)// ) {
        my( $sep, $multiplier, $formula ) = ( $1, $2, $3 );
        $multiplier = 1 unless $multiplier;

        # Atoms of this component, keyed by their 1-based index
        my %atom_map;
        my $atom_index = 0;
        while( $formula =~ /([A-Z][a-z]?)(\d*)/g ) {
            my( $element, $element_count ) = ( $1, $2 );
            next if $element eq 'H'; # H atoms will be added later
            $element_count = 1 unless $element_count;
            for (1..$element_count) {
                my $atom = Chemistry::Atom->new( symbol => $element );
                $self->{USER}{MOL}->add_atom( $atom );
                $atom_index++;
                $atom_map{$atom_index} = $atom;
            }
        }

        if( $sep eq '/' ) {
            # First component: start the per-component lists afresh
            $self->{USER}{ATOM_MAPS} = [ \%atom_map ];
            $self->{USER}{MOL}->attr( 'inchi/counts', [ $multiplier ] );
            return ( 'formula_first', $formula );
        } else {
            # Further component: append to the existing lists
            push @{$self->{USER}{ATOM_MAPS}}, \%atom_map;
            push @{$self->{USER}{MOL}->attr( 'inchi/counts' )}, $multiplier;
            return ( 'formula_continuation', $formula );
        }
    }

    # Reset cursor on 'h', 'q' or 't'
    if( $data->{INPUT} =~ s/^([hqt])// ) {
        $self->{USER}{CURSOR} = 0;
        return ( $1, $1 );
    }

    # Remove unsupported layers
    $data->{INPUT} =~ s/^(\/[pbmsifo][^\/]*)+//;

    # Any other character
    if( $data->{INPUT} =~ s/^(.)// ) {
        return ( $1, $1 );
    }

    return ( '', '' );
}
# Error callback passed to YYParse as 'yyerror'.
#
# Aborts parsing by dying with a message made of the text 'ERROR: ', the
# input that is still pending in $self->YYData->{INPUT}, and a newline.
sub _Error
{
    my( $self ) = @_;

    my @message = ( 'ERROR: ', $self->YYData->{INPUT}, "\n" );
    die @message;
}
# Adds bonds between one atom and one or more partner atoms.
#
# Parameters (positional):
#   first  - index of the atom every bond starts from;
#   second - either a single atom index or a reference to an array of
#            atom indices.
#
# Every index is resolved with $self->_get_atom() and one bond per partner
# is created in $self->{USER}{MOL} via new_bond().
sub _add_bonds
{
    my( $self, $from, $to ) = @_;

    my @partners;
    if( ref $to eq 'ARRAY' ) {
        @partners = @$to;
    } else {
        @partners = ( $to );
    }

    foreach my $partner (@partners) {
        # The origin atom is looked up anew for every bond
        my @ends = ( $self->_get_atom( $from ),
                     $self->_get_atom( $partner ) );
        $self->{USER}{MOL}->new_bond( atoms => \@ends );
    }
}
# Attaches hydrogen atoms to one or more atoms of the current component.
#
# Parameters:
#   $atoms - a single atom index or a reference to an array of indices;
#            indices are looked up in the atom map selected by
#            $self->{USER}{CURSOR};
#   $count - number of hydrogens per atom; a false value means 1.
#
# For every hydrogen a new Chemistry::Atom with symbol 'H' is added to
# $self->{USER}{MOL} and bonded to its parent atom.
sub _add_hydrogens
{
    my( $self, $atoms, $count ) = @_;

    my $atom_map = $self->{USER}{ATOM_MAPS}[$self->{USER}{CURSOR}];

    my @indices = ref $atoms ? @$atoms : ( $atoms );
    my @parents = map { $atom_map->{$_} } @indices;

    $count ||= 1;

    foreach my $parent (@parents) {
        foreach my $nth (1..$count) {
            my $hydrogen = Chemistry::Atom->new( symbol => 'H' );
            my $mol = $self->{USER}{MOL};
            $mol->add_atom( $hydrogen );
            $mol->new_bond( atoms => [ $parent, $hydrogen ] );
        }
    }
}
sub _get_atom
{
my( $self, $atom ) = @_;
my $atom_map = $self->{USER}{ATOM_MAPS}[$self->{USER}{CURSOR}];
if( !exists $atom_map->{$atom} ) {
# If an atom with given index does not exist, this is probably a hydrogen.
# FIXME: Check if there are unused hydrogen atoms.
( run in 1.073 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )