Chemistry-File-InChI
view release on metacpan or search on metacpan
lib/Chemistry/File/InChI.pm view on Meta::CPAN
=head1 SYNOPSIS
use Chemistry::File::InChI;
# read a molecule
my $mol = Chemistry::Mol->parse('InChI=1S/H2N2/c1-2/h1-2H', format => 'inchi');
=head1 DESCRIPTION
InChI identifier reader written according to L<Richard L. Apodaca's InChI grammar|https://github.com/metamolecular/inchi-grammar>.
Only formula, C</c>, C</h>, C</t>, C</s> and C</q> layers are supported at the moment.
Certain InChI concepts do not map onto the concepts of C<Chemistry::Mol>, so they are stored as molecule and atom attributes.
Count multipliers of the formula components are stored, one per component, as an array reference in molecule attribute C<inchi/counts>.
Stereochemistry setting of C<1> or C<2> is stored in molecule attribute C<inchi/stereochemistry>.
Charges are stored in molecule attribute C<inchi/charges>.
Tetrahedral center setting C<+> or C<-> is stored in atom attribute C<inchi/chirality>.
=cut
lib/Chemistry/File/InChI/Parser.yp view on Meta::CPAN
{
my $layer1_id = first { $LAYER_ORDER[$_] eq $_[1] } 0..$#LAYER_ORDER;
my $layer2_id = first { $LAYER_ORDER[$_] eq $_[2] } 0..$#LAYER_ORDER;
die "unknown layer $_[2]\n" unless defined $layer2_id;
die "duplicated layer $_[1]\n" if $layer1_id == $layer2_id;
die "incorrect layer order, $_[2] must appear before $_[1]\n" if $layer1_id > $layer2_id;
$_[2];
}
;
# A layer is exactly one of the layer kinds listed below. The semantic
# value of the rule is an upper-case string naming the kind of layer
# that was recognised; the value of the underlying layer rule is discarded.
# NOTE(review): these names are presumably the ones looked up in
# @LAYER_ORDER by the rule that sequences layers - confirm against that rule.
layer: formula
{ 'FORMULA' }
| connections
{ 'CONNECTIONS' }
| h_atoms
{ 'H_ATOMS' }
| charge
{ 'CHARGE' }
| tetrahedral
{ 'TETRAHEDRAL' }
| stereochemistry
{ 'STEREOCHEMISTRY' }
;
# A formula is a formula_first token followed by any number of
# formula_continuation tokens. The semantic value of the rule is an array
# reference holding the token values in the order they were read.
formula: formula_first
{ [ $_[1] ] }
| formula formula_continuation
{
    # push returns the new element count, not the array, so the array
    # reference has to be returned explicitly. Otherwise the next
    # reduction of this rule would receive a number in $_[1] and fail
    # when dereferencing it as an array.
    push @{$_[1]}, $_[2];
    $_[1];
}
;
connections: '/' 'c'
{ $_[0]->{USER}{CURSOR}++ }
| '/' 'c' graph
{ $_[0]->{USER}{CURSOR}++ }
| connections ';'
{ $_[0]->{USER}{CURSOR}++ }
| connections ';' graph
lib/Chemistry/File/InChI/Parser.yp view on Meta::CPAN
}
# Prefix: consume a leading "InChI=1" or "InChI=1S" and hand it to the
# parser as a 'prefix' token carrying the matched text.
return ( 'prefix', $1 ) if $self->YYData->{INPUT} =~ s/^(InChI=1S?)//;
# Formula parts
# Consumes a separator ('/' or '.'), an optional multiplier and a run of
# element symbols with optional counts. Creates the non-hydrogen atoms in
# the molecule and returns a 'formula_first' token for '/' or a
# 'formula_continuation' token for '.', with the formula text as value.
# TODO: Check Hill order, require counts > 1
if( $self->YYData->{INPUT} =~ s/^([\/\.])([2-9]|[1-9][0-9]+)?(([A-Z][a-z]?\d*)+)// ) {
    my( $separator, $multiplier, $formula ) = ( $1, $2, $3 );
    $multiplier ||= 1; # default to 1 when no multiplier was given

    # Map of 1-based creation index => atom for this formula part
    my %atom_map;
    my $atom_index = 0;
    while( $formula =~ /([A-Z][a-z]?)(\d*)/g ) {
        my( $symbol, $n_atoms ) = ( $1, $2 );
        next if $symbol eq 'H'; # H atoms will be added later
        $n_atoms ||= 1; # an element without a count occurs once
        for (1..$n_atoms) {
            my $atom = Chemistry::Atom->new( symbol => $symbol );
            $self->{USER}{MOL}->add_atom( $atom );
            $atom_map{ ++$atom_index } = $atom;
        }
    }

    # The separator is either '/' or '.'; '.' appends to existing state
    if( $separator ne '/' ) {
        push @{$self->{USER}{ATOM_MAPS}}, \%atom_map;
        push @{$self->{USER}{MOL}->attr( 'inchi/counts' )}, $multiplier;
        return ( 'formula_continuation', $formula );
    }

    # '/' starts the formula: (re)initialise the per-part bookkeeping
    $self->{USER}{ATOM_MAPS} = [ \%atom_map ];
    $self->{USER}{MOL}->attr( 'inchi/counts', [ $multiplier ] );
    return ( 'formula_first', $formula );
}
# Reset cursor on 'h', 'q' or 't'
# The consumed letter is returned as both the token type and its value.
if( $self->YYData->{INPUT} =~ s/^([hqt])// ) {
    my $letter = $1;
    $self->{USER}{CURSOR} = 0;
    return ( $letter, $letter );
}
# Remove unsupported layers
( run in 0.491 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )