Chemistry-File-InChI

 view release on metacpan or  search on metacpan

lib/Chemistry/File/InChI.pm  view on Meta::CPAN

=head1 SYNOPSIS

    use Chemistry::File::InChI;

    # read a molecule
    my $mol = Chemistry::Mol->parse('InChI=1S/H2N2/c1-2/h1-2H', format => 'inchi');

=head1 DESCRIPTION

InChI identifier reader written according to L<Richard L. Apodaca's InChI grammar|https://github.com/metamolecular/inchi-grammar>.
Only formula, C</c>, C</h>, C</t>, C</s> and C</q> layers are supported at the moment.
Certain InChI concepts do not map onto the concepts of C<Chemistry::Mol>, so they are stored as molecule and atom attributes.

The count multiplier of each formula component is stored in molecule attribute C<inchi/counts> as an array reference with one entry per component.
Stereochemistry setting of C<1> or C<2> is stored in molecule attribute C<inchi/stereochemistry>.
Charges are stored in molecule attribute C<inchi/charges>.

The tetrahedral center setting (C<+> or C<->) is stored in atom attribute C<inchi/chirality>.

=cut

lib/Chemistry/File/InChI/Parser.yp  view on Meta::CPAN

            {
                my $layer1_id = first { $LAYER_ORDER[$_] eq $_[1] } 0..$#LAYER_ORDER;
                my $layer2_id = first { $LAYER_ORDER[$_] eq $_[2] } 0..$#LAYER_ORDER;
                die "unknown layer $_[2]\n" unless defined $layer2_id;
                die "duplicated layer $_[1]\n" if $layer1_id == $layer2_id;
                die "incorrect layer order, $_[2] must appear before $_[1]\n" if $layer1_id > $layer2_id;
                $_[2];
            }
      ;

# A layer is exactly one of the supported layer kinds. Each alternative
# discards the value of the sub-rule and yields a constant string naming
# the kind of layer that was just parsed.
layer: formula          { return 'FORMULA'; }
     | connections      { return 'CONNECTIONS'; }
     | h_atoms          { return 'H_ATOMS'; }
     | charge           { return 'CHARGE'; }
     | tetrahedral      { return 'TETRAHEDRAL'; }
     | stereochemistry  { return 'STEREOCHEMISTRY'; }
     ;

# A formula layer is the first formula component followed by any number of
# continuation components. The semantic value of the rule is an array
# reference holding the component token values in input order.
formula: formula_first
            { [ $_[1] ] }
       | formula formula_continuation
            {
                # Append the new component and hand back the list itself.
                # The value of the last statement becomes the value of the
                # rule, and push() evaluates to the new element count, so
                # ending on the push would turn the accumulated list into
                # a plain number and break the reduction of any further
                # continuation component.
                push @{$_[1]}, $_[2];
                $_[1];
            }
       ;

connections: '/' 'c'
                { $_[0]->{USER}{CURSOR}++ }
           | '/' 'c' graph
                { $_[0]->{USER}{CURSOR}++ }
           | connections ';'
                { $_[0]->{USER}{CURSOR}++ }
           | connections ';' graph

lib/Chemistry/File/InChI/Parser.yp  view on Meta::CPAN

    }

    # Prefix
    if( $self->YYData->{INPUT} =~ s/^(InChI=1S?)// ) {
        return ( 'prefix', $1 );
    }

    # Formula parts
    # TODO: Check Hill order, require counts > 1
    if( $self->YYData->{INPUT} =~ s/^([\/\.])([2-9]|[1-9][0-9]+)?(([A-Z][a-z]?\d*)+)// ) {
        my( $sep, $count, $formula ) = ( $1, $2, $3 );
        $count = 1 unless $count;
        my %atom_map;

        while( $formula =~ /([A-Z][a-z]?)(\d*)/g ) {
            my( $element, $count ) = ( $1, $2 );
            next if $element eq 'H'; # H atoms will be added later
            $count = 1 unless $count;
            for (1..$count) {
                my $atom = Chemistry::Atom->new( symbol => $element );
                $self->{USER}{MOL}->add_atom( $atom );
                $atom_map{scalar( keys %atom_map ) + 1} = $atom;
            }
        }

        if( $sep eq '/') {
            $self->{USER}{ATOM_MAPS} = [ \%atom_map ];
            $self->{USER}{MOL}->attr( 'inchi/counts', [ $count ] );
            return ( 'formula_first', $formula );
        } else {
            push @{$self->{USER}{ATOM_MAPS}}, \%atom_map;
            push @{$self->{USER}{MOL}->attr( 'inchi/counts' )}, $count;
            return ( 'formula_continuation', $formula );
        }
    }

    # Reset cursor on 'h', 'q' or 't'
    if( $self->YYData->{INPUT} =~ s/^([hqt])// ) {
        $self->{USER}{CURSOR} = 0;
        return ( $1, $1 );
    }

    # Remove unsupported layers



( run in 0.491 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )