Chemistry-Bond-Find

 view release on metacpan or  search on metacpan

Find.pm  view on Meta::CPAN


=cut

# The new algorithm based on a suggestion by BrowserUK
sub find_bonds {
    my ($mol, %opts) = @_;
    %opts = (min_atoms => 20, tolerance => 1.1, %opts,
        cutoffs => {});  # set defaults
    my $margin = guess_margin($mol, \%opts);    
    %opts = (margin => $margin, %opts);
    my $grid = {};
    partition_space($mol, $grid, \%opts);
    find_bonds_grid($mol, $grid, \%opts);
    if ($opts{orders}) {
        assign_bond_orders($mol, %opts);
    }
}

use POSIX 'floor';
my $Y = 1000;
my $X = $Y * $Y;
my $Z = 1;
my $ORIGIN = int(($Y**3 + $Y**2 + $Y)/2);

# used by the new BrowserUK algorithm
sub partition_space {
    my ($mol, $grid, $opts) = @_;
    my $margin = $opts->{margin};
    for my $atom ($mol->atoms) {
        my (@vec) = $atom->coords->array;
        my (@norm_vec) = map { floor($_ / $margin) } @vec;
        my $n = $X * $norm_vec[0] + $Y * $norm_vec[1]
            + $norm_vec[2] + $ORIGIN;
        push @{$grid->{$n}}, $atom;
    }
}

# used by the new BrowserUK algorithm
sub find_bonds_grid {
    my ($mol, $grid, $opts) = @_;
    while (my ($n, $atoms) = each %$grid) {
        #print "Cell $n has ". @$atoms . " atoms\n";
        find_bonds_n2_one_set($mol, $atoms, $opts);
        for my $neigh_n (
            $n+$Z, 
            $n+$Z+$Y, $n+$Z-$Y, $n+$Z+$X, $n+$Z-$X, 
            $n+$Z+$Y+$X, $n+$Z+$Y-$X, $n+$Z-$Y+$X, $n+$Z-$Y-$X,
            $n+$Y, $n+$Y+$X, $n+$Y-$X,
            $n+$X,
            ) {
            if ($grid->{$neigh_n}) {
                find_bonds_n2_two_sets($mol, $atoms, $grid->{$neigh_n}, $opts);
            }
        }
    }
}

# used by both find_bonds variants to figure out the maximum cutoff
sub guess_margin {
    my ($mol, $opts) = @_;
    my $formula = $mol->formula_hash;
    my $max = 0;
    for my $elem (keys %$formula) {
        $max = $Covalent_Radius{$elem} if $Covalent_Radius{$elem} > $max;
    }
    $max *= 2 * $opts->{tolerance};
    #printf "MARGIN guessed at (%.2f)\n", $max;
    $max;
}

# brute-force N^2 algorithm
sub find_bonds_n2_one_set {
    my ($mol, $atoms, $opts) = @_;
    my $bond_class = $opts->{bond_class} || $mol->bond_class;
    for (my $i = 0; $i < @$atoms; ++$i) {
	for (my $j = $i + 1; $j < @$atoms; ++$j) {
	    my ($a1, $a2) = ($atoms->[$i], $atoms->[$j]);
	    if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
		$mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
	    }
	}
    }
}

# brute force N*M algorithm for finding bonds between to sets of atoms
sub find_bonds_n2_two_sets {
    my ($mol, $atoms1, $atoms2, $opts) = @_;
    my $bond_class = $opts->{bond_class} || $mol->bond_class;
    for my $a1 (@$atoms1) {
	for my $a2 (@$atoms2) {
	    if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
		$mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
	    }
	}
    }
}


=item C<assign_bond_orders($mol, %opts)>

Assign the formal bond orders in a molecule. The bonds must already be defined,
either by C<find_bonds> or because the molecule was read from a file that
included bonds but no bond orders. If the bond orders were already defined
(maybe the molecule came from a file that did include bond orders after all),
the original bond orders are erased and the process begins from scratch. Two
different algorithms are available, and may be selected by using the "method"
option:

    assign_bond_orders($mol, method => 'itub');
    assign_bond_orders($mol, method => 'baber');

=over

=item itub

This is the default if no method is specified. Developed from scratch by the
author of this module, this algorithm requires only the connection table
information, and it requires that all hydrogen atoms be explicit. It looks for
an atom with unsatisfied valence, increases a bond order, and then does the
same recursively for the neighbors. If everybody's not happy at the end, it
backtracks and tries another bond. The recursive part does not cover the whole
molecule, but only the contiguous region of "unhappy" atoms next to the
starting atom and their neighbors. This permits separating the molecule into
independent regions, so that if one is solved and there's a problem in another,
we don't have to backtrack to the first one.

The itub algorithm has the following additional options:

=over

=item use_coords

Although the algorithm does not I<require> 3D coordinates, it uses them by
default to improve the initial guesses of which bond orders should be
increased. To avoid using coordinates, add the C<use_coords> option with a
false value:

    assign_bond_orders($mol, use_coords => 0);

The results are the same most of the time, but using good coordinates improves
the results for complicated cases such as fused heteroaromatic systems.

=item scratch

If true, start the bond order assignment from scratch by assuming that all bond
orders are 1. If false, start from the current bond orders and try to fix the
unsatisfied valences. This option is true by default.

=back

=item baber

A bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 (with some interpretation).

This algorithm uses the 3D coordinates along with various cutoffs and
confidence formulas to guess the bond orders. It then tries to resolve
conflicts by looping through the atoms (but is not recursive or backtracking).
It does not require explicit hydrogens (although it's better when they are
available) because it was designed for use with real crystallographic data
which often doesn't have hydrogen atoms.

This method doesn't always give a good answer, especially for conjugated and
aromatic systems. The variation used in this module adds some random numbers to
resolve some ambiguities and break loops, so the results are not even entirely
deterministic (the 'itub' method is deterministic but the result may depend on
the input order of the atoms).

=back

=cut

sub assign_bond_orders  {
    my ($mol, %opts) = @_;
    if ($opts{method} and $opts{method} eq 'baber') {
        assign_bond_orders_baber($mol, %opts);
    } else {
        assign_bond_orders_itub($mol, %opts);
    }
}

####### Bond order assignment algorithm by Ivan Tubert-Brohman

# The "typical" valence that we expect an atom to have satisfied. If not
# given, a value of 1 is assumed.
my %MIN_VALENCES = ( O => 2, C => 4, S => 2, H => 1, N => 3, P => 3, Si => 2,
    F => 1, Cl => 1, Br => 1, I => 1 );

# $ALLOWED_INCREASES{$from}{$to} means that element $from is willing to 
# exceed its minimum valence by having a multiple bond to element $to.
my %ALLOWED_INCREASES = ( 
    Cl => { O => 7 },
    Br => { O => 7 },
    I  => { O => 7 },
    S  => { O => 6, C => 3, S => 4 },
    N  => { O => 5, C => 4, N => 4 },
    Si => { O => 4, C => 4 },
    O  => { C => 3, O => 3 },
    P  => { O => 5, C => 4 },
);

sub assign_bond_orders_itub {
    my ($mol, %opts) = @_;
    %opts = (use_coords => 1, scratch => 1, funny => {}, %opts);
    #my @funny_atoms;

    if ($opts{scratch}) {
        $_->order(1) for $mol->bonds;
    }
    for my $atom ($mol->atoms) {
        if (wants_more_bonds($atom)) {
            my $ret = make_happy($atom, \%opts, []);
            print "Atom $atom made happy? '$ret'\n" if $DEBUG;
            unless ($ret) { # atom is funny (has formal charge or unpaired e-)
                $opts{funny}{$atom} = 1; 
                #push @funny_atoms, $atom;
            }

Find.pm  view on Meta::CPAN

        }
        # couldn't find happiness
    } else {
        my $next = shift @$q;
        unless ($next) { # no one left; everybody is happy!
            print "no more atoms left to check at $atom\n" if $DEBUG;
            return 1;
        }
        return (make_happy($next, $opts, $q)); # happy if next atom is happy
    }
    print "not happy at $atom\n" if $DEBUG;
    0; # not happy
}

sub sorted_neighbors {
    my ($atom, $opts) = @_;
    my $use_coords = $opts->{use_coords};

    print "sorting neighbors\n" if $DEBUG;
    return map {
            $_->{bn}
        } sort { 
            ($b->{wants} cmp $a->{wants}) # those who want go first
            || ($use_coords ? ($a->{len} <=> $b->{len}) : 0);
                # if they both want to the same degree, the shortest
                # bond goes first if we are using coords
        } map { 
            +{ 
                wants => wants_more_bonds($_->{to}), 
                bn => $_, 
                len => !$use_coords || $_->{bond}->length,
            }
        } $atom->bonds_neighbors;
}

sub accepts_more_bonds {
    my ($atom, $to) = @_;
    my ($symbol, $to_symbol) = ($atom->symbol, $to->symbol);

    #my $n_bonds = 0;
    #$n_bonds += $_->order for $atom->bonds;
    my $valence = $atom->valence;

    # not enough bonds even for the minimum valence?
    return 1 if ($valence < ($MIN_VALENCES{$atom->symbol} || 1));

    if ($valence < ($ALLOWED_INCREASES{$symbol}{$to_symbol} || 0)) {
        # make sure we are willing to make multiple bonds with this guy
        return 1;
    } else {
        return 0; # max. valence satisfied
    }
}


############
# Bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
# J. Chem. Inf. Comp. Sci. 1992, 32, 401-406

# this algorithm uses the 3D coordinates along with various cutoffs and
# confidence formulas to assign the bond orders. It does not require all
# explicit hydrogens (although it's better when they are available) because
# it's design for use with real crystallographic data which often doesn't 
# have hydrogen.

my %Valences = (
    C => [4], N => [3,4], O => [2],
    P => [4, 5], S => [2, 4, 6], As => [4, 5], Se => [2, 4, 6],
    Te => [2, 4, 6],
    F => [1], Cl => [1, 3], Br => [1, 3, 5], I => [1, 3, 5, 7],
    H => [1],
);

# note; this table should only have single-letter symbols
my %Bond_Orders = (
    "C C" => { is => 1.49, id => 1.31, it => 1.18, wsd => 1.38, wdt => 1.21 },
    "C N" => { is => 1.42, id => 1.32, it => 1.14, wsd => 1.34, wdt => 1.20 },
    "C O" => { is => 1.41, id => 1.22, wsd => 1.28 },
    "C S" => { is => 1.78, id => 1.68, wsd => 1.70 },
    "N N" => { is => 1.40, id => 1.22, wsd => 1.32 },
    "N O" => { is => 1.39, id => 1.22, wsd => 1.25 },
    "O S" => { is => 1.58, id => 1.45, wsd => 1.54 },
    #"O S" => { is => 1.58, id => 1.45, wsd => 1.51 }, # modified value
    "O P" => { is => 1.60, id => 1.48, wsd => 1.52 },
);

# add keys for reverse bond for ease of lookup
for (keys %Bond_Orders) {
    $Bond_Orders{ scalar reverse $_ } = $Bond_Orders{$_};
}

sub assign_bond_orders_baber {
    my ($mol, %opts) = @_;
    my $max_tries = $opts{max_tries} || 10;

    assign_initial_bonds($mol);
    assign_initial_coordinations($mol);
    my $tries = 0;
    while (resolve_conflicts($mol)) {
        if ($tries++ > $max_tries) {
            print "too many tries\n" if $DEBUG;
            last;
        }
        print "try again\n" if $DEBUG;
    }
    $tries;
}

sub assign_initial_bonds {
    my ($mol) = @_;

    for my $bond ($mol->bonds) {
        #my %bond_has = map { ($_->symbol, 1 ) } $bond->atoms;
        my $symbols = join " ", map { $_->symbol } $bond->atoms;
        my ($order, $confidence);
        my $l_obs = $bond->length;
        if ($symbols =~ /\b(H|F|Cl|Br|I)\b/) {
            $order      = 1;
            $confidence = 10000;
        } elsif ($symbols =~ /\bSi\b/) {
            $order      = 1;



( run in 1.401 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )