Chemistry-Mol

 view release on metacpan or  search on metacpan

lib/Chemistry/Mol.pm  view on Meta::CPAN

    my @a = $self->atoms;
    @a = sort { $sub->($a,$b) } @a;
    $self->{atoms} = \@a;
    $self;
}

=item $mol->bonds($n1, ...)

Returns the bonds with the given indices, or all by default.
Indices start from one, not from zero.

=cut

# Accessor for the molecule's bond array. Called without arguments it
# yields every bond; otherwise each argument is treated as a one-based
# index and the matching elements of the bond array are returned.
sub bonds {
    my ($self, @indices) = @_;

    return @{ $self->{bonds} } unless @indices;

    # Callers count from one; the underlying Perl array counts from zero.
    my @offsets = map { $_ - 1 } @indices;
    return @{ $self->{bonds} }[@offsets];
}

=item $mol->print(option => value...)

Convert the molecule to a string representation. If no options are given, 
a default YAML-like format is used (this may change in the future). Otherwise,
the format should be specified by using the C<format> option.

=cut

# Render the molecule as a string. When a "format" option is supplied the
# work is handed to the class registered for that format; otherwise a
# YAML-like dump of id, name, attributes, atoms and bonds is built here.
sub print {
    my ($self, %opts) = @_;
    local $" = "";    # interpolated arrays get no separator while printing

    # Explicit format requested: delegate to the registered writer.
    return $self->formats($opts{format})->write_string($self, %opts)
        if $opts{format};

    # Default printout, built up section by section.
    my $text = "$self->{id}:\n";
    $text .= "    name: $self->{name}\n";
    $text .= "    attr:\n";
    $text .= $self->print_attr(2);

    $text .= "    atoms:\n";
    for my $atom (@{ $self->{atoms} }) {
        $text .= $atom->print(2);
    }

    $text .= "    bonds:\n";
    for my $bond (@{ $self->{bonds} }) {
        $text .= $bond->print(2);
    }

    return $text;
}

=item $s = $mol->sprintf($format)

Format interesting molecular information in a concise way, as specified by
a printf-like format.

    %n - name
    %f - formula 
    %f{formula with format} - (note: right braces within
        the format should be escaped with a backslash)
    %s - SMILES representation
    %S - canonical SMILES representation
    %m - mass
    %8.3m - mass, formatted as %8.3f with core sprintf
    %q - formal charge
    %a - atom count
    %b - bond count
    %t - type
    %i - id
    %% - %

For example, if you want just about everything:

    $mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
        . "mass=%m; charge =%q; type=%t; id=%i");

Note that you have to C<use Chemistry::File::SMILES> before using C<%s> or
C<%S> on C<< $mol->sprintf >>.

=cut

# Expand a printf-like format string describing the molecule and return the
# resulting string.
#
#   $mol    - the molecule (any object providing formula, print, name, mass,
#             charge, atoms, bonds, type and id methods)
#   $format - the format string; "%f" is used when it is undefined or empty
#
# Directives are expanded in a single left-to-right pass over the format, so
# text produced by one directive is never scanned again. The previous
# pass-per-directive implementation re-expanded anything that looked like a
# directive inside an inserted value (e.g. a name containing "%a") and its
# final unescaping pass stripped backslashes from inserted values, which
# corrupted SMILES strings such as "F/C=C\F".
#
# A backslash escapes the following character ("\%n" gives a literal "%n"),
# and "%%" gives a literal "%".
sub sprintf {
    my ($mol, $format) = @_;
    no warnings 'uninitialized'; # don't care if some properties are undefined
    $format = "%f" unless defined $format && length $format;

    # One-letter directives. Each handler runs in scalar context, exactly as
    # the expressions in the former s///e replacements did, so %a and %b
    # receive whatever atoms() and bonds() return in scalar context.
    my %simple = (
        f => sub { $mol->formula },
        s => sub { $mol->print(format => 'smiles') },
        S => sub { $mol->print(format => 'smiles', unique => 1) },
        n => sub { $mol->name },
        q => sub { $mol->charge },
        a => sub { $mol->atoms },
        b => sub { $mol->bonds },
        t => sub { $mol->type },
        i => sub { $mol->id },
    );

    # Produce the replacement text for one token; exactly one argument is
    # defined, depending on which alternative of the pattern below matched.
    my $render = sub {
        my ($escaped, $formula_format, $mass_spec, $letter) = @_;
        return $escaped                       if defined $escaped;
        return $mol->formula($formula_format) if defined $formula_format;
        return $simple{$letter}->()           if defined $letter;
        # %m: CORE:: makes sure the builtin is called, not this method.
        return $mass_spec
            ? CORE::sprintf("%${mass_spec}f", $mol->mass)
            : $mol->mass;
    };

    my $token = qr/
          \\ (.)                        # 1: backslash-escaped character
        | %f \{ (.*?) (?<!\\) \}        # 2: formula with its own sub-format
        | % (\d*\.?\d*) m               # 3: mass, optional width.precision
        | % ([fsSnqabti])               # 4: one-letter directive
    /x;

    # Turn "%%" into the escape "\%" first so that it is protected both here
    # and inside a %f{...} sub-format, as it always has been.
    $format =~ s/%%/\\%/g;
    $format =~ s/$token/$render->($1, $2, $3, $4)/ge;
    $format;
}

=item $mol->printf($format)

Same as C<< $mol->sprintf >>, but prints to standard output automatically.
Used for quick and dirty molecular information dumping.

=cut

# Format the molecule with $mol->sprintf($format) and print the result to
# the currently selected output handle. Returns what the builtin print
# returns.
sub printf {
    my $mol    = shift;
    my $format = shift;
    return print $mol->sprintf($format);
}

=item Chemistry::Mol->parse($string, option => value...)

Parse the molecule encoded in C<$string>. The format must be specified
with the C<format> option; autodetection of the format is not supported yet,
so the method croaks if no format is given.

=cut

# Parse a molecule from a string using the class registered for the given
# format.
#
#   $self   - class (or object) the method is invoked on; passed on to the
#             parser as the default "mol_class" option
#   $s      - the string to parse
#   options - key/value pairs; "format" is required, and a caller-supplied
#             "mol_class" overrides the default
#
# Returns whatever the format class's parse_string() returns. Croaks when no
# format is given, because autodetection is not implemented.
sub parse {
    my ($self, $s, @options) = @_;
    my %opts = (mol_class => $self, @options);

    # croak concatenates its arguments without a separator, so the space
    # between the two sentences has to be part of the message itself.
    croak "Parse does not support autodetection yet. ",
        "Please specify a format."
        unless $opts{format};

    return $self->formats($opts{format})->parse_string($s, %opts);
}

=item Chemistry::Mol->read($fname, option => value ...)

Read a file and return a list of Mol objects, or croaks if there was a problem.
The type of file will be guessed if not specified via the C<format> option.

Note that only registered file readers will be used. Readers may be registered
using C<register_format()>; modules that include readers (such as
L<Chemistry::File::PDB>) usually register them automatically when they are
loaded.

Automatic decompression of gzipped files is supported if the L<Compress::Zlib>
module is installed. Files ending in .gz are assumed to be compressed;
otherwise it is possible to force decompression by passing the gzip => 1

lib/Chemistry/Mol.pm  view on Meta::CPAN

}

=item Chemistry::Mol->formats

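Returns a sorted list of the file formats that have been installed by
C<register_format()>. If a format name is given as an argument, returns the
class installed for that format instead, or croaks if there is none.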

=cut

# Look up registered file formats. Without an argument, the sorted names of
# all entries in %FILE_FORMATS are returned; with a format name, the value
# stored for that name is returned, and it is an error if there is none.
sub formats {
    my ($self, @args) = @_;

    return sort keys %FILE_FORMATS unless @args;

    my $type       = $args[0];
    my $file_class = $FILE_FORMATS{$type}
        or croak "No class installed for type '$type'";
    return $file_class;
}

=item $mol->mass

Return the molar mass. This is just the sum of the masses of the atoms.  See
L<Chemistry::Atom>::mass for details such as the handling of isotopes.

=cut

# Molar mass of the molecule: the masses reported by each of its atoms,
# added together (zero when there are no atoms).
sub mass {
    my $self  = shift;
    my $total = 0;
    $total += $_->mass for $self->atoms;
    return $total;
}

=item $mol->charge

Return the charge of the molecule. By default it returns the sum of the formal
charges of the atoms. However, it is possible to set an arbitrary charge by
calling C<< $mol->charge($new_charge) >>

=cut

# Combined getter/setter for the molecular charge.
#   With an argument: stores it as the explicit charge and returns $self.
#   Without:          returns the explicit charge if one is defined, else
#                     the sum of the atoms' formal charges (a false or
#                     undefined formal charge counts as zero).
sub charge {
    my $self = shift;

    if (@_) {
        $self->{charge} = shift;
        return $self;
    }

    return $self->{charge} if defined $self->{charge};

    my $total = 0;
    for my $atom ($self->atoms) {
        $total += $atom->formal_charge || 0;
    }
    return $total;
}

=item $mol->formula_hash

Returns a hash reference describing the molecular formula. For methane it would
return { C => 1, H => 4 }.

=cut

# Tally the molecular formula as a hash reference mapping element symbol to
# count. Each atom adds one to its own symbol, and any hydrogens it reports
# through its hydrogens() method are added to the "H" entry.
sub formula_hash {
    my $self = shift;
    my %count;

    for my $atom ($self->atoms) {
        $count{ $atom->symbol }++;
        next unless $atom->hydrogens;
        $count{H} += $atom->hydrogens;
    }

    return \%count;
}

=item $mol->formula($format)

Returns a string with the formula. The format can be specified as a printf-like
string with the control sequences specified in the L<Chemistry::File::Formula>
documentation.

=cut

# Return the formula string for the molecule by printing it in the
# "formula" format; $format (possibly undefined) is handed through as the
# formula_format option. The formula file module is loaded on first use.
sub formula {
    my $self   = shift;
    my $format = shift;

    require Chemistry::File::Formula;

    my @options = (format => "formula", formula_format => $format);
    return $self->print(@options);
}

=item my $mol2 = $mol->clone;

Makes a copy of a molecule. Note that this is a B<deep> copy; if your molecule
has a pointer to the rest of the universe, the entire universe will be cloned!

By default, clone() uses L<Storable> to copy the Perl data structure. L<Clone>
can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to
C<Clone> (default is C<Storable>). The documentation of Storable claims L<Clone>
is less memory-intensive.

=cut

# Deep-copy the molecule with the backend named by $clone_backend.
#   "Storable" - copy with dclone; for Storable versions before 2.14 the
#                copy's _weaken method is called afterwards.
#   "Clone"    - load the Clone module on demand and copy with Clone::clone.
# Any other backend name is a fatal error.
sub clone {
    my $self = shift;

    if ($clone_backend eq "Storable") {
        my $copy = dclone($self);
        $copy->_weaken if Storable->VERSION < 2.14;
        return $copy;
    }

    if ($clone_backend eq "Clone") {
        require Clone;
        my $copy = Clone::clone($self);
        return $copy;
    }

    croak "Unknown clone backend '$clone_backend'";
}

=item my $mol2 = $mol->safe_clone;

Like clone, it makes a deep copy of a molecule. The difference is that the copy
is not "exact": the new molecule and its atoms and bonds are assigned new
IDs. This makes it safe to combine cloned molecules. For example, this is an
error:

    # XXX don't try this at home!
    my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
    # the atoms in $mol1 will clash

But this is ok:

    # the "safe clone" of $mol1 will have new IDs
    my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);

=cut

# Clone the molecule, then give the copy and each of its atoms and bonds a
# fresh ID obtained from that object's own nextID method.
sub safe_clone {
    my $original = shift;
    my $copy     = $original->clone;

    my @to_renumber = ($copy, $copy->atoms, $copy->bonds);
    for my $object (@to_renumber) {
        $object->id($object->nextID);
    }

    return $copy;
}

sub _weaken {
    my ($self) = @_;
    for ($self->atoms, $self->bonds) {
        $_->_weaken;



( run in 0.486 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )