Chemistry-File-PDB
view release on metacpan or search on metacpan
package Chemistry::File::PDB;
our $VERSION = '0.23';
# $Id: PDB.pm,v 1.13 2009/05/10 21:57:32 itubert Exp $
use base qw(Chemistry::File);
use Chemistry::MacroMol;
use Chemistry::Domain;
use Scalar::Util qw(weaken);
use Carp;
use strict;
use warnings;
=head1 NAME
Chemistry::File::PDB - Protein Data Bank file format reader/writer
=head1 SYNOPSIS
use Chemistry::File::PDB;
# read a PDB file
my $macro_mol = Chemistry::MacroMol->read("myfile.pdb");
# write a PDB file
$macro_mol->write("out.pdb");
# read all models in a multi-model file
my @mols = Chemistry::MacroMol->read("models.pdb");
# read one model at a time
my $file = Chemistry::MacroMol->file("models.pdb");
$file->open;
while (my $mol = $file->read_mol($file->fh)) {
# do something with $mol
}
=cut
=head1 DESCRIPTION
This module reads and writes PDB files. The PDB file format is commonly used to
describe proteins, particularly those stored in the Protein Data Bank
(L<http://www.rcsb.org/pdb/>). The current version of this module only reads the
following record types, ignoring everything else:
ATOM
HETATM
ENDMDL
END
This module automatically registers the 'pdb' format with Chemistry::Mol,
so that PDB files may be identified and read by Chemistry::Mol->read(). For
autodetection purpuses, it assumes that files ending in .pdb or having
a line matching /^(ATOM |HETATM)/ are PDB files.
The PDB reader and writer is designed for dealing with Chemistry::MacroMol
objects, but it can also create and use Chemistry::Mol objects by throwing some
information away.
=head2 Properties
When reading and writing files, this module stores or gets some of the
information in the following places:
=over
=item $domain->type
=item $atom->attr("pdb/serial_number")
The serial number for the atom, as given in the PDB file.
=back
If some of this information is not available when writing a PDB file, this
module tries to make it up (by counting the atoms or residues, for example).
The default residue name for writing is UNK (unknown). Atom names are just the
atomic symbols.
=head2 Multi-model files
If a PDB file has multiple models (separated by END or ENDMDL records), each
call to read_mol will return one model.
=head2 Output features
On writing Chemistry::Mol objects, which don't have macromolecule information
and usually don't have atom names, the atom names are made up by concatenating
the atomic symbol with a unique ID (up to 1296 atoms are possible). The ID can
be disabled by setting the option 'noid':
$mol->write("out.pdb", noid => 1);
The molecule's name is written as a HEADER record; A REMARK record is added
listing the version of Chemistry::File::PDB that was used.
=cut
Chemistry::Mol->register_format(pdb => __PACKAGE__);
sub read_mol {
my ($self, $fh, %options) = @_;
return if $fh->eof;
my $domain;
my $mol_class = $options{mol_class} || "Chemistry::MacroMol";
my $mol = $mol_class->new;
my $is_macro = $mol->isa('Chemistry::MacroMol');
$domain = $mol unless $is_macro;
local $_;
my $curr_residue = 0;
while (<$fh>) {
if (/^TER/) {
# TODO read separated molecules?
} elsif (/^(?:HETATM|ATOM)/) {
my ($atom_n, $symbol, $suff, $res_name, $chain_id,
$seq_n, $ins_code, $x, $y, $z
) = unpack "x6A5x1A2A2x1A3x1A1A4A1x3A8A8A8", $_;
#print "S:$symbol; N:$name; x:$x; y:$y; z:$z\n";
$seq_n *= 1;
if (!$domain || $seq_n != $curr_residue) {
# new residue
if ($is_macro) {
$domain = Chemistry::Domain->new(
parent => $mol, name => "$res_name$seq_n",
type => $res_name);
$mol->add_domain($domain);
weaken $domain;
$domain->attr('pdb/sequence_number', $seq_n);
$domain->attr('pdb/chain_id', $chain_id);
$domain->attr('pdb/insertion_code', $ins_code);
}
$curr_residue = $seq_n;
}
$symbol =~ s/ //g;
my $atom_name = $symbol.$suff;
$atom_name =~ s/ //g;
$symbol = ucfirst(lc($symbol));
my $a = $domain->new_atom(
symbol => $symbol,
coords => [$x, $y, $z],
name => $atom_name,
);
$a->attr('pdb/residue_name', "$res_name$seq_n");
$a->attr('pdb/serial_number', $atom_n*1);
#$a->attr('pdb/residue', $domain); # XXX causes memory leak
} elsif (/^END(MDL)?/) {
return $mol;
}
}
return $mol;
}
sub name_is {
my ($class, $fname) = @_;
$fname =~ /\.pdb$/i;
}
sub file_is {
my ($class, $fname) = @_;
return 1 if $fname =~ /\.pdb$/i;
open F, $fname or croak "Could not open file $fname";
while (<F>){
if (/^ATOM / or /^HETATM/) {
close F;
return 1;
}
}
return 0;
}
sub write_string {
my ($class, $mol, %opts) = @_;
my $ret = '';
$ret .= 'TITLE ' . $mol->name . "\n" if defined $mol->name;
$ret .= 'REMARK Generated by ' . __PACKAGE__ . " version $VERSION\n";
if ($mol->isa("Chemistry::MacroMol")) {
my $i = 1;
my $j = 1;
for my $res ($mol->domains) {
my $seq_n = $res->attr("pdb/sequence_number");
$seq_n = $j++ unless defined $seq_n;
my $res_name = substr($res->name, 0, 3) || "UNK";
( run in 0.493 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )