ABI
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
#$Id: ABI.pm,v 1.3.2.4 2006/11/20 03:18:12 malay Exp $
# Perl module to parse ABI chromatogram file
# Malay <malay@bioinformatics.org>
# Copyright (c) 2002, 2003, 2004, 2005, 2006 Malay Kumar Basu
# You may distribute this module under the same terms as perl itself
# Thanks to David H. Klatte for all the hard work!
package ABI;
use IO::File;
use Carp;
use strict;
=head1 NAME
ABI.pm - Perl module to parse chromatogram files generated by
Applied Biosystems (ABI) automated DNA sequencing machine.
Please cite:
ABI.pm - Perl module to parse chromatogram files generated by
Applied Biosystems (ABI) automated DNA sequencing machine.
by Malay K Basu (malay@bioinformatics.org); source code available at:
http://search.cpan.org/~malay
=head1 VERSION
Version 1.0
=cut
our $VERSION = '1.0';
=head1 SYNOPSIS
my $abi = ABI->new(-file=>"mysequence.abi");
my $seq = $abi->get_sequence(); # To get the sequence
my @trace_a = $abi->get_trace("A"); # Get the raw traces for "A"
my @trace_g = $abi->get_trace("G"); # Get the raw traces for "G"
my @base_calls = $abi->get_base_calls(); # Get the base calls
=head1 DESCRIPTION
An ABI chromatogram file is in binary format. It contain several
information only some of which is required for normal use. This
module only gives access to the most used information stored in
ABI file. All the accesses are read only.
If you have edited the file using a trace editor, then you can use the corresponding
method to access the edited sequence and base calls.
=head1 CONSTRUCTOR
=head2 new()
Usage : $abi = ABI->new(-file=>"filename");
$abi = ABI->new("filename"); # same thing
=cut
sub new {
my $class = shift;
my $self = {};
bless $self, ref($class) || $class;
$self->_init(@_);
#print "****", $self->{_mac_header}, "\n";
return $self;
}
sub _init {
my ( $self, @args ) = @_;
my ($file) = $self->_rearrange( ["FILE"], @args );
if ( !defined($file) ) {
croak "Can't open the input file\n";
} else {
$self->set_file_handle($file);
}
$self->{_sequence} = "";
$self->{_sequence_corrected} = "";
$self->{_sample} = "";
$self->{A} = [];
$self->{T} = [];
$self->{G} = [];
$self->{C} = [];
$self->{_basecalls} = [];
$self->{_basecalls_corrected} = [];
$self->{_trace_length} = 0;
$self->{_seq_length} = 0;
$self->{_seq_length_corrected} = 0;
$self->{_abs_index} = 26;
$self->{_index} = undef;
$self->{PLOC1} = undef;
$self->{PLOC} = undef;
$self->{_a_start} = undef;
$self->{_g_start} = undef;
$self->{_c_start} = undef;
$self->{_t_start} = undef;
$self->{DATA9} = undef;
$self->{DATA10} = undef;
$self->{DATA11} = undef;
$self->{DATA12} = undef;
$self->{PBAS1} = undef;
$self->{PBAS2} = undef;
$self->{FWO} = undef;
$self->{_mac_header} = 0;
$self->{_maximum_trace} = 0;
if ( $self->_is_abi() ) {
#print "ABI FILE\n";
$self->_set_index();
$self->_set_base_calls();
$self->_set_corrected_base_calls();
$self->_set_seq();
$self->_set_corrected_seq();
$self->_set_traces();
$self->_set_max_trace();
$self->_set_sample_name();
close( $self->{_fh} );
}
return $self;
}
sub set_file_handle {
my $self = shift;
my $path = shift;
my $fh = IO::File->new();
if ( $fh->open("< $path") ) {
binmode($fh);
$self->{_fh} = $fh;
} else {
croak "Could not open $path in ABITrace::set_file_handle\n";
}
}
sub _rearrange {
my ( $self, $order, @param ) = @_;
return unless @param;
return @param unless ( defined( $param[0] ) && $param[0] =~ /^-/ );
for ( my $i = 0 ; $i < @param ; $i += 2 ) {
$param[$i] =~ s/^\-//;
$param[$i] =~ tr/a-z/A-Z/;
}
# Now we'll convert the @params variable into an associative array.
local ($^W) = 0; # prevent "odd number of elements" warning with -w.
my (%param) = @param;
my (@return_array);
# What we intend to do is loop through the @{$order} variable,
# and for each value, we use that as a key into our associative
# array, pushing the value at that key onto our return array.
my ($key);
foreach $key ( @{$order} ) {
my ($value) = $param{$key};
delete $param{$key};
push( @return_array, $value );
}
# print "\n_rearrange() after processing:\n";
# my $i; for ($i=0;$i<@return_array;$i++) { printf "%20s => %s\n", ${$order}[$i], $return_array[$i]; } <STDIN>;
return (@return_array);
}
sub _is_abi {
my $self = shift;
my $fh = $self->{"_fh"};
my $buf;
seek( $fh, 0, 0 );
read( $fh, $buf, 3 );
#my $a = unpack("n*", $buf);
if ( $buf eq "ABI" ) {
return 1;
} else {
seek( $fh, 128, 0 );
read( $fh, $buf, 3 );
if ( $buf eq "ABI" ) {
$self->_set_mac_header();
return 1;
} else {
return 0;
}
}
}
sub _set_mac_header {
my $self = shift;
$self->{_mac_header} = 128;
}
sub _set_index {
my $self = shift;
my $data_counter = 0;
my $pbas_counter = 0;
my $ploc_counter = 0;
my ( $num_records, $buf );
#print $self->{_fh}, "\n";
#print $self->{_mac_header}, "\n";
seek( $self->{_fh}, $self->{_abs_index} + $self->{_mac_header}, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_index} = unpack( "N", $buf );
#print $self->{_index};
seek( $self->{_fh}, $self->{_abs_index} - 8 + $self->{_mac_header}, 0 );
read( $self->{_fh}, $buf, 4 );
$num_records = unpack( "N", $buf );
for ( my $i = 0 ; $i <= $num_records - 1 ; $i++ ) {
seek( $self->{_fh}, $self->{_index} + ( $i * 28 ), 0 );
read( $self->{_fh}, $buf, 4 );
if ( $buf eq "FWO_" ) {
$self->{FWO} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $buf eq "DATA" ) {
$data_counter++;
if ( $data_counter == 9 ) {
$self->{DATA9} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 10 ) {
$self->{DATA10} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 11 ) {
$self->{DATA11} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 12 ) {
$self->{DATA12} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "PBAS" ) {
$pbas_counter++;
if ( $pbas_counter == 1 ) {
$self->{PBAS1} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $pbas_counter == 2 ) {
$self->{PBAS2} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "PLOC" ) {
$ploc_counter++;
if ( $ploc_counter == 1 ) {
$self->{PLOC1} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $ploc_counter == 2 ) {
$self->{PLOC} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "SMPL" ) {
$self->{SMPL} = $self->{_index} + ( $i * 28 ) + 20;
}
}
seek( $self->{_fh}, $self->{DATA12} - 8, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_trace_length} = unpack( "N", $buf );
seek( $self->{_fh}, $self->{PBAS2} - 4, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_seq_length} = unpack( "N", $buf );
seek( $self->{_fh}, $self->{PBAS1} - 4, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_seq_length_corrected} = unpack( "N", $buf );
$self->{PLOC} = $self->_get_int( $self->{PLOC} ) + $self->{_mac_header};
$self->{PLOC1} = $self->_get_int( $self->{PLOC1} ) + $self->{_mac_header};
$self->{DATA9} = $self->_get_int( $self->{DATA9} ) + $self->{_mac_header};
$self->{DATA10} = $self->_get_int( $self->{DATA10} ) + $self->{_mac_header};
$self->{DATA11} = $self->_get_int( $self->{DATA11} ) + $self->{_mac_header};
$self->{DATA12} = $self->_get_int( $self->{DATA12} ) + $self->{_mac_header};
$self->{PBAS1} = $self->_get_int( $self->{PBAS1} ) + $self->{_mac_header};
$self->{PBAS2} = $self->_get_int( $self->{PBAS2} ) + $self->{_mac_header};
$self->{SMPL} += $self->{_mac_header};
}
sub _set_base_calls {
my $self = shift;
my $buf;
my $length = $self->{_seq_length} * 2;
my $fh = $self->{_fh};
seek( $fh, $self->{PLOC}, 0 );
read( $fh, $buf, $length );
@{ $self->{_basecalls} } = unpack( "n" x $length, $buf );
# print "@{$self->{_basecalls}}" , "\n";
}
sub _set_corrected_base_calls {
my $self = shift;
my $buf;
my $length = $self->{_seq_length_corrected} * 2;
my $fh = $self->{_fh};
seek( $fh, $self->{PLOC1}, 0 );
read( $fh, $buf, $length );
@{ $self->{_basecalls_corrected} } = unpack( "n" x $length, $buf );
}
sub _set_seq {
my $self = shift;
my $buf;
my $length = $self->{_seq_length};
my $fh = $self->{_fh};
seek( $fh, $self->{PBAS2}, 0 );
read( $fh, $buf, $length );
$self->{_sequence} = $buf;
#my @seq = unpack( "C" x $length, $buf);
#print $buf, "\n";
}
sub _set_corrected_seq {
my $self = shift;
my $buf;
my $length = $self->{_seq_length_corrected};
my $fh = $self->{_fh};
seek( $fh, $self->{PBAS1}, 0 );
read( $fh, $buf, $length );
$self->{_sequence_corrected} = $buf;
}
sub _set_traces {
my $self = shift;
my $buf;
my ( @pointers, @A, @G, @C, @T );
my (@datas) =
( $self->{DATA9}, $self->{DATA10}, $self->{DATA11}, $self->{DATA12} );
my $fh = $self->{_fh};
seek( $fh, $self->{FWO}, 0 );
read( $fh, $buf, 4 );
my @order = split( //, $buf );
#print "@order", "\n";
for ( my $i = 0 ; $i < 4 ; $i++ ) {
if ( $order[$i] =~ /A/i ) {
$pointers[0] = $datas[$i];
} elsif ( $order[$i] =~ /C/i ) {
$pointers[1] = $datas[$i];
} elsif ( $order[$i] =~ /G/i ) {
$pointers[2] = $datas[$i];
} elsif ( $order[$i] =~ /T/i ) {
$pointers[3] = $datas[$i];
} else {
croak "Wrong traces\n";
}
}
for ( my $i = 0 ; $i < 4 ; $i++ ) {
seek( $fh, $pointers[$i], 0 );
read( $fh, $buf, $self->{_trace_length} * 2 );
if ( $i == 0 ) {
@A = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 1 ) {
@C = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 2 ) {
@G = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 3 ) {
@T = unpack( "n" x $self->{_trace_length}, $buf );
}
}
@{ $self->{A} } = @A;
@{ $self->{G} } = @G;
@{ $self->{T} } = @T;
@{ $self->{C} } = @C;
}
sub _get_int {
my $self = shift;
my $buf;
my $pos = shift;
my $fh = $self->{_fh};
seek( $fh, $pos, 0 );
read( $fh, $buf, 4 );
return unpack( "N", $buf );
}
sub _set_max_trace {
my $self = shift;
my @A = @{ $self->{A} };
my @T = @{ $self->{T} };
my @G = @{ $self->{G} };
my @C = @{ $self->{C} };
my $max = 0;
for ( my $i = 0 ; $i < @T ; $i++ ) {
if ( $T[$i] > $max ) { $max = $T[$i]; }
if ( $A[$i] > $max ) { $max = $A[$i]; }
if ( $G[$i] > $max ) { $max = $G[$i]; }
if ( $C[$i] > $max ) { $max = $C[$i]; }
}
$self->{_maximum_trace} = $max;
}
sub _set_sample_name {
my $self = shift;
my $buf;
my $fh = $self->{_fh};
seek( $fh, $self->{SMPL}, 0 );
read( $fh, $buf, 1 );
my $length = unpack( "C", $buf );
read( $fh, $buf, $length );
$self->{_sample} = $buf;
}
=head1 METHODS
=head2 get_max_trace()
Title : get_max_trace()
Usage : $max = $abi->get_max_trace();
Function : Returns the maximum trace value of all the traces.
Args : Nothing
Returns : A scalar
=cut
sub get_max_trace {
my $self = shift;
return $self->{_maximum_trace};
}
=head2 get_trace()
Title : get_trace()
Usage : my @a = $abi->get_trace("A");
Function : Returns the raw traces as array.
Args : "A" or "G" or "C" or "T"
Returns : An array
=cut
sub get_trace {
my $self = shift;
my $symbol = shift;
if ( $symbol =~ /A/i ) {
return @{ $self->{A} };
} elsif ( $symbol =~ /G/i ) {
return @{ $self->{G} };
} elsif ( $symbol =~ /C/i ) {
return @{ $self->{C} };
} elsif ( $symbol =~ /T/i ) {
return @{ $self->{T} };
} else {
croak "Illegal symbol\n";
}
}
=head2 get_sequence()
Title : get_sequence()
Usage : my $seq = $abi->get_sequence();
Function : Returns the original unedited sequence as string. If you want to access the edited
sequence use "get_corrected_sequence()" instead.
Args : Nothing
Returns : A scalar
=cut
sub get_sequence {
my $self = shift;
return $self->{_sequence};
}
=head2 get_corrected_sequence()
Title : get_corrected_sequence()
Usage : my $seq = $abi->get_corrected_sequence();
Function : Returns the corrected sequence as string. If you want to access the original
unedited sequence, use "get_sequence()" instead.
Args : Nothing
Returns : A scalar
=cut
sub get_corrected_sequence {
my $self = shift;
return $self->{_sequence_corrected};
}
=head2 get_sequence_length()
Title : get_sequence_length()
Usage : my $seq_length = $abi->get_sequence_length();
Function : Returns the sequence length of the orginal unedited sequence.
Args : Nothing
Returns : A scalar
=cut
sub get_sequence_length {
my $self = shift;
return $self->{_seq_length};
}
=head2 get_corrected_sequence_length()
Title : get_corrected_sequence_length()
Usage : my $seq_length = $abi->get_corrected_sequence_length();
Function : Returns the length of the edited sequence.
Args : Nothing
Returns : A scalar
=cut
sub get_corrected_sequence_length {
my $self = shift;
#print STDERR "**ABI**",$self->{_seq_length_corrected},"\n";
return $self->{_seq_length_corrected};
}
=head2 get_trace_length()
Title : get_trace_length()
Usage : my $trace_length = $abi->get_trace_length();
Function : Returns the trace length
Args : Nothing
Returns : A scalar
=cut
sub get_trace_length {
my $self = shift;
return $self->{_trace_length};
}
=head2 get_base_calls()
Title : get_base_calls()
Usage : my @base_calls = $abi->get_base_calls();
Function : Returns the called bases by the base caller. This method will return the unedited
original basecalls created by the basecaller.
Args : Nothing
Returns : An array
=cut
sub get_base_calls {
my $self = shift;
return @{ $self->{_basecalls} };
}
=head2 get_corrected_base_calls()
Title : get_corrected_base_calls()
Usage : my @base_calls = $abi->get_corrected_base_calls();
Function : If you have edited the trace file you can get the corrected base call
with this method
Args : Nothing
Returns : An array
=cut
sub get_corrected_base_calls {
my $self = shift;
return @{ $self->{_basecalls_corrected} };
}
=head2 get_sample_name()
Title : get_sample_name()
Usage : my $sample = $abi->get_sample_name();
Function : Returns hard coded sample name
Args : Nothing
Returns : A scalar
=cut
sub get_sample_name {
my $self = shift;
return $self->{_sample};
}
=head1 AUTHOR
Malay <malay@bioinformatics.org>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-abi at rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=ABI>.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.
or
You can directly contact me to my email address.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc ABI
You can also look for information at:
=over 4
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/ABI>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/ABI>
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=ABI>
=item * Search CPAN
L<http://search.cpan.org/dist/ABI>
=back
=head1 COPYRIGHT & LICENSE
Copyright 2002,2006 Malay, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1;
view all matches for this distributionview release on metacpan - search on metacpan
( run in 0.370 second using v1.00-cache-2.02-grep-82fe00e-cpan-503542c4f10 )