ABI
view release on metacpan or search on metacpan
=head1 DESCRIPTION
An ABI chromatogram file is in binary format. It contain several
information only some of which is required for normal use. This
module only gives access to the most used information stored in
ABI file. All the accesses are read only.
If you have edited the file using a trace editor, then you can use the corresponding
method to access the edited sequence and base calls.
=head1 CONSTRUCTOR
=head2 new()
Usage : $abi = ABI->new(-file=>"filename");
$abi = ABI->new("filename"); # same thing
=cut
sub new {
my $class = shift;
my $self = {};
bless $self, ref($class) || $class;
$self->_init(@_);
#print "****", $self->{_mac_header}, "\n";
return $self;
}
sub _init {
my ( $self, @args ) = @_;
my ($file) = $self->_rearrange( ["FILE"], @args );
if ( !defined($file) ) {
croak "Can't open the input file\n";
} else {
$self->set_file_handle($file);
}
$self->{_sequence} = "";
$self->{_sequence_corrected} = "";
$self->{_sample} = "";
$self->{A} = [];
$self->{T} = [];
$self->{G} = [];
$self->{C} = [];
$self->{_basecalls} = [];
$self->{_basecalls_corrected} = [];
$self->{_trace_length} = 0;
$self->{_seq_length} = 0;
$self->{_seq_length_corrected} = 0;
$self->{_abs_index} = 26;
$self->{_index} = undef;
$self->{PLOC1} = undef;
$self->{PLOC} = undef;
$self->{_a_start} = undef;
$self->{_g_start} = undef;
$self->{_c_start} = undef;
$self->{_t_start} = undef;
$self->{DATA9} = undef;
$self->{DATA10} = undef;
$self->{DATA11} = undef;
$self->{DATA12} = undef;
$self->{PBAS1} = undef;
$self->{PBAS2} = undef;
$self->{FWO} = undef;
$self->{_mac_header} = 0;
$self->{_maximum_trace} = 0;
if ( $self->_is_abi() ) {
#print "ABI FILE\n";
$self->_set_index();
$self->_set_base_calls();
$self->_set_corrected_base_calls();
$self->_set_seq();
$self->_set_corrected_seq();
$self->_set_traces();
$self->_set_max_trace();
$self->_set_sample_name();
close( $self->{_fh} );
}
return $self;
}
sub set_file_handle {
my $self = shift;
my $path = shift;
my $fh = IO::File->new();
if ( $fh->open("< $path") ) {
binmode($fh);
$self->{_fh} = $fh;
} else {
croak "Could not open $path in ABITrace::set_file_handle\n";
}
}
sub _rearrange {
my ( $self, $order, @param ) = @_;
return unless @param;
return @param unless ( defined( $param[0] ) && $param[0] =~ /^-/ );
for ( my $i = 0 ; $i < @param ; $i += 2 ) {
$param[$i] =~ s/^\-//;
$param[$i] =~ tr/a-z/A-Z/;
}
# Now we'll convert the @params variable into an associative array.
local ($^W) = 0; # prevent "odd number of elements" warning with -w.
my (%param) = @param;
my (@return_array);
# What we intend to do is loop through the @{$order} variable,
# and for each value, we use that as a key into our associative
# array, pushing the value at that key onto our return array.
my ($key);
foreach $key ( @{$order} ) {
my ($value) = $param{$key};
delete $param{$key};
push( @return_array, $value );
}
# print "\n_rearrange() after processing:\n";
# my $i; for ($i=0;$i<@return_array;$i++) { printf "%20s => %s\n", ${$order}[$i], $return_array[$i]; } <STDIN>;
return (@return_array);
}
sub _is_abi {
my $self = shift;
my $fh = $self->{"_fh"};
my $buf;
seek( $fh, 0, 0 );
read( $fh, $buf, 3 );
#my $a = unpack("n*", $buf);
if ( $buf eq "ABI" ) {
return 1;
} else {
seek( $fh, 128, 0 );
read( $fh, $buf, 3 );
if ( $buf eq "ABI" ) {
$self->_set_mac_header();
return 1;
} else {
return 0;
}
}
}
sub _set_mac_header {
my $self = shift;
$self->{_mac_header} = 128;
}
sub _set_index {
my $self = shift;
my $data_counter = 0;
my $pbas_counter = 0;
my $ploc_counter = 0;
my ( $num_records, $buf );
#print $self->{_fh}, "\n";
#print $self->{_mac_header}, "\n";
seek( $self->{_fh}, $self->{_abs_index} + $self->{_mac_header}, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_index} = unpack( "N", $buf );
#print $self->{_index};
seek( $self->{_fh}, $self->{_abs_index} - 8 + $self->{_mac_header}, 0 );
read( $self->{_fh}, $buf, 4 );
$num_records = unpack( "N", $buf );
for ( my $i = 0 ; $i <= $num_records - 1 ; $i++ ) {
seek( $self->{_fh}, $self->{_index} + ( $i * 28 ), 0 );
read( $self->{_fh}, $buf, 4 );
if ( $buf eq "FWO_" ) {
$self->{FWO} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $buf eq "DATA" ) {
$data_counter++;
if ( $data_counter == 9 ) {
$self->{DATA9} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 10 ) {
$self->{DATA10} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 11 ) {
$self->{DATA11} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $data_counter == 12 ) {
$self->{DATA12} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "PBAS" ) {
$pbas_counter++;
if ( $pbas_counter == 1 ) {
$self->{PBAS1} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $pbas_counter == 2 ) {
$self->{PBAS2} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "PLOC" ) {
$ploc_counter++;
if ( $ploc_counter == 1 ) {
$self->{PLOC1} = $self->{_index} + ( $i * 28 ) + 20;
}
if ( $ploc_counter == 2 ) {
$self->{PLOC} = $self->{_index} + ( $i * 28 ) + 20;
}
}
if ( $buf eq "SMPL" ) {
$self->{SMPL} = $self->{_index} + ( $i * 28 ) + 20;
}
}
seek( $self->{_fh}, $self->{DATA12} - 8, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_trace_length} = unpack( "N", $buf );
seek( $self->{_fh}, $self->{PBAS2} - 4, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_seq_length} = unpack( "N", $buf );
seek( $self->{_fh}, $self->{PBAS1} - 4, 0 );
read( $self->{_fh}, $buf, 4 );
$self->{_seq_length_corrected} = unpack( "N", $buf );
$self->{PLOC} = $self->_get_int( $self->{PLOC} ) + $self->{_mac_header};
$self->{PLOC1} = $self->_get_int( $self->{PLOC1} ) + $self->{_mac_header};
$self->{DATA9} = $self->_get_int( $self->{DATA9} ) + $self->{_mac_header};
$self->{DATA10} = $self->_get_int( $self->{DATA10} ) + $self->{_mac_header};
$self->{DATA11} = $self->_get_int( $self->{DATA11} ) + $self->{_mac_header};
$self->{DATA12} = $self->_get_int( $self->{DATA12} ) + $self->{_mac_header};
$self->{PBAS1} = $self->_get_int( $self->{PBAS1} ) + $self->{_mac_header};
$self->{PBAS2} = $self->_get_int( $self->{PBAS2} ) + $self->{_mac_header};
$self->{SMPL} += $self->{_mac_header};
}
sub _set_base_calls {
my $self = shift;
my $buf;
my $length = $self->{_seq_length} * 2;
my $fh = $self->{_fh};
seek( $fh, $self->{PLOC}, 0 );
read( $fh, $buf, $length );
@{ $self->{_basecalls} } = unpack( "n" x $length, $buf );
# print "@{$self->{_basecalls}}" , "\n";
}
sub _set_corrected_base_calls {
my $self = shift;
my $buf;
my $length = $self->{_seq_length_corrected} * 2;
my $fh = $self->{_fh};
seek( $fh, $self->{PLOC1}, 0 );
read( $fh, $buf, $length );
@{ $self->{_basecalls_corrected} } = unpack( "n" x $length, $buf );
}
sub _set_seq {
my $self = shift;
my $buf;
my $length = $self->{_seq_length};
my $fh = $self->{_fh};
seek( $fh, $self->{PBAS2}, 0 );
read( $fh, $buf, $length );
$self->{_sequence} = $buf;
#my @seq = unpack( "C" x $length, $buf);
#print $buf, "\n";
}
sub _set_corrected_seq {
my $self = shift;
my $buf;
my $length = $self->{_seq_length_corrected};
my $fh = $self->{_fh};
seek( $fh, $self->{PBAS1}, 0 );
read( $fh, $buf, $length );
$self->{_sequence_corrected} = $buf;
}
sub _set_traces {
my $self = shift;
my $buf;
my ( @pointers, @A, @G, @C, @T );
my (@datas) =
( $self->{DATA9}, $self->{DATA10}, $self->{DATA11}, $self->{DATA12} );
my $fh = $self->{_fh};
seek( $fh, $self->{FWO}, 0 );
read( $fh, $buf, 4 );
my @order = split( //, $buf );
#print "@order", "\n";
for ( my $i = 0 ; $i < 4 ; $i++ ) {
if ( $order[$i] =~ /A/i ) {
$pointers[0] = $datas[$i];
} elsif ( $order[$i] =~ /C/i ) {
$pointers[1] = $datas[$i];
} elsif ( $order[$i] =~ /G/i ) {
$pointers[2] = $datas[$i];
} elsif ( $order[$i] =~ /T/i ) {
$pointers[3] = $datas[$i];
} else {
croak "Wrong traces\n";
}
}
for ( my $i = 0 ; $i < 4 ; $i++ ) {
seek( $fh, $pointers[$i], 0 );
read( $fh, $buf, $self->{_trace_length} * 2 );
if ( $i == 0 ) {
@A = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 1 ) {
@C = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 2 ) {
@G = unpack( "n" x $self->{_trace_length}, $buf );
}
if ( $i == 3 ) {
@T = unpack( "n" x $self->{_trace_length}, $buf );
}
}
@{ $self->{A} } = @A;
@{ $self->{G} } = @G;
@{ $self->{T} } = @T;
@{ $self->{C} } = @C;
}
sub _get_int {
my $self = shift;
my $buf;
my $pos = shift;
my $fh = $self->{_fh};
seek( $fh, $pos, 0 );
read( $fh, $buf, 4 );
return unpack( "N", $buf );
}
sub _set_max_trace {
my $self = shift;
my @A = @{ $self->{A} };
my @T = @{ $self->{T} };
my @G = @{ $self->{G} };
my @C = @{ $self->{C} };
my $max = 0;
for ( my $i = 0 ; $i < @T ; $i++ ) {
if ( $T[$i] > $max ) { $max = $T[$i]; }
( run in 0.547 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )