Bio-ToolBox
view release on metacpan or search on metacpan
lib/Bio/ToolBox/db_helper/bam.pm view on Meta::CPAN
### Open a bam database connection
sub open_bam_db {
my $bamfile = shift;
# check the path
my $path = $bamfile;
$path =~ s/^file://; # strip the file prefix if present
# check for bam index
check_bam_index($path);
# open the bam database object
# we specifically do not cache the bam object or chromosome names here
my $sam;
eval { $sam = Bio::DB::Sam->new( -bam => $path ); };
if ($sam) {
return $sam;
}
elsif ( $EVAL_ERROR or $OS_ERROR ) {
carp "ERROR: Unable to open Bam file: $EVAL_ERROR $OS_ERROR";
return;
}
else {
return;
}
}
### Open an indexed fasta file
sub open_indexed_fasta {
my $fasta = shift;
if ( $fasta =~ /\.gz$/ ) {
carp
"ERROR: Bio::DB::Sam::Fai doesn't support compressed fasta files! Please decompress";
return;
}
my $fai;
eval { $fai = Bio::DB::Sam::Fai->load($fasta) };
# this should automatically build the fai index if possible
if ($fai) {
return $fai;
}
else {
carp "ERROR: unable to open Fasta file!";
return;
}
}
### Check for a bam index
sub check_bam_index {
# the old samtools always expects a .bam.bai index file
# I find that relying on -autoindex yields a flaky Bio::DB::Sam object that
# doesn't always work as expected. Best to create the index BEFORE opening
my $bamfile = shift;
return if ( $bamfile =~ /^(?:http | ftp)/xi ); # I can't do much with remote files
# we will check the modification time to make sure index is newer
my $bam_mtime = ( stat($bamfile) )[9];
# optional index names
my $bam_index = "$bamfile.bai"; # .bam.bai
my $alt_index = $bamfile;
$alt_index =~ s/bam$/bai/i; # picard uses .bai instead of .bam.bai as samtools does
# check for existing index
if ( -e $bam_index ) {
if ( ( stat($bam_index) )[9] < $bam_mtime ) {
# index is older than bam file
print " index $bam_index is old. Attempting to update time stamp.\n";
my $now = time;
utime( $now, $now, $bam_index ) || Bio::DB::Bam->reindex($bamfile);
}
}
elsif ( -e $alt_index ) {
if ( ( stat($alt_index) )[9] < $bam_mtime ) {
# index is older than bam file
print " index $alt_index is old.\n";
}
# reuse this index
copy( $alt_index, $bam_index );
}
else {
# make a new index
Bio::DB::Bam->reindex($bamfile);
}
}
### Write a new bam file
sub write_new_bam_file {
my $file = shift;
$file .= '.bam' unless $file =~ /\.bam$/i;
my $bam = Bio::DB::Bam->open( $file, 'w' );
if ($bam) {
return $bam;
}
else {
carp "ERROR: unable to open bam file $file! $OS_ERROR";
return;
}
}
### Collect Bam scores
sub collect_bam_scores {
# passed parameters as array ref
# chromosome, start, stop, strand, strandedness, method, db, dataset
my $param = shift;
# initialize score structures
# which one is used depends on the return type variable
my %pos2data; # either position => count or position => [scores]
my $scores = []; # just scores
# look at each bamfile
# usually there is only one, but there may be more than one
for ( my $b = DATA; $b < scalar @{$param}; $b++ ) {
## Open the Bam File
my $bamfile = $param->[$b];
my $bam = $OPENED_BAM{$bamfile} || undef;
unless ($bam) {
# open and cache the bam file
$bam = open_bam_db($bamfile)
or croak "FATAL: Unable to open bam file '$bamfile'!";
$OPENED_BAM{$bamfile} = $bam;
# record the chromosomes and possible variants
$BAM_CHROMOS{$bamfile} = {};
foreach my $s ( $bam->seq_ids ) {
$BAM_CHROMOS{$bamfile}{$s} = $s;
if ( $s =~ /^chr(.+)$/ ) {
$BAM_CHROMOS{$bamfile}{$1} = $s;
( run in 1.412 second using v1.01-cache-2.11-cpan-39bf76dae61 )