Bio-ToolBox

 view release on metacpan or  search on metacpan

lib/Bio/ToolBox/db_helper/bam.pm  view on Meta::CPAN


### Open a bam database connection
sub open_bam_db {
	my $bamfile = shift;

	# check the path
	my $path = $bamfile;
	$path =~ s/^file://;    # strip the file prefix if present

	# check for bam index
	check_bam_index($path);

	# open the bam database object
	# we specifically do not cache the bam object or chromosome names here
	my $sam;
	eval { $sam = Bio::DB::Sam->new( -bam => $path ); };
	if ($sam) {
		return $sam;
	}
	elsif ( $EVAL_ERROR or $OS_ERROR ) {
		carp "ERROR: Unable to open Bam file: $EVAL_ERROR $OS_ERROR";
		return;
	}
	else {
		return;
	}
}

### Open an indexed fasta file
sub open_indexed_fasta {
	my $fasta = shift;
	if ( $fasta =~ /\.gz$/ ) {
		carp
"ERROR:  Bio::DB::Sam::Fai doesn't support compressed fasta files! Please decompress";
		return;
	}
	my $fai;
	eval { $fai = Bio::DB::Sam::Fai->load($fasta) };

	# this should automatically build the fai index if possible
	if ($fai) {
		return $fai;
	}
	else {
		carp "ERROR: unable to open Fasta file!";
		return;
	}
}

### Check for a bam index
sub check_bam_index {

	# the old samtools always expects a .bam.bai index file
	# I find that relying on -autoindex yields a flaky Bio::DB::Sam object that
	# doesn't always work as expected. Best to create the index BEFORE opening

	my $bamfile = shift;
	return if ( $bamfile =~ /^(?:http | ftp)/xi );    # I can't do much with remote files

	# we will check the modification time to make sure index is newer
	my $bam_mtime = ( stat($bamfile) )[9];

	# optional index names
	my $bam_index = "$bamfile.bai";                   # .bam.bai
	my $alt_index = $bamfile;
	$alt_index =~ s/bam$/bai/i;    # picard uses .bai instead of .bam.bai as samtools does

	# check for existing index
	if ( -e $bam_index ) {
		if ( ( stat($bam_index) )[9] < $bam_mtime ) {

			# index is older than bam file
			print " index $bam_index is old. Attempting to update time stamp.\n";
			my $now = time;
			utime( $now, $now, $bam_index ) || Bio::DB::Bam->reindex($bamfile);
		}
	}
	elsif ( -e $alt_index ) {
		if ( ( stat($alt_index) )[9] < $bam_mtime ) {

			# index is older than bam file
			print " index $alt_index is old.\n";
		}

		# reuse this index
		copy( $alt_index, $bam_index );
	}
	else {
		# make a new index
		Bio::DB::Bam->reindex($bamfile);
	}
}

### Write a new bam file
sub write_new_bam_file {
	my $file = shift;
	$file .= '.bam' unless $file =~ /\.bam$/i;
	my $bam = Bio::DB::Bam->open( $file, 'w' );
	if ($bam) {
		return $bam;
	}
	else {
		carp "ERROR: unable to open bam file $file! $OS_ERROR";
		return;
	}
}

### Collect Bam scores
sub collect_bam_scores {

	# passed parameters as array ref
	# chromosome, start, stop, strand, strandedness, method, db, dataset
	my $param = shift;

	# initialize score structures
	# which one is used depends on the return type variable
	my %pos2data;       # either position => count or position => [scores]
	my $scores = [];    # just scores

	# look at each bamfile
	# usually there is only one, but there may be more than one
	for ( my $b = DATA; $b < scalar @{$param}; $b++ ) {

		## Open the Bam File
		my $bamfile = $param->[$b];
		my $bam     = $OPENED_BAM{$bamfile} || undef;
		unless ($bam) {

			# open and cache the bam file
			$bam = open_bam_db($bamfile)
				or croak "FATAL: Unable to open bam file '$bamfile'!";
			$OPENED_BAM{$bamfile} = $bam;

			# record the chromosomes and possible variants
			$BAM_CHROMOS{$bamfile} = {};
			foreach my $s ( $bam->seq_ids ) {
				$BAM_CHROMOS{$bamfile}{$s} = $s;
				if ( $s =~ /^chr(.+)$/ ) {
					$BAM_CHROMOS{$bamfile}{$1} = $s;



( run in 1.412 second using v1.01-cache-2.11-cpan-39bf76dae61 )