Bio-SamTools
view release on metacpan or search on metacpan
lib/Bio/DB/Sam.pm view on Meta::CPAN
my $path = shift;
my $autoindex = shift;
return $self->index_open_in_safewd($path) if Bio::DB::Sam->is_remote($path);
if ($autoindex) {
$self->reindex($path) unless
-e "${path}.bai" && mtime($path) <= mtime("${path}.bai");
}
croak "No index file for $path; try opening file with -autoindex" unless -e "${path}.bai";
return $self->index_open($path);
}
sub reindex {
my $self = shift;
my $path = shift;
# if bam file is not sorted, then index_build will exit.
# we spawn a shell to intercept this eventuality
print STDERR "[bam_index_build] creating index for $path\n" if -t STDOUT;
my $result = open my $fh,"-|";
die "Couldn't fork $!" unless defined $result;
if ($result == 0) { # in child
# dup stderr to stdout so that we can intercept messages from library
open STDERR,">&STDOUT";
$self->index_build($path);
exit 0;
}
my $mesg = <$fh>;
$mesg ||= '';
close $fh;
if ($mesg =~ /not sorted/i) {
print STDERR "[bam_index_build] sorting by coordinate...\n" if -t STDOUT;
$self->sort_core(0,$path,"$path.sorted");
rename "$path.sorted.bam",$path;
$self->index_build($path);
} elsif ($mesg) {
die $mesg;
}
}
# same as index_open(), but changes current wd to TMPDIR to accomodate
# the C library when it tries to download the index file from remote
# locations.
sub index_open_in_safewd {
my $self = shift;
my $dir = getcwd;
my $tmpdir = File::Spec->tmpdir;
chdir($tmpdir);
my $result = $self->index_open(@_);
chdir $dir;
$result;
}
sub mtime {
my $path = shift;
(stat($path))[9];
}
1;
__END__
=head1 EXAMPLES
For illustrative purposes only, here is an extremely stupid SNP caller
that tallies up bases that are q>20 and calls a SNP if there are at
least 4 non-N/non-indel bases at the position and at least 25% of them
are a non-reference base.
my @SNPs; # this will be list of SNPs
my $snp_caller = sub {
my ($seqid,$pos,$p) = @_;
my $refbase = $sam->segment($seqid,$pos,$pos)->dna;
my ($total,$different);
for my $pileup (@$p) {
my $b = $pileup->alignment;
next if $pileup->indel or $pileup->is_refskip; # don't deal with these ;-)
my $qbase = substr($b->qseq,$pileup->qpos,1);
next if $qbase =~ /[nN]/;
my $qscore = $b->qscore->[$pileup->qpos];
next unless $qscore > 25;
$total++;
$different++ if $refbase ne $qbase;
}
if ($total >= 4 && $different/$total >= 0.25) {
push @SNPs,"$seqid:$pos";
}
};
$sam->pileup('seq1',$snp_caller);
print "Found SNPs: @SNPs\n";
=head1 GBrowse Compatibility
The Bio::DB::Sam interface can be used as a backend to GBrowse
(gmod.sourceforge.net/gbrowse). GBrowse can calculate and display
coverage graphs across large regions, alignment cartoons across
intermediate size regions, and detailed base-pair level alignments
across small regions.
Here is a typical configuration for a BAM database that contains
information from a shotgun genomic sequencing project. Some notes:
* It is important to set "search options = none" in order to avoid
GBrowse trying to scan through the BAM database to match read
names. This is a time-consuming operation.
* The callback to "bgcolor" renders pairs whose mates are unmapped in
red.
* The callback to "balloon hover" causes a balloon to pop up with the
read name when the user hovers over each paired read. Otherwise the
( run in 1.280 second using v1.01-cache-2.11-cpan-39bf76dae61 )