Bio-DB-HTS
view release on metacpan or search on metacpan
lib/Bio/DB/HTS.pm view on Meta::CPAN
croak "No index file for $path; try opening file with -autoindex"
unless -e "${path}.bai" or
-e "${path}.csi" or
-e "${path}.crai";
return $self->index_load($fh);
} ## end sub index
sub reindex {
my $self = shift;
my $path = shift;
# if bam|cram file is not sorted, then index_build will exit.
# we spawn a shell to intercept this eventuality
print STDERR "[hts_index_build] creating index for $path\n" if -t STDOUT;
my $result = open my $fh, "-|";
die "Couldn't fork $!" unless defined $result;
if ( $result == 0 ) { # in child
# dup stderr to stdout so that we can intercept messages from library
open STDERR, ">&STDOUT";
$self->index_build($path);
exit 0;
}
my $mesg = <$fh>;
$mesg ||= '';
close $fh;
if ( $mesg =~ /not sorted/i ) {
print STDERR "[hts_index_build] sorting by coordinate...\n"
if -t STDOUT;
$self->sort_core( 0, $path, "$path.sorted" );
$path =~ /\.(.+?)$/;
rename "$path.sorted.$1", $path;
$self->index_build($path);
}
elsif ($mesg) {
die $mesg;
}
} ## end sub reindex
# same as index_open(), but changes current wd to TMPDIR to accomodate
# the C library when it tries to download the index file from remote
# locations.
sub index_open_in_safewd {
my $self = shift;
my $fh = shift;
my $dir = getcwd;
my $tmpdir = File::Spec->tmpdir;
chdir($tmpdir);
my $result = $self->index_load($fh);
chdir $dir;
$result;
}
sub mtime {
my $path = shift;
( stat($path) )[9];
}
1;
__END__
=head1 EXAMPLES
For illustrative purposes only, here is an extremely stupid SNP caller
that tallies up bases that are q>20 and calls a SNP if there are at
least 4 non-N/non-indel bases at the position and at least 25% of them
are a non-reference base.
my @SNPs; # this will be list of SNPs
my $snp_caller = sub {
my ($seqid,$pos,$p) = @_;
my $refbase = $hts->segment($seqid,$pos,$pos)->dna;
my ($total,$different);
for my $pileup (@$p) {
my $b = $pileup->alignment;
next if $pileup->indel or $pileup->is_refskip; # don't deal with these ;-)
my $qbase = substr($b->qseq,$pileup->qpos,1);
next if $qbase =~ /[nN]/;
my $qscore = $b->qscore->[$pileup->qpos];
next unless $qscore > 25;
$total++;
$different++ if $refbase ne $qbase;
}
if ($total >= 4 && $different/$total >= 0.25) {
push @SNPs,"$seqid:$pos";
}
};
$hts->pileup('seq1',$snp_caller);
print "Found SNPs: @SNPs\n";
=head1 GBrowse Compatibility
The Bio::DB::HTS interface can be used as a backend to GBrowse
(gmod.sourceforge.net/gbrowse). GBrowse can calculate and display
coverage graphs across large regions, alignment cartoons across
intermediate size regions, and detailed base-pair level alignments
across small regions.
Here is a typical configuration for a BAM database that contains
information from a shotgun genomic sequencing project. Some notes:
* It is important to set "search options = none" in order to avoid
GBrowse trying to scan through the BAM database to match read
names. This is a time-consuming operation.
* The callback to "bgcolor" renders pairs whose mates are unmapped in
red.
* The callback to "balloon hover" causes a balloon to pop up with the
read name when the user hovers over each paired read. Otherwise the
default behavior would be to provide information about the pair as
( run in 2.817 seconds using v1.01-cache-2.11-cpan-d8267643d1d )