Bio-ToolBox
view release on metacpan or search on metacpan
lib/Bio/ToolBox/db_helper/big.pm view on Meta::CPAN
package Bio::ToolBox::db_helper::big;
use warnings;
use strict;
use Carp;
use English qw(-no_match_vars);
use List::Util qw(min max sum);
use Bio::ToolBox::db_helper::constants;
use Bio::DB::Big;
require Exporter;
our $VERSION = '2.02';
# Initialize the CURL buffers of Bio::DB::Big.
# This sits in a BEGIN block, so init() is called while this file is being
# compiled, before any of the subroutines below can be called.
BEGIN {
# It is not clear whether init() should be called only once or whether it is
# harmless to re-initialize for every new file, so the conservative choice is
# to call it here, at the very beginning.
# The initialization is only needed for remote files.
Bio::DB::Big->init();
}
# Exported names
# The package inherits from Exporter; every name listed in @EXPORT is exported
# by default.
our @ISA = qw(Exporter);
## no critic
## This module is never intended to be used directly by end users,
## and exporting everything by default is required, so the critic check is
## disabled around the export list.
our @EXPORT = qw(
open_bigwig_db
collect_bigwig_score
collect_bigwig_scores
collect_bigwig_position_scores
open_bigbed_db
collect_bigbed_scores
collect_bigbed_position_scores
open_bigwigset_db
collect_bigwigset_score
collect_bigwigset_scores
collect_bigwigset_position_scores
sum_total_bigbed_features
);
## use critic
# Hash of Bigfile chromosomes
my %BIG_CHROMOS;
# Sometimes a user may request a chromosome that is not in the bigfile,
# which could lead to an exception.
# To guard against that, the chromosome list of each bigfile is recorded
# in this hash as
#   $BIG_CHROMOS{bigfile}{altchromo} = chromo
# The name variant with or without the "chr" prefix is recorded as well,
# to accommodate different naming conventions.
# Hash of Bigfile chromosome lengths
my %BIG_CHROMOLENGTHS;
# libBigWig does not internally clip coordinates to the chromosome length,
# so the chromosome lengths are cached here and checked before an
# out-of-range request can lead to an exception:
#   $BIG_CHROMOLENGTHS{bigfile}{chromo} = length
# Opened bigFile db objects
my %OPENED_BIG;
# A cache of opened Bigfile databases, primarily for collecting scores.
# Caching here is only for the local purpose of collecting scores.
# db_helper also provides caching of db objects, but with the option to force
# a fresh open in the case of forking processes - that option is not
# available here.
# BigWigSet bigWig IDs
my %BIGWIGSET_WIGS;
# A cache of the bigWigs from a BigWigSet that were used in a query.
# The low level bigWig access is wanted, which is not normally available
# from the high level BigWigSet, so the bigWigs are identified from the
# BigWigSet and cached here.
#### BigWig Subroutines
lib/Bio/ToolBox/db_helper/big.pm view on Meta::CPAN
push @{ $pos2data{$position} }, $bed->score + 0;
}
}
}
}
# combine multiple datapoints at the same position
if ( $param->[METH] eq 'ncount' ) {
foreach my $position ( keys %pos2data ) {
my %name2count;
foreach ( @{ $pos2data{$position} } ) { $name2count{$_} += 1 }
$pos2data{$position} = scalar( keys %name2count );
}
}
elsif ( $param->[METH] eq 'count' or $param->[METH] eq 'pcount' ) {
# do nothing, these aren't arrays
}
elsif ( $param->[METH] eq 'mean' ) {
foreach my $position ( keys %pos2data ) {
$pos2data{$position} =
sum( @{ $pos2data{$position} } ) / scalar( @{ $pos2data{$position} } );
}
}
elsif ( $param->[METH] eq 'median' ) {
foreach my $position ( keys %pos2data ) {
$pos2data{$position} = median( @{ $pos2data{$position} } );
}
}
elsif ( $param->[METH] eq 'min' ) {
foreach my $position ( keys %pos2data ) {
$pos2data{$position} = min( @{ $pos2data{$position} } );
}
}
elsif ( $param->[METH] eq 'max' ) {
foreach my $position ( keys %pos2data ) {
$pos2data{$position} = max( @{ $pos2data{$position} } );
}
}
elsif ( $param->[METH] eq 'sum' ) {
foreach my $position ( keys %pos2data ) {
$pos2data{$position} = sum( @{ $pos2data{$position} } );
}
}
else {
# just take the mean for everything else
foreach my $position ( keys %pos2data ) {
$pos2data{$position} =
sum( @{ $pos2data{$position} } ) / scalar( @{ $pos2data{$position} } );
}
}
# return collected data
return wantarray ? %pos2data : \%pos2data;
}
sub sum_total_bigbed_features {

    # Placeholder for counting the total number of features in a bigBed file.
    #
    # This adapter offers no cheap way of getting that number; the only
    # option would be to walk through the entire file, as is done for bam
    # files. The function is legacy and is left unimplemented until someone
    # actually asks for it.
    #
    # Any arguments are ignored. The result is always a single undefined
    # value, in scalar as well as in list context.
    my $feature_total;    # deliberately never assigned
    return $feature_total;
}
#### BigWigSet Subroutines
sub open_bigwigset_db {

    # Open a BigWigSet.
    # Takes the path as the only argument and hands it unchanged to the
    # Bio::ToolBox::db_helper::big::BigWigSet constructor; whatever that
    # constructor returns is returned to the caller.
    my ($set_path) = @_;
    return Bio::ToolBox::db_helper::big::BigWigSet->new($set_path);
}
sub collect_bigwigset_score {

    # Collect a single score from one bigWig member of a BigWigSet.
    #
    # Takes one array reference of query parameters, in the order
    # chromosome, start, stop, strand, strandedness, method, db, dataset.
    # Returns the result of collect_bigwig_score(), or nothing when the
    # lookup selects no bigWig at all.
    # Croaks when the lookup selects more than one bigWig, since a single
    # score cannot be taken from several files.
    my ($param) = @_;

    # resolve the query parameters to the bigWig file(s) of the set
    my $wig_ids   = _lookup_bigwigset_wigs($param);
    my $wig_count = scalar @{$wig_ids};
    return if $wig_count < 1;
    if ( $wig_count > 1 ) {
        croak(
'FATAL: multiple selected bigWig files from a BigWigSet is not supported with single score method'
        );
    }

    # the selected bigWig is appended to the caller's own parameter array,
    # which is then handed to the low level single bigWig API
    push @{$param}, @{$wig_ids};
    return collect_bigwig_score($param);
}
sub collect_bigwigset_scores {

    # Collect scores from the bigWig member(s) of a BigWigSet.
    #
    # Takes one array reference of query parameters, in the order
    # chromosome, start, stop, strand, strandedness, method, db, dataset.
    # Returns the result of collect_bigwig_scores(), or nothing when the
    # lookup selects no bigWig at all.
    my ($param) = @_;

    # resolve the query parameters to the bigWig file(s) of the set
    my $wig_ids = _lookup_bigwigset_wigs($param);
    if ( scalar @{$wig_ids} < 1 ) {
        return;
    }

    # every selected bigWig is appended to the caller's own parameter array,
    # which is then handed to the low level single bigWig API
    push @{$param}, @{$wig_ids};
    return collect_bigwig_scores($param);
}
sub collect_bigwigset_position_scores {

    # Collect position scores from the bigWig member(s) of a BigWigSet.
    #
    # Takes one array reference of query parameters, in the order
    # chromosome, start, stop, strand, strandedness, method, db, dataset.
    # Returns the result of collect_bigwig_position_scores(), or nothing
    # when the lookup selects no bigWig at all.
    my ($param) = @_;

    # resolve the query parameters to the bigWig file(s) of the set
    my $wig_ids = _lookup_bigwigset_wigs($param);
    return if not @{$wig_ids};

    # every selected bigWig is appended to the caller's own parameter array,
    # which is then handed to the low level single bigWig API
    push @{$param}, @{$wig_ids};
    return collect_bigwig_position_scores($param);
}
#### Internal
( run in 2.212 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )