Bio-ToolBox

 view release on MetaCPAN or search on MetaCPAN

lib/Bio/ToolBox/db_helper/big.pm  view on Meta::CPAN

package Bio::ToolBox::db_helper::big;

# Internal score-collection adapter over Bio::DB::Big (libBigWig) for
# bigWig, bigBed, and BigWigSet data sources. Not intended for direct
# use by end users; Bio::ToolBox::db_helper dispatches to these subs.

use warnings;
use strict;
use Carp;
use English    qw(-no_match_vars);
use List::Util qw(min max sum);
use Bio::ToolBox::db_helper::constants;
use Bio::DB::Big;
require Exporter;

our $VERSION = '2.02';

# Initialize CURL buffers (needed only for remote file access)
BEGIN {
	# It is not documented whether init() must be called exactly once or
	# is harmless to repeat per file, so it is done once here at compile
	# time to be safe. Initialization is only required for remote files.
	Bio::DB::Big->init();
}

# Exported names
our @ISA = qw(Exporter);

## no critic
## this is never intended to be used directly by end users
## and exporting everything is required
our @EXPORT = qw(
	open_bigwig_db
	collect_bigwig_score
	collect_bigwig_scores
	collect_bigwig_position_scores
	open_bigbed_db
	collect_bigbed_scores
	collect_bigbed_position_scores
	open_bigwigset_db
	collect_bigwigset_score
	collect_bigwigset_scores
	collect_bigwigset_position_scores
	sum_total_bigbed_features
);
## use critic

# Hash of Bigfile chromosomes
my %BIG_CHROMOS;

# A user may request a chromosome name that is not present in the bigFile
# (for example "chr1" versus "1"), which could otherwise raise an
# exception. Cache each file's chromosome list here, mapping both the
# native name and its chr-prefix variant back to the real name:
# $BIG_CHROMOS{bigfile}{altchromo} = chromo

# Hash of Bigfile chromosome lengths
my %BIG_CHROMOLENGTHS;

# libBigWig does not internally clip chromosome lengths, so cache each
# chromosome's length and check query coordinates against it before
# they can trigger an exception:
# $BIG_CHROMOLENGTHS{bigfile}{chromo} = length

# Opened bigFile db objects
my %OPENED_BIG;

# Cache of opened Bigfile database handles, used only locally when
# collecting scores. db_helper provides its own cache of db objects with
# a force-reopen option for forked processes; no such option exists here.

# BigWigSet bigWig IDs
my %BIGWIGSET_WIGS;

# Cache of the member bigWig files selected from a BigWigSet query.
# We want the low-level single-bigWig access that the high-level
# BigWigSet interface does not normally expose, so the matching bigWigs
# are identified once per query signature and cached here.
#### BigWig Subroutines

lib/Bio/ToolBox/db_helper/big.pm  view on Meta::CPAN

					push @{ $pos2data{$position} }, $bed->score + 0;
				}
			}
		}
	}

	# combine multiple datapoints at the same position
	if ( $param->[METH] eq 'ncount' ) {
		foreach my $position ( keys %pos2data ) {
			my %name2count;
			foreach ( @{ $pos2data{$position} } ) { $name2count{$_} += 1 }
			$pos2data{$position} = scalar( keys %name2count );
		}
	}
	elsif ( $param->[METH] eq 'count' or $param->[METH] eq 'pcount' ) {

		# do nothing, these aren't arrays
	}
	elsif ( $param->[METH] eq 'mean' ) {
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} =
				sum( @{ $pos2data{$position} } ) / scalar( @{ $pos2data{$position} } );
		}
	}
	elsif ( $param->[METH] eq 'median' ) {
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} = median( @{ $pos2data{$position} } );
		}
	}
	elsif ( $param->[METH] eq 'min' ) {
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} = min( @{ $pos2data{$position} } );
		}
	}
	elsif ( $param->[METH] eq 'max' ) {
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} = max( @{ $pos2data{$position} } );
		}
	}
	elsif ( $param->[METH] eq 'sum' ) {
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} = sum( @{ $pos2data{$position} } );
		}
	}
	else {
		# just take the mean for everything else
		foreach my $position ( keys %pos2data ) {
			$pos2data{$position} =
				sum( @{ $pos2data{$position} } ) / scalar( @{ $pos2data{$position} } );
		}
	}

	# return collected data
	return wantarray ? %pos2data : \%pos2data;
}

sub sum_total_bigbed_features {

	# Summing total features is not implemented for this adapter: it
	# would require literally walking the entire bigBed file (as is done
	# for bam files). This is legacy functionality; skip until someone
	# complains.
	# Use a bare return (not "return undef") so list-context callers get
	# an empty list instead of a one-element (undef) list, while scalar
	# context still yields undef.
	return;
}

#### BigWigSet Subroutines

sub open_bigwigset_db {

	# Open a BigWigSet directory and return the internal wrapper object.
	my ($directory) = @_;
	return Bio::ToolBox::db_helper::big::BigWigSet->new($directory);
}

sub collect_bigwigset_score {

	# Collect a single summary score from a BigWigSet.
	# Takes a parameter array reference:
	# chromosome, start, stop, strand, strandedness, method, db, dataset
	my $param = shift;

	# identify the member bigWig files matching this request
	my $wig_ids = _lookup_bigwigset_wigs($param);
	my $number  = scalar @{$wig_ids};
	return if $number == 0;
	if ( $number > 1 ) {
		croak(
'FATAL: multiple selected bigWig files from a BigWigSet is not supported with single score method'
		);
	}

	# delegate to the low level single bigWig API
	push @{$param}, @{$wig_ids};
	return collect_bigwig_score($param);
}

sub collect_bigwigset_scores {

	# Collect a list of scores from a BigWigSet.
	# Takes a parameter array reference:
	# chromosome, start, stop, strand, strandedness, method, db, dataset
	my $param = shift;

	# identify the member bigWig files matching this request
	my $wig_ids = _lookup_bigwigset_wigs($param);
	return if not scalar @{$wig_ids};

	# delegate to the low level single bigWig API
	push @{$param}, @{$wig_ids};
	return collect_bigwig_scores($param);
}

sub collect_bigwigset_position_scores {

	# Collect position-indexed scores from a BigWigSet.
	# Takes a parameter array reference:
	# chromosome, start, stop, strand, strandedness, method, db, dataset
	my $param = shift;

	# identify the member bigWig files matching this request
	my $wig_ids = _lookup_bigwigset_wigs($param);
	return if not scalar @{$wig_ids};

	# delegate to the low level single bigWig API
	push @{$param}, @{$wig_ids};
	return collect_bigwig_position_scores($param);
}

#### Internal



( run in 2.212 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )