Bio-InterProScanWrapper
view release on metacpan or search on metacpan
lib/Bio/InterProScanWrapper/External/LSFInterProScan.pm view on Meta::CPAN
package Bio::InterProScanWrapper::External::LSFInterProScan;
# ABSTRACT: Run interproscan via LSF jobs
use Moose;
use LSF;
use LSF::JobManager;
use File::Basename;
use Bio::InterProScanWrapper::Exceptions;
# --- Attributes the caller must supply -------------------------------------
# Of these, only input_file is read in the visible part of this module: its
# basename is used to label the LSF job array.
has 'input_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'output_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'temp_directory_name' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'input_files' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# --- LSF submission settings -----------------------------------------------
# memory_in_mb is passed as the -M limit and also appears in the select/rusage
# resource string; queue is handed to the job manager as its -q option.
has 'memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    default => 5000,
);
has 'queue' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'normal',
);

# Built on first access by _build__job_manager.
has '_job_manager' => (
    is      => 'ro',
    isa     => 'LSF::JobManager',
    lazy    => 1,
    builder => '_build__job_manager',
);

# --- InterProScan command settings -----------------------------------------
# exec is the program placed at the front of the generated command line,
# output_type is the value given to its -f option, and _output_suffix is
# appended to each input file name to form the --outfile name.
has 'exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => '/software/pathogen/external/apps/usr/local/iprscan-5.0.7/interproscan.sh',
);
has 'output_type' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'gff3',
);
has '_output_suffix' => (
    is      => 'ro',
    isa     => 'Str',
    default => '.out',
);

# Value of the "iprscantok" entry in the rusage section of the resource string.
has 'tokens_per_job' => (
    is      => 'ro',
    isa     => 'Int',
    default => 25,
);

# One InterProScan instance uses more than one CPU, so several slots are
# reserved per command (passed as the -n option at submission).
has '_cpus_per_command' => (
    is      => 'ro',
    isa     => 'Int',
    default => 4,
);
# Builder for the lazy _job_manager attribute.
#
# Returns a new LSF::JobManager constructed with the configured queue as its
# -q option.
sub _build__job_manager {
    my $self = shift;

    my $queue_name = $self->queue;
    return LSF::JobManager->new( '-q', $queue_name );
}
# Build the resource-requirement string passed as the -R option on submission.
#
# The string contains three sections: a select[] on memory, an rusage[] that
# names both the memory amount and the "iprscantok" token count, and
# span[hosts=1]. Both memory figures come from memory_in_mb; the token count
# comes from tokens_per_job.
#
# Returns the assembled string.
sub _generate_memory_parameter {
    my $self = shift;

    my $memory_mb = $self->memory_in_mb;
    my $tokens    = $self->tokens_per_job;

    return sprintf(
        'select[mem > %s] rusage[mem=%s, iprscantok=%s] span[hosts=1]',
        $memory_mb, $memory_mb, $tokens
    );
}
# Submit one LSF job array that runs the InterProScan command over a directory
# of sequence files.
#
# Arguments:
#   $sequence_temp_files_directory - directory forwarded to _construct_cmd
#   $number_of_files               - upper bound of the array range "[1-N]"
#
# The array is named "iprscan_<label>_<n>[1-N]", where <label> is the basename
# of input_file with every non-word character replaced by "_" and <n> is a
# random integer from 0 to 99.
#
# Returns whatever the job manager's submit() returns.
sub _submit_job {
    my $self = shift;
    my ( $sequence_temp_files_directory, $number_of_files ) = @_;

    # Only the basename part of the parsed path is needed for the label.
    my ($job_label) = fileparse( $self->input_file );
    $job_label =~ s!\W!_!gi;

    my $random_tag     = int( rand(100) );
    my $job_array_name = join '',
        'iprscan_', $job_label, '_', $random_tag, "[1-${number_of_files}]";

    my $manager = $self->_job_manager;

    my @submit_options = (
        '-o' => ".iprscan.o",
        '-e' => ".iprscan.e",
        '-M' => $self->memory_in_mb,
        '-R' => $self->_generate_memory_parameter,
        '-n' => $self->_cpus_per_command,
        '-J' => $job_array_name,
    );

    # The command string is the final argument, after all the options.
    return $manager->submit(
        @submit_options,
        $self->_construct_cmd($sequence_temp_files_directory)
    );
}
# Assemble the command line that each element of the job array runs.
#
# Arguments:
#   $sequence_temp_files_directory - directory holding the per-job ".seq" files
#
# The input file is "<directory>/$LSB_JOBINDEX.seq" and the output file is the
# same name with _output_suffix appended. "$LSB_JOBINDEX" is written into the
# command as literal text (it is single-quoted here), presumably so that each
# array element substitutes its own index when the command is run.
#
# Returns the command as a single space-separated string.
sub _construct_cmd {
    my ( $self, $sequence_temp_files_directory ) = @_;

    my $per_job_input = join '/', $sequence_temp_files_directory, '$LSB_JOBINDEX.seq';

    my @command_words = (
        $self->exec,
        '-f' => $self->output_type,
        qw(--goterms --iprlookup --pathways),
        '-i'        => $per_job_input,
        '--outfile' => $per_job_input . $self->_output_suffix,
    );

    return join ' ', @command_words;
}
sub _construct_dependancy_params
{
my ($self, $ids) = @_;
return '' if((! defined($ids)) || @{$ids} == 0);
my @done_ids;
for my $id ( @{$ids})
{
( run in 0.573 second using v1.01-cache-2.11-cpan-d8267643d1d )