AlignAid
view release on metacpan or search on metacpan
lib/AlignAid.pm view on Meta::CPAN
use Carp;
# Determine whether the optional PP module is available.  $PP_loaded is a
# package variable (1 = loaded, 0 = not loaded) consulted by new() before it
# accepts an 'LSF' or 'PBS' queue.
use vars qw($PP_loaded);

# Rely on the eval's own return value rather than on $@ afterwards: $@ can be
# reset while the eval unwinds, which would leave the flag undefined and skip
# the warning below.
$PP_loaded = eval { require PP; 1 } ? 1 : 0;
if ( !$PP_loaded ) {
    carp "could not load PP -- submitting to LSF or PBS queues will not be possible\n";
}
use File::Spec;
use IO::File;
use Bio::SeqIO;
=head2 new
Title : new
Usage : AlignAid->new();
Function : Constructor for AlignAid class.
Returns : Object handle.
Required Args: dir => '' - an existing directory where output files are written
: db => '' - the database file
: fasta => '' - the file of FASTA queries
Optional Args: queue => '' - 'single' by default, 'LSF' for an LSF queue
: ('PBS' is accepted but not implemented yet)
: program => '' - the alignment program to use. 'blastn',
: 'blastp', 'blastx', 'tblastn', 'tblastx', or
: 'cross_match'
: prog_args => '' - args to pass to the alignment program
: chunk => '' - number of query sequences per queued job (1 by default)
Throws : croaks if required parameters are missing or suspect.
Comments : none
=cut
# Constructor.  Validates the required paths, applies defaults for the
# optional settings, resolves the alignment program on PATH and returns the
# blessed object.
#
# Named arguments:
#   db        - database file; must exist as a plain file        (required)
#   dir       - output directory; must already exist             (required)
#   fasta     - FASTA query file; must exist and pass -T         (required)
#   queue     - 'single' (default), 'LSF' or 'PBS'
#   program   - name of the alignment program (default 'blastn')
#   prog_args - extra arguments for the program, stored as given
#   chunk     - number of query sequences per queued job (default 1)
#
# Croaks when a required argument is missing or fails its file test, when the
# queue type is unsupported, or when 'LSF'/'PBS' is requested without PP.
# Carps (but still returns an object) when the program is not found on PATH.
sub new {
    my ( $class, %arg ) = @_;

    # do some crude safety checks
    if ( !$arg{db} )        { croak "new requires a database"; }
    if ( !-f $arg{db} )     { croak "database [$arg{db}] does not exist"; }
    if ( !$arg{dir} )       { croak "new requires an output dir"; }
    if ( !-e $arg{dir} )    { croak "directory [$arg{dir}] does not exist"; }
    if ( !-d $arg{dir} )    { croak "[$arg{dir}] is not a directory"; }
    if ( !$arg{fasta} )     { croak "new requires a fasta file of queries"; }
    if ( !-f $arg{fasta} )  { croak "fasta [$arg{fasta}] does not exist"; }
    if ( !-T $arg{fasta} )  { croak "fasta [$arg{fasta}] is not a text file"; }

    # set optional parameters if they were passed in or use defaults if not
    $arg{queue} = 'single' unless defined $arg{queue};

    # only allow an LSF or PBS queue if PP loaded
    if ( !$PP_loaded && ( $arg{queue} eq 'LSF' || $arg{queue} eq 'PBS' ) ) {
        croak "The PP module is required for submitting jobs to LSF or PBS queues.";
    }
    if ( $arg{queue} ne 'single' && $arg{queue} ne 'LSF' && $arg{queue} ne 'PBS' ) {
        croak "$arg{queue} is not a supported queue type";
    }
    $arg{program} = 'blastn' unless defined $arg{program};
    $arg{chunk}   = 1        unless defined $arg{chunk};

    # make sure all paths are absolute paths
    $arg{db}    = File::Spec->rel2abs( $arg{db} );
    $arg{dir}   = File::Spec->rel2abs( $arg{dir} );
    $arg{fasta} = File::Spec->rel2abs( $arg{fasta} );

    # Keep the bare name for later program-type checks and resolve the
    # executable without going through a shell: the name comes from the
    # caller and must never be interpolated into a command line here.
    $arg{prog_name} = $arg{program};
    $arg{program}   = _locate_program( $arg{prog_name} );
    if ( $arg{program} eq '' ) {
        carp "could not find an executable [$arg{prog_name}] in PATH";
    }

    # setup the object
    my $self = {
        _queue     => $arg{queue},
        _dir       => $arg{dir},
        _database  => $arg{db},
        _fasta     => $arg{fasta},
        _program   => $arg{program},
        _prog_args => $arg{prog_args},
        _prog_name => $arg{prog_name},
        _chunk     => $arg{chunk},
    };
    bless( $self, $class );
    return ($self);
}

# Private helper: return the path of the executable called $name, or the
# empty string when there is none.  A name that already contains a directory
# part is checked as given; a bare name is looked up in each PATH directory
# in order.
sub _locate_program {
    my ($name) = @_;
    return '' unless defined $name && length $name;

    my ( undef, $dir_part ) = File::Spec->splitpath($name);
    if ( length $dir_part ) {
        return ( -f $name && -x _ ) ? $name : '';
    }

    foreach my $dir ( File::Spec->path ) {
        my $candidate = File::Spec->catfile( $dir, $name );
        return $candidate if -f $candidate && -x _;
    }
    return '';
}
=head2 submit
Title : submit
Usage : $job->submit(outfile => 'foo');
Function : start the alignment job(s) running.
Returns : 1 upon success, 0 upon failure
Required Args: outfile => '' - the file where you want the output to go
Throws : croaks if required parameters are missing or suspect.
Comments : none
=cut
# submit: start the alignment.
#
# With queue 'single' the program is run directly through system() with its
# output redirected to the outfile.  Otherwise the query FASTA is read
# sequence by sequence, grouped into chunks of _chunk entries, and one job
# per chunk is handed to PP (LSF only; PBS croaks as unimplemented).
#
# Args (hash):
#   outfile - required; croaks when undefined.  It is stored in _outfile and
#             only interpolated into the command in the 'single' branch;
#             queued jobs write to <_dir>/<id>.blast instead.
# Returns (queue branch): 1, or 0 if any submitted job failed the
#   is_in_queue check.  References to the accepted jobs are stored in _jobs.
# Note: despite its name, $class is the invocant and is dereferenced as the
# object hash throughout.
sub submit {
my ( $class, %arg ) = @_;
my @jobs;
my $ret_val = 1;
# default args
# NOTE(review): this mutates the stored _prog_args on every call (a space
# here, plus "> outfile" below in 'single' mode), so repeated submit() calls
# on one object accumulate text -- confirm that is intended.
$class->{_prog_args} .= ' ';
$class->{_outfile} = $arg{outfile};
croak "must supply outfile as argument: \$job->submit(outfile => 'foo')"
unless defined( $class->{_outfile} );
# run on a single processor
if ( $class->{_queue} eq 'single' ) {
# set up output file
my $outfile = $class->{_outfile};
# the redirection is appended to the arguments, so the single string passed
# to system() below is interpreted by the shell
$class->{_prog_args} .= "> $outfile";
my $string =
"$class->{_program} $class->{_database} $class->{_fasta} $class->{_prog_args}";
$ret_val = system($string);
# invert system's return values for passing back
lib/AlignAid.pm view on Meta::CPAN
else { return 0; }
}
# submit to a queueing system
else {
# open input file
# NOTE(review): the message ends in the literal text ":? :!" -- presumably
# $? / $! were meant; confirm.
my $query_fh = IO::File->new( $class->{_fasta}, "r" )
or croak "Couldn't open ", $class->{_fasta}, " :? :!";
my $counter = 0; # the counter for the blasts
# base name for the per-chunk files: the process id followed by "000",
# incremented after each chunk
my $internal = "$$" . "000";
# text accumulated for the current chunk
my $ENTRIES;
my $fasta = Bio::SeqIO->new(-fh => $query_fh, '-format' => 'fasta');
while ( my $entry = $fasta->next_seq ) {
# NOTE(review): this appends the string form of whatever next_seq returns;
# if that is an object without string overloading, the temp file will not
# contain FASTA text -- confirm against Bio::SeqIO.
$ENTRIES .= $entry;
$counter++;
# flush once the chunk holds at least _chunk entries
unless ( $counter < $class->{_chunk} ) {
# setup output files
my $fa_file = $class->{_dir} . "/" . $internal . ".fa";
my $out_file = $class->{_dir} . "/" . $internal . ".blast";
my $error_file = $class->{_dir} . "/" . $internal . ".errors";
# create temp fasta file
my $fa_fh = IO::File->new( $fa_file, "w" )
or croak "couldn't open $fa_file";
print $fa_fh $ENTRIES;
close $fa_fh;
# set command string depending on program
my $prog = $class->{_program};
my $db = $class->{_database};
my $args = $class->{_prog_args};
my $command;
# the pattern is unanchored, so any name containing e.g. "blastn" matches
if ( $class->{_prog_name} =~ /[t]*blast[nxp]/ ) {
$command = "$prog $db $fa_file $args";
}
elsif ( $class->{_prog_name} eq 'cross_match' ) {
# cross_match always gets these two extra flags
$args .= " -tags -discrep_lists ";
$command = "$prog $db $fa_file $args";
}
else { croak "unrecognized alignment program"; }
# submit job
if ( $class->{_queue} eq 'LSF' ) {
my $pp = PP->create(
pp_type => 'lsf',
command => $command,
q => 'long',
eo => $error_file,
output => $out_file,
);
# actually start job (it's been holding up til now)
$pp->start();
# verify job made it onto the queue
if ( $pp->is_in_queue(1) ) {
push @jobs, $pp;
}
else {
$ret_val = 0;
warn "job didn't get submitted!";
}
}
elsif ( $class->{_queue} eq 'PBS' ) {
croak "Sorry! PBS queueing not implemented yet!\n";
}
# (re)set counters
$ENTRIES = "";
$counter = 0;
$internal++;
} # end of unless
} # end of main while
### if there are any fastas left
# NOTE(review): unlike the in-loop path, this branch does not write $ENTRIES
# to $fa_file before submitting, does not add the cross_match flags, and
# uses PP->run with keys e/o where the loop uses PP->create with eo/output
# followed by start() -- confirm against PP whether these are equivalent.
if ( $counter > 0 ) {
# setup output files
my $fa_file = $class->{_dir} . "/" . $internal . ".fa";
my $out_file = $class->{_dir} . "/" . $internal . ".blast";
my $error_file = $class->{_dir} . "/" . $internal . ".errors";
# submit job
if ( $class->{_queue} eq 'LSF' ) {
my $pp = PP->run(
pp_type => 'lsf',
command =>
"$class->{_program} $class->{_database} $fa_file $class->{_prog_args}",
q => 'long',
e => $error_file,
o => $out_file,
);
# verify job made it onto the queue
if ( $pp->is_in_queue(1) ) {
push @jobs, $pp;
}
else {
warn "job didn't get submitted!";
$ret_val = 0;
}
}
elsif ( $class->{_queue} eq 'PBS' ) {
croak "Sorry! PBS queueing not implemented yet!\n";
}
# (re)set counters
$ENTRIES = "";
$counter = 0;
$internal++;
}
} # end of multi-processor else
# add refs to the jobs
$class->{_jobs} = \@jobs;
return $ret_val;
}
=head2 kill_all
Title : kill_all
Usage : $job->kill_all();
Function : kills all running jobs
Returns : 1 upon success, 0 upon failure
Args : none
Throws : croaks on error
Comments : none
=cut
sub kill_all {
my ( $self, %arg ) = @_;
if ( $self->{_queue} eq 'single' ) {
croak "single job killing not implemented yet";
}
elsif ( $self->{_queue} eq 'LSF' ) {
# kill each job
my $i = 0;
foreach my $job ( @{ $self->{_jobs} } ) {
$job->kill;
unless ( $job->is_in_queue(1) ) {
delete $self->{_jobs}[ $i++ ];
}
}
if ( scalar @{ $self->{_jobs} } > 0 ) {
my $num_jobs = scalar @{ $self->{_jobs} };
warn "$num_jobs weren't killed and still are in the queue";
return 0;
( run in 1.077 second using v1.01-cache-2.11-cpan-13bb782fe5a )