AlignAid

 view release on metacpan or  search on metacpan

lib/AlignAid.pm  view on Meta::CPAN

use Carp;

# Determine whether the optional PP module can be loaded.  $PP_loaded is a
# package variable: 1 when `require PP` succeeded, 0 when it failed (in which
# case a warning is emitted, since queue submission depends on PP).
# Declared with `our` rather than the obsolete `use vars` pragma.
our $PP_loaded;
eval { require PP; $PP_loaded = 1; };
if ($@) {
    $PP_loaded = 0;
    carp "could not load PP -- submitting to LSF or PBS queues will not be possible\n";
}

use File::Spec;
use IO::File;
use Bio::SeqIO;

=head2 new

 Title        : new
 Usage        : AlignAid->new();
 Function     : Constructor for AlignAid class.
 Returns      : Object handle.
 Required Args: dir       => '' - the output directory (must already exist)
              : db        => '' - the database file
              : fasta     => '' - the file of FASTA queries
 Optional Args: queue     => '' - 'single' by default, 'LSF' for an LSF queue
              :                   ('PBS' is accepted but not implemented yet)
              : program   => '' - the alignment program to use. 'blastn'
              :                   (the default), 'blastp', 'blastx', 'tblastn',
              :                   'tblastx', or 'cross_match'
              : prog_args => '' - args to pass to the alignment program
              : chunk     => '' - number of FASTA queries per queued job,
              :                   1 by default
 Throws       : croaks if required parameters are missing or suspect.
 Comments     : none

=cut

# Constructor.  Validates the required arguments, applies defaults to the
# optional ones, converts the db/dir/fasta paths to absolute paths, resolves
# the alignment program to a full path with which(1), and returns a blessed
# hash reference.
#
# Required args : db, dir, fasta (each must exist; fasta must be a text file)
# Optional args : queue     - 'single' (default), 'LSF' or 'PBS'
#                 program   - defaults to 'blastn'
#                 prog_args - stored as given (may be undef)
#                 chunk     - defaults to 1
# Croaks when a required argument is missing or fails its file test, when an
# LSF/PBS queue is requested but PP could not be loaded, or when the queue
# type is not one of the three supported names.
sub new {
    my ( $class, %arg ) = @_;

    # do some crude safety checks
    if ( !$arg{db} )       { croak "new requires a database"; }
    if ( !-f $arg{db} )    { croak "database [$arg{db}] does not exist"; }
    if ( !$arg{dir} )      { croak "new requires an output dir"; }
    if ( !-e $arg{dir} )   { croak "directory [$arg{dir}] does not exist"; }
    if ( !-d $arg{dir} )   { croak "[$arg{dir}] is not a directory"; }
    if ( !$arg{fasta} )    { croak "new requires a fasta file of queries"; }
    if ( !-f $arg{fasta} ) { croak "fasta [$arg{fasta}] does not exist"; }
    if ( !-T $arg{fasta} ) { croak "fasta [$arg{fasta}] is not a text file"; }

    # set optional parameters if they were passed in or use defaults if not
    $arg{queue} = 'single' unless defined $arg{queue};

    # only allow an LSF or PBS queue if PP loaded
    if ( !$PP_loaded && ( $arg{queue} eq 'LSF' || $arg{queue} eq 'PBS' ) ) {
        croak "The PP module is required for submitting jobs to LSF or PBS queues.";
    }

    if ( $arg{queue} ne 'single' && $arg{queue} ne 'LSF' && $arg{queue} ne 'PBS' ) {
        croak "$arg{queue} is not a supported queue type";
    }
    $arg{program} = 'blastn' unless defined $arg{program};
    $arg{chunk}   = 1        unless defined $arg{chunk};

    # verify all paths are absolute paths
    $arg{db}    = File::Spec->rel2abs( $arg{db} );
    $arg{dir}   = File::Spec->rel2abs( $arg{dir} );
    $arg{fasta} = File::Spec->rel2abs( $arg{fasta} );

    # Resolve the program name to a full path with which(1).  The list form
    # of a piped open passes the name to which as a single argument without
    # going through a shell, so metacharacters in the caller-supplied name
    # are never interpreted.  The path stays empty when which prints nothing
    # or cannot be run.
    $arg{prog_name} = $arg{program};
    my $prog_path = '';
    if ( open( my $which_fh, '-|', 'which', $arg{prog_name} ) ) {
        my $output = do { local $/; <$which_fh> };
        $prog_path = $output if defined $output;

        # a non-zero exit status from which only means "not found"
        close $which_fh;
    }
    chomp $prog_path;
    $arg{program} = $prog_path;

    # setup the object
    my $self = {
        _queue     => $arg{queue},
        _dir       => $arg{dir},
        _database  => $arg{db},
        _fasta     => $arg{fasta},
        _program   => $arg{program},
        _prog_args => $arg{prog_args},
        _prog_name => $arg{prog_name},
        _chunk     => $arg{chunk},
    };
    bless( $self, $class );

    return $self;
}

=head2 submit

 Title        : submit
 Usage        : $job->submit(outfile => 'foo');
 Function     : start the alignment job(s) running.
 Returns      : 1 upon success, 0 upon failure
 Required Args: outfile => '' - the file where you want the output to go
 Throws       : croaks if required parameters are missing or suspect.
 Comments     : none

=cut

# Starts the alignment work for this object.
#
# In 'single' mode the program is run once through system(), with its output
# redirected to the outfile.  In any other mode the FASTA queries are read
# with Bio::SeqIO, collected into groups of $class->{_chunk} entries, and
# each group is submitted through PP as an LSF job; the job objects that made
# it onto the queue are stored in $class->{_jobs}.
#
# Args    : outfile => path (required; croaks when it is undefined)
# Returns : in queue mode, 1 unless at least one job failed the is_in_queue
#           check, in which case 0.  The single-mode return handling is partly
#           outside the visible source.
# Note    : despite its name, $class is the object itself (it is dereferenced
#           as a hash throughout), not a class name.
sub submit {
    my ( $class, %arg ) = @_;

    my @jobs;
    my $ret_val = 1;

    # default args
    # NOTE(review): this appends to the args stored on the object, so every
    # call to submit makes _prog_args longer.
    $class->{_prog_args} .= ' ';
    $class->{_outfile} = $arg{outfile};
    croak "must supply outfile as argument: \$job->submit(outfile => 'foo')"
	unless defined( $class->{_outfile} );

    # run on a single processor
    if ( $class->{_queue} eq 'single' ) {

        # set up output file
        # NOTE(review): the redirection is appended to the stored _prog_args
        # as well, so it persists on the object after this call.
        my $outfile = $class->{_outfile};
        $class->{_prog_args} .= "> $outfile";

        # command line: program, database, query file, then args + redirect
        my $string =
"$class->{_program} $class->{_database} $class->{_fasta} $class->{_prog_args}";

        # single-string system(); the "> file" redirection in the string
        # relies on a shell interpreting it
        $ret_val = system($string);

        # invert system's return values for passing back

        # NOTE(review): the next non-blank line is not Perl; it looks like a
        # page-extraction artifact, and the `if` that the following `else`
        # belongs to is missing from this view.
lib/AlignAid.pm  view on Meta::CPAN

        else { return 0; }
    }
    # submit to a queueing system
    else {

        # open input file
        # NOTE(review): the message ends in the literal text " :? :!" --
        # presumably the OS error variable was intended; confirm.
        my $query_fh = IO::File->new( $class->{_fasta}, "r" )
          or croak "Couldn't open ", $class->{_fasta}, " :? :!";

        my $counter  = 0;              # the counter for the blasts
        # base name for the per-chunk files: process id followed by "000",
        # incremented after each submitted chunk
        my $internal = "$$" . "000";
        # accumulated text for the current chunk
        my $ENTRIES;

        my $fasta = Bio::SeqIO->new(-fh => $query_fh, '-format' => 'fasta');
        while ( my $entry = $fasta->next_seq ) {

            # NOTE(review): $entry is whatever next_seq returns and is
            # appended by plain string concatenation; if that is an object,
            # this stores its stringified form rather than FASTA text --
            # confirm against Bio::SeqIO.
            $ENTRIES .= $entry;
            $counter++;

            # a full chunk has been collected: write it out and submit it
            unless ( $counter < $class->{_chunk} ) {

                # setup output files
                my $fa_file    = $class->{_dir} . "/" . $internal . ".fa";
                my $out_file   = $class->{_dir} . "/" . $internal . ".blast";
                my $error_file = $class->{_dir} . "/" . $internal . ".errors";

                # create temp fasta file
                my $fa_fh = IO::File->new( $fa_file, "w" )
                  or croak "couldn't open $fa_file";
                print $fa_fh $ENTRIES;
                close $fa_fh;

		# set command string depending on program
		my $prog = $class->{_program};
		my $db   = $class->{_database};
		# local copy, so the cross_match flags added below do not
		# persist on the object
		my $args = $class->{_prog_args};

		my $command;
		# the pattern is unanchored, so it matches any program name
		# that contains blastn, blastx or blastp
		if ( $class->{_prog_name} =~ /[t]*blast[nxp]/ ) {
		    $command = "$prog $db $fa_file $args";
		}
		elsif ( $class->{_prog_name} eq 'cross_match' ) {
		    $args .= " -tags -discrep_lists ";
		    $command = "$prog $db $fa_file $args";
		}
		else { croak "unrecognized alignment program"; }

                # submit job
                if ( $class->{_queue} eq 'LSF' ) {
                    my $pp = PP->create(
                        pp_type => 'lsf',
                        command => $command,
                        q       => 'long',
                        eo      => $error_file,
                        output  => $out_file,
                    );

		    # actually start job (it's been holding up til now)
                    $pp->start();

                    # verify job made it onto the queue
                    if ( $pp->is_in_queue(1) ) {
                        push @jobs, $pp;
                    }
                    else {
                        $ret_val = 0;
                        warn "job didn't get submitted!";
                    }

                }
                elsif ( $class->{_queue} eq 'PBS' ) {
                    croak "Sorry! PBS queueing not implemented yet!\n";
                }

                # (re)set counters
                $ENTRIES = "";
                $counter = 0;
                $internal++;
            }    # end of unless
        }    # end of main while

        ### if there are any fastas left
        # NOTE(review): unlike the loop above, this branch never writes
        # $ENTRIES to $fa_file before submitting, does not special-case
        # cross_match, and calls PP->run with e/o keys where the loop uses
        # PP->create + start with eo/output keys -- confirm this is intended.
        if ( $counter > 0 ) {

            # setup output files
            my $fa_file    = $class->{_dir} . "/" . $internal . ".fa";
            my $out_file   = $class->{_dir} . "/" . $internal . ".blast";
            my $error_file = $class->{_dir} . "/" . $internal . ".errors";

            # submit job
            if ( $class->{_queue} eq 'LSF' ) {

                my $pp = PP->run(
                    pp_type => 'lsf',
                    command =>
"$class->{_program} $class->{_database} $fa_file $class->{_prog_args}",
                    q => 'long',
                    e => $error_file,
                    o => $out_file,
                );

                # verify job made it onto the queue
                if ( $pp->is_in_queue(1) ) {
                    push @jobs, $pp;
                }
                else {
                    warn "job didn't get submitted!";
                    $ret_val = 0;
                }
            }
            elsif ( $class->{_queue} eq 'PBS' ) {
                croak "Sorry! PBS queueing not implemented yet!\n";
            }

            # (re)set counters
            $ENTRIES = "";
            $counter = 0;
            $internal++;
        }
    }    # end of multi-processor else

    # add refs to the jobs
    # (replaces any job list stored by an earlier call)
    $class->{_jobs} = \@jobs;

    return $ret_val;
}

=head2 kill_all

 Title        : kill_all
 Usage        : $job->kill_all();
 Function     : kills all running jobs
 Returns      : 1 upon success, 0 upon failure
 Args         : none
 Throws       : croaks on error
 Comments     : none

=cut

sub kill_all {

    my ( $self, %arg ) = @_;

    if ( $self->{_queue} eq 'single' ) {
        croak "single job killing not implemented yet";
    }
    elsif ( $self->{_queue} eq 'LSF' ) {

        # kill each job
        my $i = 0;
        foreach my $job ( @{ $self->{_jobs} } ) {
            $job->kill;

            unless ( $job->is_in_queue(1) ) {
                delete $self->{_jobs}[ $i++ ];
            }
        }

        if ( scalar @{ $self->{_jobs} } > 0 ) {
            my $num_jobs = scalar @{ $self->{_jobs} };
            warn "$num_jobs weren't killed and still are in the queue";
            return 0;



( run in 1.077 second using v1.01-cache-2.11-cpan-13bb782fe5a )