Bio-MUST-Drivers

 view release on metacpan or  search on metacpan

lib/Bio/MUST/Drivers/Exonerate/Aligned.pm  view on Meta::CPAN



has 'dna_seq' => (
    is       => 'ro',
    isa      => 'Bio::MUST::Core::Seq',
    required => 1,
);

has 'pep_seq' => (
    is       => 'ro',
    isa      => 'Bio::MUST::Core::Seq',
    required => 1,
);

# TODO: homogeneize with main class
has 'code' => (
    is       => 'ro',
    isa      => 'Str',
    default  => '1',
);

has 'model' => (
    is       => 'ro',
    isa      => 'Str',
    init_arg => undef,
    writer   => '_set_model',
);

has $_ => (
    is       => 'ro',
    isa      => 'Num',
    init_arg => undef,
    writer   => '_set_' . $_,
) for qw(
     query_start  query_end
    target_start target_end
    score
);

has $_ . '_seq' => (
    is       => 'ro',
    isa      => 'Bio::MUST::Core::Seq',
    init_arg => undef,
    writer   => '_set_' . $_,
) for qw(query target spliced);


# TODO: subclass this to avoid code repetition with main class
# TODO: handle reverse complement

sub BUILD {
    my $self = shift;

    # provision executable
    my $app = use_module('Bio::MUST::Provision::Exonerate')->new;
       $app->meet();

    # build temp Ali file for input DNA seq
    my $dna = Ali->new(
        seqs => [ $self->dna_seq ],
        guessing => 0,
    );
    my $dnafile = $dna->temp_fasta;

    # build temp Ali file for input PEP seq
    my $pep = Ali->new(
        seqs => [ $self->pep_seq ],
        guessing => 0,
    );
    my $pepfile = $pep->temp_fasta;

    # execute exonerate
    my $outfile = $self->_exonerate($dnafile, $pepfile);

    # parse outfile and populate seqs
    $self->_parse_outfile($outfile);

    # check that everything ran fine
    unless ( $self->query_seq->seq_len ) {
        carp '[BMD] Warning: exonerate could not align seqs;'
            . ' returning empty seqs!';
        ### dnafile: join q{}, "\n", file($dnafile)->slurp
        ### pepfile: join q{}, "\n", file($pepfile)->slurp
    }
    elsif  ( $self->query_seq->seq_len != $self->target_seq->seq_len ) {
        carp '[BMD] Warning: query and target seqs not of same length!';
        ### dnafile: join q{}, "\n", file($dnafile)->slurp
        ### pepfile: join q{}, "\n", file($pepfile)->slurp
    }
    elsif  ( $self->spliced_seq->seq_len != 3 * $self->target_seq->seq_len ) {
        carp '[BMD] Warning: DNA and protein target seqs not of same length!';
        ### dnafile: join q{}, "\n", file($dnafile)->slurp
        ### pepfile: join q{}, "\n", file($pepfile)->slurp
    }

    # unlink temp files
    file($_)->remove for ($dnafile, $pepfile, $outfile);

    return;
}


const my %ARGS_FOR => (
    'lc' => 'protein2genome',
    'bf' => 'protein2genome:bestfit --exhaustive',
);

const my $NEW_HSP => qr{C4 \s Alignment:}xms;

sub _exonerate {
    my $self    = shift;
    my $dnafile = shift;
    my $pepfile = shift;

    # first try with heuristic 'protein2genome' model
    # if it yields > 1 HSPs try with exhaustive 'protein2genome:bestfit' model
    # it this better model fails then return first HSP from lesser model

    my $pgm = 'exonerate';
    my $code = $self->code;

    my $return = q{};
    my $remove = q{};

    my %outfile_for;
    my $hsp_n;

    MODEL:



( run in 2.430 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )