Bio-Grep

 view release on metacpan or  search on metacpan

lib/Bio/Grep/Backend/BackendI.pm  view on Meta::CPAN

        if ( defined $sort_modes{ $self->settings->sort } ) {
            my ($sort_mode) = $self->settings->sort =~ /(\w+)/xms;
            $self->settings->sort($sort_mode);    #make taint happy
        }
        else {
            $self->throw(
                -class => 'Bio::Root::BadParameter',
                -text  => 'Sort mode not valid.',
                -value => 'sort mode'
            );
        }
    }
    return 1;
}

### END OF SETTINGS TESTS

# copies the specified fasta file in the data directory and cerate a file
# <databasename>.nfo with the description, specified in the optional 2nd
# argument
# changes directory! so please save the oldpath before calling this method
sub _copy_fasta_file_and_create_nfo {
    my ( $self, $args ) = @_;

    # throw exception if filename looks wrong
    $self->is_word( $args->{basefilename}, 'Fasta filename' );

    my $newfile = $self->_cat_path_filename( $self->settings->datapath,
        $args->{basefilename} );

    $args->{filename} = $newfile;

    my %dbs = $self->get_databases;

    if ( defined $dbs{ $args->{basefilename} } ) {
        $self->warn( "Database with that name already exists.\n"
                . 'Skipping database generation.' );
        $args->{skip} = 1;
        return;
    }

    if ( defined $args->{copy} && $args->{copy} ) {
        copy( $args->{file}, $newfile )
            or $self->throw(
            -class => 'Bio::Root::IOException',
            -text  => q{Can't copy } . $args->{file} . " to $newfile",
            -value => $OS_ERROR,
            );
    }
    else {
        my $abs_path = $self->is_path( abs_path( $args->{file} ) );
        symlink $abs_path, $newfile || $self->throw(
            -class => 'Bio::Root::IOException',
            -text  => q{Can't symlink } . $abs_path . " to $newfile",
            -value => $OS_ERROR,
        );
    }
    if ( defined $args->{description} ) {
        open my $NFOFILE, '>', $newfile . '.nfo';
        print ${NFOFILE} $args->{description}
            or $self->_cannot_print("$newfile.nfo");
        close $NFOFILE;
    }
    return;
}

sub _guess_alphabet_of_file {
    my ( $self, $filename ) = @_;
    my $in = Bio::SeqIO->new( -file => $filename );
    return $in->next_seq->alphabet;
}

sub _bioseq_query {
    my ($self) = @_;
    my $query_obj = $self->settings->query;
    my $query;

    if ( !defined $query_obj ) {
        $self->throw(
            -class => 'Bio::Root::BadParameter',
            -text  => 'Query not defined.',
        );
    }
    my $db_alphabet
        = $self->get_alphabet_of_database( $self->settings->database );

    if ( eval { $query_obj->isa('Bio::Seq') } ) {
        $query = $query_obj->seq;
    }
    else {
        $query = $query_obj;
        if ( $query =~ m{\A \w+ \z}xms ) {
            if ( $db_alphabet eq 'dna' ) {
                $query =~ tr/uU/tT/;
            }
            $query_obj = Bio::Seq->new(
                -id   => '1',
                -desc => 'Query',
                -seq  => $query
            );
        }
        else {
            $query_obj = Bio::Seq->new( -id => '1', -desc => 'Query' );
        }
    }
    return ( $query, $query_obj, $db_alphabet );
}

# prepares the query, for example calculating the reverse complement
# if necessary
# returns the prepared query. settings->query is unchanged!
sub _prepare_query {
    my $self = shift;
    my ( $query, $seq, $db_alphabet ) = $self->_bioseq_query();

    if ( $seq->alphabet ne $db_alphabet ) {
        $self->throw(
            -class => 'Bio::Root::BadParameter',
            -text  => 'Alphabet of query and database not equal',
            -value => 'Seq: ' . $seq->alphabet . ", DB: $db_alphabet"
        );

lib/Bio/Grep/Backend/BackendI.pm  view on Meta::CPAN

    my $lines = 0;
    while ( my $line = <$ALFILE> ) {
        $lines++;
    }
    close $ALFILE;
    return $lines <= $DNA_ALPHABET_SIZE ? return 'dna' : return 'protein';
}

sub _delete_output {
    my ($self) = @_;
    return 0 if !defined $self->_output_fn;
    return 0 if !-e $self->_output_fn;
    unlink $self->_output_fn
        or $self->throw(
        -class => 'Bio::Root::IOException',
        -text  => q{Can't remove } . $self->_output_fn,
        -value => $OS_ERROR,
        );
    return 1;
}

sub _execute_command {
    my ( $self, $cmd ) = @_;
    $self->_delete_output();
    my ( $tmp_fh, $tmp_fn )
        = tempfile( 'parser_XXXXXXXXXXXX', DIR => $self->settings->tmppath );
    $self->_output_fh($tmp_fh);
    $self->_output_fn($tmp_fn);
    system "$cmd > $tmp_fn";

    return !$CHILD_ERROR;
}

sub _execute_command_and_return_output {
    my ( $self, $cmd ) = @_;
    my ( $writer, $reader, $err );
    my $pid = open3( $writer, $reader, $err, $cmd );
    waitpid $pid, 0;
    my $output = do { local $INPUT_RECORD_SEPARATOR = undef; <$reader> };

    #    my $error = join q{}, <$err>;
    return $output;
}

sub _create_index_and_alphabet_file {
    my ( $self, $filename ) = @_;
    my $in = Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
    my $alphabet = $in->next_seq()->alphabet();
    if ( !defined $self->features->{PROTEINS} && $alphabet eq 'protein' ) {
        $self->throw(
            -class => 'Bio::Root::BadParameter',
            -text  => 'Back-end does not support protein data',
            -value => $alphabet,
        );
    }

    # create a vmatch alphabet file
    open my $ALFILE, '>', "$filename.al1";
    if ( $alphabet eq 'dna' ) {
        print ${ALFILE} "aA\ncC\ngG\ntTuU\nnsywrkvbdhmNSYWRKVBDHM\n"
            or $self->_cannot_print("$filename.al1");
    }
    else {
        print ${ALFILE}
            "L\nV\nI\nF\nK\nR\nE\nD\nA\nG\nS\nT\nN\nQ\nY\nW\nP\nH\nM\nC\nXUBZ*\n"
            or $self->_cannot_print("$filename.al1");
    }
    close $ALFILE;

    # create database from directory of fasta files
    my $idx = Bio::Index::Fasta->new(
        -filename   => $filename . '.idx',
        -write_flag => 1
    );
    $idx->make_index( ($filename) );

    return;
}

sub _create_tmp_query_file {
    my ($self)     = @_;
    my $s          = $self->settings;
    my $query_file = 0;
    my ( $tmp_fh, $tmp_query_file );
    if ( defined $s->query_file ) {
        $query_file = $self->is_path( $s->query_file );
    }
    if ( $s->query_isset && $s->query_file_isset ) {
        $self->throw(
            -class => 'Bio::Root::BadParameter',
            -text  => 'Query and query_file are set. I am confused...',
            -value => $s->query . ' and ' . $s->query_file,
        );
    }
    my @query_seqs = ();

    my $query = q{};
    if ( !$query_file ) {
        $query = $self->_prepare_query();

        ( $tmp_fh, $tmp_query_file ) = tempfile(
            'spatter_XXXXXXXXXXXX',
            DIR    => $s->tmppath,
            UNLINK => !$ENV{BIOGREPDEBUG}
        );    # don't delete when in debug mode, so
              # we can reproduce it.

        # construct a temporary fasta file with the query for vmatch
        my $seqobj = $self->{_query_obj};

        my $outseqio = Bio::SeqIO->new(
            -fh     => $tmp_fh,
            -format => 'fasta'
        );
        $outseqio->write_seq($seqobj);

        push @query_seqs, $seqobj;
        if ( $s->direct_and_rev_com
            && !defined $self->features->{NATIVE_D_A_REV_COM} )
        {
            my $seqobj2 = $seqobj->revcom;
            if ( defined $self->features->{REVCOM_DEFAULT} ) {
                my ($desc)
                    = $seqobj->desc
                    =~ m{\A(.*) \s\(reverse\scomplement\) \z}xms;
                $seqobj2->desc($desc);



( run in 0.811 second using v1.01-cache-2.11-cpan-39bf76dae61 )