Bio-Grep
view release on metacpan or search on metacpan
lib/Bio/Grep/Backend/BackendI.pm view on Meta::CPAN
if ( defined $sort_modes{ $self->settings->sort } ) {
my ($sort_mode) = $self->settings->sort =~ /(\w+)/xms;
$self->settings->sort($sort_mode); #make taint happy
}
else {
$self->throw(
-class => 'Bio::Root::BadParameter',
-text => 'Sort mode not valid.',
-value => 'sort mode'
);
}
}
return 1;
}
### END OF SETTINGS TESTS
# copies the specified fasta file in the data directory and cerate a file
# <databasename>.nfo with the description, specified in the optional 2nd
# argument
# changes directory! so please save the oldpath before calling this method
sub _copy_fasta_file_and_create_nfo {
my ( $self, $args ) = @_;
# throw exception if filename looks wrong
$self->is_word( $args->{basefilename}, 'Fasta filename' );
my $newfile = $self->_cat_path_filename( $self->settings->datapath,
$args->{basefilename} );
$args->{filename} = $newfile;
my %dbs = $self->get_databases;
if ( defined $dbs{ $args->{basefilename} } ) {
$self->warn( "Database with that name already exists.\n"
. 'Skipping database generation.' );
$args->{skip} = 1;
return;
}
if ( defined $args->{copy} && $args->{copy} ) {
copy( $args->{file}, $newfile )
or $self->throw(
-class => 'Bio::Root::IOException',
-text => q{Can't copy } . $args->{file} . " to $newfile",
-value => $OS_ERROR,
);
}
else {
my $abs_path = $self->is_path( abs_path( $args->{file} ) );
symlink $abs_path, $newfile || $self->throw(
-class => 'Bio::Root::IOException',
-text => q{Can't symlink } . $abs_path . " to $newfile",
-value => $OS_ERROR,
);
}
if ( defined $args->{description} ) {
open my $NFOFILE, '>', $newfile . '.nfo';
print ${NFOFILE} $args->{description}
or $self->_cannot_print("$newfile.nfo");
close $NFOFILE;
}
return;
}
sub _guess_alphabet_of_file {
my ( $self, $filename ) = @_;
my $in = Bio::SeqIO->new( -file => $filename );
return $in->next_seq->alphabet;
}
sub _bioseq_query {
my ($self) = @_;
my $query_obj = $self->settings->query;
my $query;
if ( !defined $query_obj ) {
$self->throw(
-class => 'Bio::Root::BadParameter',
-text => 'Query not defined.',
);
}
my $db_alphabet
= $self->get_alphabet_of_database( $self->settings->database );
if ( eval { $query_obj->isa('Bio::Seq') } ) {
$query = $query_obj->seq;
}
else {
$query = $query_obj;
if ( $query =~ m{\A \w+ \z}xms ) {
if ( $db_alphabet eq 'dna' ) {
$query =~ tr/uU/tT/;
}
$query_obj = Bio::Seq->new(
-id => '1',
-desc => 'Query',
-seq => $query
);
}
else {
$query_obj = Bio::Seq->new( -id => '1', -desc => 'Query' );
}
}
return ( $query, $query_obj, $db_alphabet );
}
# prepares the query, for example calculating the reverse complement
# if necessary
# returns the prepared query. settings->query is unchanged!
sub _prepare_query {
my $self = shift;
my ( $query, $seq, $db_alphabet ) = $self->_bioseq_query();
if ( $seq->alphabet ne $db_alphabet ) {
$self->throw(
-class => 'Bio::Root::BadParameter',
-text => 'Alphabet of query and database not equal',
-value => 'Seq: ' . $seq->alphabet . ", DB: $db_alphabet"
);
lib/Bio/Grep/Backend/BackendI.pm view on Meta::CPAN
my $lines = 0;
while ( my $line = <$ALFILE> ) {
$lines++;
}
close $ALFILE;
return $lines <= $DNA_ALPHABET_SIZE ? return 'dna' : return 'protein';
}
sub _delete_output {
my ($self) = @_;
return 0 if !defined $self->_output_fn;
return 0 if !-e $self->_output_fn;
unlink $self->_output_fn
or $self->throw(
-class => 'Bio::Root::IOException',
-text => q{Can't remove } . $self->_output_fn,
-value => $OS_ERROR,
);
return 1;
}
sub _execute_command {
my ( $self, $cmd ) = @_;
$self->_delete_output();
my ( $tmp_fh, $tmp_fn )
= tempfile( 'parser_XXXXXXXXXXXX', DIR => $self->settings->tmppath );
$self->_output_fh($tmp_fh);
$self->_output_fn($tmp_fn);
system "$cmd > $tmp_fn";
return !$CHILD_ERROR;
}
sub _execute_command_and_return_output {
my ( $self, $cmd ) = @_;
my ( $writer, $reader, $err );
my $pid = open3( $writer, $reader, $err, $cmd );
waitpid $pid, 0;
my $output = do { local $INPUT_RECORD_SEPARATOR = undef; <$reader> };
# my $error = join q{}, <$err>;
return $output;
}
sub _create_index_and_alphabet_file {
my ( $self, $filename ) = @_;
my $in = Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
my $alphabet = $in->next_seq()->alphabet();
if ( !defined $self->features->{PROTEINS} && $alphabet eq 'protein' ) {
$self->throw(
-class => 'Bio::Root::BadParameter',
-text => 'Back-end does not support protein data',
-value => $alphabet,
);
}
# create a vmatch alphabet file
open my $ALFILE, '>', "$filename.al1";
if ( $alphabet eq 'dna' ) {
print ${ALFILE} "aA\ncC\ngG\ntTuU\nnsywrkvbdhmNSYWRKVBDHM\n"
or $self->_cannot_print("$filename.al1");
}
else {
print ${ALFILE}
"L\nV\nI\nF\nK\nR\nE\nD\nA\nG\nS\nT\nN\nQ\nY\nW\nP\nH\nM\nC\nXUBZ*\n"
or $self->_cannot_print("$filename.al1");
}
close $ALFILE;
# create database from directory of fasta files
my $idx = Bio::Index::Fasta->new(
-filename => $filename . '.idx',
-write_flag => 1
);
$idx->make_index( ($filename) );
return;
}
sub _create_tmp_query_file {
my ($self) = @_;
my $s = $self->settings;
my $query_file = 0;
my ( $tmp_fh, $tmp_query_file );
if ( defined $s->query_file ) {
$query_file = $self->is_path( $s->query_file );
}
if ( $s->query_isset && $s->query_file_isset ) {
$self->throw(
-class => 'Bio::Root::BadParameter',
-text => 'Query and query_file are set. I am confused...',
-value => $s->query . ' and ' . $s->query_file,
);
}
my @query_seqs = ();
my $query = q{};
if ( !$query_file ) {
$query = $self->_prepare_query();
( $tmp_fh, $tmp_query_file ) = tempfile(
'spatter_XXXXXXXXXXXX',
DIR => $s->tmppath,
UNLINK => !$ENV{BIOGREPDEBUG}
); # don't delete when in debug mode, so
# we can reproduce it.
# construct a temporary fasta file with the query for vmatch
my $seqobj = $self->{_query_obj};
my $outseqio = Bio::SeqIO->new(
-fh => $tmp_fh,
-format => 'fasta'
);
$outseqio->write_seq($seqobj);
push @query_seqs, $seqobj;
if ( $s->direct_and_rev_com
&& !defined $self->features->{NATIVE_D_A_REV_COM} )
{
my $seqobj2 = $seqobj->revcom;
if ( defined $self->features->{REVCOM_DEFAULT} ) {
my ($desc)
= $seqobj->desc
=~ m{\A(.*) \s\(reverse\scomplement\) \z}xms;
$seqobj2->desc($desc);
( run in 0.811 second using v1.01-cache-2.11-cpan-39bf76dae61 )