BioPerl
view release on metacpan or search on metacpan
examples/tools/standaloneblast.pl view on Meta::CPAN
# vary_masks(): Example demonstrating varying of parameter
#
# args:
# $queryseq - query sequence (can be filename (fasta), or Bio:Seq object)
# $maskvalues - reference to array of values to be used for the mask threshold
# returns: nothing
# Now we'll perform several blasts, 1 for each value of the mask threshold.
# In the default case, we use thresholds of 25%, 50% and 75%. (Recall the threshold is
# % of resudues which must match the consensus residue before deciding to use the
# position specific scoring matrix rather than the default - BLOSUM or PAM - matrix)
# We then automatically parse the resulting reports to identify which hits
# are found with any of the mask threshold values and which with only one of them.
#
sub vary_masks {
my $queryseq = shift;
my $values = shift;
print "Beginning mask-varying example... \n";
my ($report, $sbjct, $maskvalue);
my $hashhits = { }; # key is hit id, value is string of param values for which hit was found
# Get the alignment file
my $str = Bio::AlignIO->new(-file=> "$queryaln", '-format' => "$queryalnformat", );
my $aln = $str->next_aln();
foreach $maskvalue (@$values) {
print "Performing BLAST with mask threshold = $maskvalue % \n";
# Create the proper mask for 'jumpstarting'
my $mask = &create_mask($aln, $maskvalue);
my $report2 = $factory->blastpgp($queryseq, $aln, $mask);
my $r = $report2->next_result;
while($sbjct = $r->next_hit) {
$hashhits->{$sbjct->name} .= "$maskvalue";
}
}
&compare_runs( 'mask threshold' , $values , $hashhits);
return 1;
}
#################################################
# aligned_blast ():
#
#
# args:
# $queryseq - query sequence (can be filename (fasta), or Bio:Seq object)
# $queryaln - file containing alignment to be used to "jumpstart" psiblast in "-B mode"
# $queryaln *must contain $queryseq with the same name and length
# (psiblast is very picky)
# $queryalnformat - format of alignment (can = "fasta", "msf", etc)
# $jiter - number of iterations in psiblast run
# $maskvalue - threshold indicating how similar residues must be at a sequence location
# before position-specific-scoring matrix is used
# : "0" => use position specific matrix at all residues, or
# "100" => use default (eg BLOSUM) at all residues
# returns: nothing
# For this example, we'll compare the results of psiblast depending on whether psiblast itself is
# used to identify an alignment to use for blasting or whether an external alignment is given to
# psiblast
sub aligned_blast {
my $queryseq = shift;
my $queryaln = shift;
my $queryalnformat = shift;
my $jiter = shift;
my $maskvalue = shift;
my $hashhits = { };
my ($sbjct, $id);
print "\nBeginning aligned blast example... \n";
# First we do a single-iteration psiblast search but with a specified alignment to
# "jump start" psiblast
print "\nBeginning jump-start psiblast ... \n";
my $tag1 = 'jumpstart';
# $factory->j('1'); # perform single iteration
# Get the alignment file
my $str = Bio::AlignIO->new(-file=> "$queryaln", '-format' => "$queryalnformat", );
my $aln = $str->next_aln();
# Create the proper mask for 'jumpstarting'
my $mask = &create_mask($aln, $maskvalue);
my $report2 = $factory->blastpgp($queryseq, $aln, $mask);
while($sbjct = $report2->next_result) {
$hashhits->{$sbjct->name} .= "$tag1";
}
# Then we do a "plain" psiblast multiple-iteration search
print "\nBeginning multiple-iteration psiblast ... \n";
my $undefineB ;
$factory->B($undefineB);
my $tag2 = 'iterated';
$factory->j($jiter); # 'j' is blast parameter for # of iterations
my $report1 = $factory->blastpgp($queryseq);
my $total_iterations = $report1->number_of_iterations;
my $last_iteration = $report1->round($total_iterations);
while($sbjct = $last_iteration->next_result) {
$hashhits->{$sbjct->name} .= "$tag2";
}
# Now we compare the results of the searches
my $tagtype = 'iterated_or_jumpstart';
my $values = [ $tag1, $tag2];
&compare_runs( $tagtype , $values , $hashhits);
return 1;
}
#################################################
# create_mask(): creates a mask for the psiblast jumpstart alignment
# that determines at what residues position-specific
# scoring matrices (PSSMs) are used and at what
# residues default scoring matrices (eg BLOSUM) are
# used. See psiblast documentation for more details,
# args:
# $aln - SimpleAlign object with alignment
# $maskvalue - label describing type of "tags"
# returns: actual mask, ie a string of 0's and 1's which is the
# same length as each sequence in the alignment and has
# a "1" at locations where (PSSMs) are to be used
# and a "0" at all other locations.
sub create_mask {
my $aln = shift;
my $maskvalue = shift;
my $mask = "";
die "psiblast jumpstart requires all sequences to be same length \n"
unless $aln->is_flush();
my $len = $aln->length();
if ($maskvalue =~ /^(\d){1,3}$/ ) {
$mask = $aln->consensus_string($maskvalue) ;
$mask =~ s/[^\?]/1/g ;
$mask =~ s/\?/0/g ;
}
else { die "maskvalue must be an integer between 0 and 100 \n"; }
return $mask ;
}
#----------------
sub example_usage {
#----------------
#-----------------------
# Prints usage information for general parameters.
print STDERR <<"QQ_PARAMS_QQ";
Command-line accessible script variables and commands:
-------------------------------
-h : Display this usage info and exit.
-in <str> : File containing input sequences in fasta format (default = $queryseq) .
-inaln <str> : File containing input alignment for example 3 (default = $queryaln) .
-alnfmt <str> : Format of input alignment for example 3, eg "msf", "fasta", "pfam".
(default = $queryalnformat) .
-do <int> : Number of test to be executed ("1" => vary parameters,
"3" => compare iterated & jumpstart psiblast.) If omitted,
three default tests performed.
-exec <str> : Blast executable to be used in example 1. Can be "blastall" or
"blastpgp" (default is "blastpgp").
-param <str> : Parameter to be varied in example 1. Any blast parameter
can be varied (default = 'MATRIX')
-paramvals <str>: String containing parameter values in example 1, separated
by ":"'s. (default = 'BLOSUM62:BLOSUM80:PAM70')
-iter <int> : Maximum number of iterations in psiblast in example 3 (default = 2)
-maskvals <str>: String containing mask threshold values (in per-cents) for example 3,
separated by ":"'s. (default = '50:75:25')
In addition, any valid Blast parameter can be set using the syntax "parameter=>value" as in "database=>swissprot"
So some typical command lines might be:
>standaloneblast.pl -do 1 -param expectation -paramvals '1e-10:1e-5'
or
>standaloneblast.pl -do 1 -exec blastall -param q -paramvals '-1:-7' -in='t/dna1.fa' "pr=>blastn" "d=>ecoli.nt"
or
>standaloneblast.pl -do 4 -maskvals 0 -iter 3
or
>standaloneblast.pl -do 3 -maskvals '10:50:90' -in 't/data/cysprot1.fa' -alnfmt msf -inaln 't/cysprot.msf'
QQ_PARAMS_QQ
}
( run in 1.213 second using v1.01-cache-2.11-cpan-71847e10f99 )