Bio-RNA-RNAaliSplit
view release on metacpan or search on metacpan
scripts/RNAalisplit.pl view on Meta::CPAN
"TA" => 6);
Getopt::Long::config('no_ignore_case');
pod2usage(-verbose => 1) unless GetOptions("aln|a=s" => \$alifile,
"constraint|c=s" => \$constraint,
"method|m=s" => \$method,
"noribosum" => sub{$ribosum=0},
"norscape" => sub{$do_rscape=0},
"out|o=s" => \$outdir,
"rscapestat=s" => \$rscape_stat,
"scaleH" => \$scaleH,
"scaleB" => \$scaleB,
"verbose|v" => sub{$verbose = 1},
"version" => sub{$show_version = 1},
"man" => sub{pod2usage(-verbose => 2)},
"help|h" => sub{pod2usage(1)}
);
if ($show_version == 1){
print "RNAalisplit $VERSION\n";
exit(0);
}
unless (-f $alifile){
warn "Could not find input file provided via --aln|-a option";
pod2usage(-verbose => 0);
}
croak "ERROR: method dBc must be selected when using constraints, exiting ..."
if (defined $constraint && $method ne "dBc");
if($method eq "dBp" || $method eq "dHB"){
$fi = fold_input_alignment($alifile); # for later computation of BP dist
}
if($method eq "dBc"){
$fi = fold_input_alignment_constrained($alifile,$constraint);
}
my $round = 1;
my $done = 0;
while ($done != 1){
my $lround = sprintf("%03d", $round);
my $current_round_name = "round_".$lround;
my $odirname = dir($outdir,$current_round_name);
print STDERR "Computing round $lround ...\n";
alisplit($alifile,$odirname);
$round++;
$done = 1;
#TODO sort output by RNAz SVM prob and re-run with the first few as input alignments
}
###############
# subroutines #
###############
sub alisplit {
my ($alnfile,$odirn) = @_;
my ($what,$alifold,$rscape);
my $format = Bio::AlignIO->_guess_format($alnfile);
print STDERR "Guess input format $format\n";
my $AlignSplitObject = Bio::RNA::RNAaliSplit->new(ifile => $alnfile,
format => $format,
odir => $odirn);
#print Dumper($AlignSplitObject);
my $dim = $AlignSplitObject->next_aln->num_sequences;
my $stkfile = $AlignSplitObject->alignment_stk;
my $dmfile = make_distance_matrix($AlignSplitObject,$method,$odirn);
# compute Neighbor Joining tree and do split decomposition
print STDERR "Perform Split Decomposition ...\n";
my $sd = Bio::RNA::RNAaliSplit::WrapAnalyseDists->new(ifile => $dmfile,
odir => $AlignSplitObject->odir);
print STDERR "Identified ".$sd->count." splits\n";
if ($do_rscape){
print join "\t", ("#hint","RNAz prob","z-score","SCI","seqs","statistic","SSCBP","consensus structure","alignment"), "\n";
}
else {
print join "\t", ("#hint","RNAz prob","z-score","SCI","seqs","consensus structure","alignment"), "\n";
}
# run RNAalifold for the input alignment
$alifold = Bio::RNA::RNAaliSplit::WrapRNAalifold->new(ifile => $AlignSplitObject->alignment_aln,
odir => $AlignSplitObject->odir,
format => 'C',
ribosum => $ribosum);
# run RNAz for the input alignment
$rnaz = Bio::RNA::RNAaliSplit::WrapRNAz->new(ifile => $AlignSplitObject->alignment_aln,
odir => $AlignSplitObject->odir);
# run R-scape for the input alignment
if ($do_rscape){
$rscape = Bio::RNA::RNAaliSplit::WrapRscape->new(ifile => $alifold->alignment_stk, # use RNAalifold-generated stk
odir => $AlignSplitObject->odir,
statistic => $rscape_stat,
nofigures => 1);
$rscape->status == 0 ? $what = $rscape->TP : $what = "n/a";
print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,$rscape->statistic,$what,
$alifold->consensus_struc,$alnfile."\n";
}
else {
print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,
$alifold->consensus_struc,$alnfile."\n";
}
# extract split sets and run RNAz/RNAalifold/R-scape on each of them
my $splitnr=1;
while (my $sets = $sd->pop()){
my ($sa1_c,$sa1_s,$sa2_c,$sa2_s); # subalignments in Clustal and Stockholm
my $set1 = $$sets{S1};
my $set2 = $$sets{S2};
my $token = "split".$splitnr;
#print "set1: @$set1\n"; #print "set2: @$set2\n";
($sa1_c,$sa1_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set1", $set1);
($sa2_c,$sa2_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set2", $set2);
if( scalar(@$set1) > 1){evaluate_alignment($sa1_c,$sa1_s,$AlignSplitObject->odir,scalar(@$set1))}
if( scalar(@$set2) > 1){evaluate_alignment($sa2_c,$sa2_s,$AlignSplitObject->odir,scalar(@$set2))}
$splitnr++;
( run in 0.525 second using v1.01-cache-2.11-cpan-39bf76dae61 )