Bio-RNA-RNAaliSplit

 view release on metacpan or  search on metacpan

scripts/RNAalisplit.pl  view on Meta::CPAN

	    "TA" => 6);

Getopt::Long::config('no_ignore_case');
pod2usage(-verbose => 1) unless GetOptions("aln|a=s"      => \$alifile,
					   "constraint|c=s" => \$constraint,
					   "method|m=s"   => \$method,
					   "noribosum"    => sub{$ribosum=0},
					   "norscape"     => sub{$do_rscape=0},
					   "out|o=s"      => \$outdir,
					   "rscapestat=s" => \$rscape_stat,
					   "scaleH"       => \$scaleH,
					   "scaleB"       => \$scaleB,
					   "verbose|v"    => sub{$verbose = 1},
					   "version"      => sub{$show_version = 1},
					   "man"          => sub{pod2usage(-verbose => 2)},
					   "help|h"       => sub{pod2usage(1)}
					   );

if ($show_version == 1){
  print "RNAalisplit $VERSION\n";
  exit(0);
}

unless (-f $alifile){
  warn "Could not find input file provided via --aln|-a option";
  pod2usage(-verbose => 0);
}

croak "ERROR: method dBc must be selected when using constraints, exiting ..."
  if (defined $constraint && $method ne "dBc");

if($method eq "dBp" || $method eq "dHB"){
  $fi = fold_input_alignment($alifile); # for later computation of BP dist
}
if($method eq "dBc"){
  $fi = fold_input_alignment_constrained($alifile,$constraint);
}


my $round = 1;
my $done = 0;
while ($done != 1){
  my $lround = sprintf("%03d", $round);
  my $current_round_name = "round_".$lround;
  my $odirname = dir($outdir,$current_round_name);
  print STDERR "Computing round $lround ...\n";
  alisplit($alifile,$odirname);
  $round++;
  $done = 1;
  #TODO sort output by RNAz SVM prob and re-run with the first few as input alignments
}


###############
# subroutines #
###############

sub alisplit {
  my ($alnfile,$odirn) = @_;
  my ($what,$alifold,$rscape);
  my $format = Bio::AlignIO->_guess_format($alnfile);
  print STDERR "Guess input format $format\n";
  my $AlignSplitObject = Bio::RNA::RNAaliSplit->new(ifile => $alnfile,
						    format => $format,
						    odir => $odirn);
  #print Dumper($AlignSplitObject);
  my $dim = $AlignSplitObject->next_aln->num_sequences;
  my $stkfile = $AlignSplitObject->alignment_stk;
  my $dmfile = make_distance_matrix($AlignSplitObject,$method,$odirn);

  # compute Neighbor Joining tree and do split decomposition
  print STDERR "Perform Split Decomposition ...\n";
  my $sd = Bio::RNA::RNAaliSplit::WrapAnalyseDists->new(ifile => $dmfile,
							odir => $AlignSplitObject->odir);
  print STDERR "Identified ".$sd->count." splits\n";
  if ($do_rscape){
    print join "\t", ("#hint","RNAz prob","z-score","SCI","seqs","statistic","SSCBP","consensus structure","alignment"), "\n";
  }
  else {
    print join "\t", ("#hint","RNAz prob","z-score","SCI","seqs","consensus structure","alignment"), "\n";
  }

  # run RNAalifold for the input alignment
  $alifold = Bio::RNA::RNAaliSplit::WrapRNAalifold->new(ifile => $AlignSplitObject->alignment_aln,
							odir => $AlignSplitObject->odir,
							format => 'C',
							ribosum => $ribosum);

  # run RNAz for the input alignment
  $rnaz = Bio::RNA::RNAaliSplit::WrapRNAz->new(ifile => $AlignSplitObject->alignment_aln,
					       odir => $AlignSplitObject->odir);

  # run R-scape for the input alignment
  if ($do_rscape){
    $rscape = Bio::RNA::RNAaliSplit::WrapRscape->new(ifile => $alifold->alignment_stk, # use RNAalifold-generated stk
						     odir => $AlignSplitObject->odir,
						     statistic => $rscape_stat,
						     nofigures => 1);
    $rscape->status == 0 ? $what = $rscape->TP : $what = "n/a";

    print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,$rscape->statistic,$what,
      $alifold->consensus_struc,$alnfile."\n";
  }
  else {
    print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,
      $alifold->consensus_struc,$alnfile."\n";
  }

  # extract split sets and run RNAz/RNAalifold/R-scape on each of them
  my $splitnr=1;
  while (my $sets = $sd->pop()){
    my ($sa1_c,$sa1_s,$sa2_c,$sa2_s); # subalignments in Clustal and Stockholm
    my $set1 = $$sets{S1};
    my $set2 = $$sets{S2};
    my $token = "split".$splitnr;
    #print "set1: @$set1\n"; #print "set2: @$set2\n";
    ($sa1_c,$sa1_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set1", $set1);
    ($sa2_c,$sa2_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set2", $set2);
    if( scalar(@$set1) > 1){evaluate_alignment($sa1_c,$sa1_s,$AlignSplitObject->odir,scalar(@$set1))}
    if( scalar(@$set2) > 1){evaluate_alignment($sa2_c,$sa2_s,$AlignSplitObject->odir,scalar(@$set2))}
    $splitnr++;



( run in 0.525 second using v1.01-cache-2.11-cpan-39bf76dae61 )