Bio-RNA-RNAaliSplit

 view release on metacpan or  search on metacpan

scripts/RNAalisplit.pl  view on Meta::CPAN

  }
  else {
    print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,
      $alifold->consensus_struc,$alnfile."\n";
  }

  # extract split sets and run RNAz/RNAalifold/R-scape on each of them
  my $splitnr=1;
  while (my $sets = $sd->pop()){
    my ($sa1_c,$sa1_s,$sa2_c,$sa2_s); # subalignments in Clustal and Stockholm
    my $set1 = $$sets{S1};
    my $set2 = $$sets{S2};
    my $token = "split".$splitnr;
    #print "set1: @$set1\n"; #print "set2: @$set2\n";
    ($sa1_c,$sa1_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set1", $set1);
    ($sa2_c,$sa2_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set2", $set2);
    if( scalar(@$set1) > 1){evaluate_alignment($sa1_c,$sa1_s,$AlignSplitObject->odir,scalar(@$set1))}
    if( scalar(@$set2) > 1){evaluate_alignment($sa2_c,$sa2_s,$AlignSplitObject->odir,scalar(@$set2))}
    $splitnr++;
  }
}

sub evaluate_alignment {
  my ($aln, $stk, $odir, $count) = @_;
  my ($hint,$rnazo,$alifoldo,$rscapeo,$cs,$what);

  $rnazo =  Bio::RNA::RNAaliSplit::WrapRNAz->new(ifile => $aln,
						 odir => $odir);
  ($rnazo->P > $rnaz->P) ? ($hint = "?") : ($hint = "-");
  if($rnazo->P > 1.2*$rnaz->P){$hint = "x"};
  if($rnazo->P > 1.3*$rnaz->P){$hint = "X"};
  $alifoldo = Bio::RNA::RNAaliSplit::WrapRNAalifold->new(ifile => $aln,
							 odir => $odir,
							 ribosum => $ribosum);
  $cs = $alifoldo->consensus_struc;
  if ($do_rscape) {
    $rscapeo =  Bio::RNA::RNAaliSplit::WrapRscape->new(ifile => $alifoldo->alignment_stk, # use RNAalifold-generated stk
						       odir => $odir,
						       statistic => $rscape_stat,
						       nofigures => 1);
    if ($rscapeo->status == 0){ # all OK
      $what = $rscapeo->TP;
    }
    elsif ($rscapeo->status == 1){  # no significant basepairs
      $what = "0:no_sign";
    }
    elsif ($rscapeo->status == 2){  # covariation scores are almost constant, no further analysis
      $what = "0:no_data";
    }
    else { $what = "n/a" }

    print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$rscapeo->statistic,$what,$cs,$aln), "\n";
  }
  else {
    print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$cs,$aln), "\n";
  }
}

sub make_distance_matrix {
  my ($ASO,$m,$od) = @_;
  my $this_function = (caller(0))[3];
  my ($i,$j,$Dfile,$dHn,$dHx,$dBp,$dHB);
  my @pw_alns = ();
  my @D = ();
  my $check = 1;
  my $dim = $ASO->next_aln->num_sequences;

  # extract all pairwise alignments
  print STDERR "Extracting pairwise alignments ...\n";
  for ($i=1;$i<$dim;$i++){
    for($j=$i+1;$j<=$dim;$j++){
      my $token = join "_", "pw",$i,$j;
      my ($sa_clustal,$sa_stockholm) = $ASO->dump_subalignment("pairwise", $token, [$i,$j]);
      push @pw_alns, $sa_clustal->stringify;
    }
  }

  # initialize distance matrix
  for($i=0;$i<$dim;$i++){
    for ($j=0;$j<$dim;$j++){
      $D[$dim*$i+$j] = 0.;
    }
  }

  # build distance matrix based on pairwise alignments
  print STDERR "Constructing distance matrix based on pairwise alignments ...\n";
  foreach my $ali (@pw_alns){
    my $pw_aso = Bio::RNA::RNAaliSplit->new(ifile => $ali,
					    format => "ClustalW",
					    odir => $od);
    my ($i,$j) = sort split /_/, $pw_aso->ifilebn;

    $dHn = $pw_aso->hammingdistN;
    $dHx = $pw_aso->hammingdistX;
    my $id1 = $pw_aso->next_aln->get_seq_by_pos(1)->display_id;
    my $id2 = $pw_aso->next_aln->get_seq_by_pos(2)->display_id;

    if ($m eq "SCI"){
      # distance = -log( [normalized] pairwise SCI)
      my ($sci,$dsci);
      if($pw_aso->sci > 1){$sci = 1}
      elsif ($pw_aso->sci == 0.){$sci = 0.000001}
      else { $sci = $pw_aso->sci; }
      $dsci = -1*log($sci)/log(10);
      $D[$dim*($i-1)+($j-1)] =  $D[$dim*($j-1)+($i-1)] = $dsci;
      #  print "$i -> $j : $sci\t$dsci\n";
    }
    elsif ($m eq "dHn") { # hamming dist with gaps replaced by Ns
      $D[$dim*($i-1)+($j-1)] =  $D[$dim*($j-1)+($i-1)] = $dHn;
    }
    elsif ($m eq "dHx") { # hamming dist with all gap columns removed
      $D[$dim*($i-1)+($j-1)] =  $D[$dim*($j-1)+($i-1)] = $dHx;
    }
    elsif ($m eq "dBp") { # base pair distance
      carp "ERROR [$this_function] sequence \'display_id\' not found in input alignment"
	unless (exists $$fi{$id1} && exists $$fi{$id2});
      my $gss1 = $$fi{$id1}->{gss};
      my $gss2 = $$fi{$id2}->{gss};
      my $dBp = RNA::bp_distance($gss1,$gss2);
      $D[$dim*($i-1)+($j-1)] =  $D[$dim*($j-1)+($i-1)] = $dBp;
    }



( run in 0.829 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )