Bio-RNA-RNAaliSplit
view release on metacpan or search on metacpan
scripts/RNAalisplit.pl view on Meta::CPAN
}
else {
print join "\t", "-",$rnaz->P,$rnaz->z,$alifold->sci,$dim,
$alifold->consensus_struc,$alnfile."\n";
}
# extract split sets and run RNAz/RNAalifold/R-scape on each of them
my $splitnr=1;
while (my $sets = $sd->pop()){
my ($sa1_c,$sa1_s,$sa2_c,$sa2_s); # subalignments in Clustal and Stockholm
my $set1 = $$sets{S1};
my $set2 = $$sets{S2};
my $token = "split".$splitnr;
#print "set1: @$set1\n"; #print "set2: @$set2\n";
($sa1_c,$sa1_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set1", $set1);
($sa2_c,$sa2_s) = $AlignSplitObject->dump_subalignment("splits", $token.".set2", $set2);
if( scalar(@$set1) > 1){evaluate_alignment($sa1_c,$sa1_s,$AlignSplitObject->odir,scalar(@$set1))}
if( scalar(@$set2) > 1){evaluate_alignment($sa2_c,$sa2_s,$AlignSplitObject->odir,scalar(@$set2))}
$splitnr++;
}
}
sub evaluate_alignment {
my ($aln, $stk, $odir, $count) = @_;
my ($hint,$rnazo,$alifoldo,$rscapeo,$cs,$what);
$rnazo = Bio::RNA::RNAaliSplit::WrapRNAz->new(ifile => $aln,
odir => $odir);
($rnazo->P > $rnaz->P) ? ($hint = "?") : ($hint = "-");
if($rnazo->P > 1.2*$rnaz->P){$hint = "x"};
if($rnazo->P > 1.3*$rnaz->P){$hint = "X"};
$alifoldo = Bio::RNA::RNAaliSplit::WrapRNAalifold->new(ifile => $aln,
odir => $odir,
ribosum => $ribosum);
$cs = $alifoldo->consensus_struc;
if ($do_rscape) {
$rscapeo = Bio::RNA::RNAaliSplit::WrapRscape->new(ifile => $alifoldo->alignment_stk, # use RNAalifold-generated stk
odir => $odir,
statistic => $rscape_stat,
nofigures => 1);
if ($rscapeo->status == 0){ # all OK
$what = $rscapeo->TP;
}
elsif ($rscapeo->status == 1){ # no significant basepairs
$what = "0:no_sign";
}
elsif ($rscapeo->status == 2){ # covariation scores are almost constant, no further analysis
$what = "0:no_data";
}
else { $what = "n/a" }
print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$rscapeo->statistic,$what,$cs,$aln), "\n";
}
else {
print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$cs,$aln), "\n";
}
}
sub make_distance_matrix {
my ($ASO,$m,$od) = @_;
my $this_function = (caller(0))[3];
my ($i,$j,$Dfile,$dHn,$dHx,$dBp,$dHB);
my @pw_alns = ();
my @D = ();
my $check = 1;
my $dim = $ASO->next_aln->num_sequences;
# extract all pairwise alignments
print STDERR "Extracting pairwise alignments ...\n";
for ($i=1;$i<$dim;$i++){
for($j=$i+1;$j<=$dim;$j++){
my $token = join "_", "pw",$i,$j;
my ($sa_clustal,$sa_stockholm) = $ASO->dump_subalignment("pairwise", $token, [$i,$j]);
push @pw_alns, $sa_clustal->stringify;
}
}
# initialize distance matrix
for($i=0;$i<$dim;$i++){
for ($j=0;$j<$dim;$j++){
$D[$dim*$i+$j] = 0.;
}
}
# build distance matrix based on pairwise alignments
print STDERR "Constructing distance matrix based on pairwise alignments ...\n";
foreach my $ali (@pw_alns){
my $pw_aso = Bio::RNA::RNAaliSplit->new(ifile => $ali,
format => "ClustalW",
odir => $od);
my ($i,$j) = sort split /_/, $pw_aso->ifilebn;
$dHn = $pw_aso->hammingdistN;
$dHx = $pw_aso->hammingdistX;
my $id1 = $pw_aso->next_aln->get_seq_by_pos(1)->display_id;
my $id2 = $pw_aso->next_aln->get_seq_by_pos(2)->display_id;
if ($m eq "SCI"){
# distance = -log( [normalized] pairwise SCI)
my ($sci,$dsci);
if($pw_aso->sci > 1){$sci = 1}
elsif ($pw_aso->sci == 0.){$sci = 0.000001}
else { $sci = $pw_aso->sci; }
$dsci = -1*log($sci)/log(10);
$D[$dim*($i-1)+($j-1)] = $D[$dim*($j-1)+($i-1)] = $dsci;
# print "$i -> $j : $sci\t$dsci\n";
}
elsif ($m eq "dHn") { # hamming dist with gaps replaced by Ns
$D[$dim*($i-1)+($j-1)] = $D[$dim*($j-1)+($i-1)] = $dHn;
}
elsif ($m eq "dHx") { # hamming dist with all gap columns removed
$D[$dim*($i-1)+($j-1)] = $D[$dim*($j-1)+($i-1)] = $dHx;
}
elsif ($m eq "dBp") { # base pair distance
carp "ERROR [$this_function] sequence \'display_id\' not found in input alignment"
unless (exists $$fi{$id1} && exists $$fi{$id2});
my $gss1 = $$fi{$id1}->{gss};
my $gss2 = $$fi{$id2}->{gss};
my $dBp = RNA::bp_distance($gss1,$gss2);
$D[$dim*($i-1)+($j-1)] = $D[$dim*($j-1)+($i-1)] = $dBp;
}
( run in 0.829 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )