Bio-RNA-RNAaliSplit

 view release on metacpan or  search on metacpan

lib/Bio/RNA/RNAaliSplit.pm  view on Meta::CPAN

		       default => '-1',
		       predicate => 'has_hammingX',
		       init_arg => undef,
		      );


with 'FileDirUtil';

sub BUILD {
    my $self = shift;
    my $this_function = (caller(0))[3];
    confess "ERROR [$this_function] \$self->ifile not available"
      unless ($self->has_ifile);
    $self->alignment({-file => $self->ifile,
		      -format => $self->format,
		      -displayname_flat => 1} );
    $self->next_aln($self->alignment->next_aln);
    $self->next_aln->set_displayname_safe();
    $self->_get_alen();
    $self->_get_nrseq();
    unless($self->has_odir){

lib/Bio/RNA/RNAaliSplit.pm  view on Meta::CPAN

    $stkio->write_aln($stk2);
    $self->alignment_aln($ialnfile);
    $self->alignment_stk($istkfile);
    # end dump ifile

    if ($self->next_aln->num_sequences == 2){ $self->_hamming() }
  }

sub dump_subalignment {
  my ($self,$alipathsegment,$token,$what) = @_;
  my $this_function = (caller(0))[3];
  my ($aln,$aln2,$name);

  croak "ERROR [$this_function] argument 'token' not provided"
    unless (defined($token));

  # create output path
  my $ids = join "_", @$what;
  unless (defined($alipathsegment)){$alipathsegment = "tmp"}
  my $oodir = $self->odir->subdir($alipathsegment);
  mkdir($oodir);

lib/Bio/RNA/RNAaliSplit.pm  view on Meta::CPAN

  foreach my $seq ($aln->each_seq) {
    print $seqfile $seq->seq,"\n";
  }
  close($seqfile);

  return ( $oalifile_clustal,$oalifile_stockholm );
}

sub _hamming {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my $hamming = -1;
  croak "ERROR [$this_function] cannot compute Hamming distance for $self->next_aln->num_sequences sequences"
    if ($self->next_aln->num_sequences != 2);

  my $aln =  $self->next_aln->select_noncont((1,2));

  # compute Hamming distance of the aligned sequences, replacing gaps with Ns
  my $alnN = dclone($aln);
  croak("ERROR [$this_function] cannot replace gaps with Ns")
    unless ($alnN->map_chars('-','N') == 1);

lib/Bio/RNA/RNAaliSplit/AliFeature.pm  view on Meta::CPAN

		   isa => 'HashRef',
		   predicate => 'hash_csp_hash',
		   init_arg => undef,
		   writer => '_csp_hash_writer', # private writer 4 ro attribute
		  );

with 'FileDirUtil';

sub BUILD {
  my $self = shift;
   my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] \$self->ifile not available"
    unless ($self->has_ifile);
  $self->alignment({-file => $self->ifile,
		    -format => $self->format,
		    -displayname_flat => 1} ); # discard position in sequence IDs
  $self->next_aln($self->alignment->next_aln);
  $self->next_aln->set_displayname_safe();
  $self->_get_alen();
  $self->_get_nrseq();
  $self->set_ifilebn;

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN

has 'dim' => (
	      is => 'rw',
	      isa => 'Num',
	      predicate => 'has_dim',
	     );

with 'FileDirUtil';

sub BUILD {
  my $self = shift;
  my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] \$self->ifile not available"
    unless ($self->has_ifile);
   $analysedists = can_run('AnalyseDists') or
    croak "ERROR [$this_function] AnalyseDists not found";
  unless($self->has_odir){
    my $odir_name = "as";
    $self->odir( [$self->ifile->dir,$odir_name] );
  }
  $oodir = $self->odir->subdir("analysedists");
  my @created = make_path($oodir, {error => \my $err});

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN


  # do computation
  $self->NeighborJoining();
  $self->SplitDecomposition();
  $self->nr_splits($self->count);
}

sub NeighborJoining {
  # TODO  warn if negative branch lengths occur
  my $self = shift;
  my $this_function = (caller(0))[3];
  my ($nj_outfilename,$nj_treefilename,$nj_out,$nj_tree);

  if ($self->has_basename){
    $nj_outfilename = $self->basename.".nj.out";
    $nj_treefilename = $self->basename.".nj.ps";
  }
  else{
    $nj_outfilename = "nj.out";
    $nj_treefilename = "nj.ps";
  }

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN

  foreach my $line( @out){print $fh $line,"\n"}
  close($fh);
  rename "nj.ps", $nj_tree;
  $self->_parse_nj($stdout_buffer);
}

# parse the output of AnalyseDists -Xn
# populate array of hashes, each holding two sets of nodes corresponding to splits
sub _parse_nj {
  my ($self,$nj) = @_;
  my $this_function = (caller(0))[3];
  my %data = ();
  my $num;
  my $count = 1;
  my @lines =  split /\n/,$nj;
  foreach my $line (@lines){
    my @s1 = ();
    my @set1 = ();
    my @set2 = ();
    next if ($line =~ m/^>\s+\D/);
    if ($line =~ m/^>\s+(\d+)/){$num = $1;next}

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN

      #print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
    }
#    print Dumper(\@set1);
#    print Dumper(\@set2);
#    print "+++++++++++++++++++++++++++++++++++\n";
  }
}

sub SplitDecomposition {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my ($sd_outfilename,$sd_out);
  if ($self->has_basename){$sd_outfilename = $self->basename.".sd.out"}
  else{$sd_outfilename = "sd.out"}
  $sd_out = file($oodir,$sd_outfilename);
  open my $fh, ">", $sd_out;

  my $sd_cmd = $analysedists." -Xs < ".$self->ifile;
  my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
    run( command => $sd_cmd, verbose => 0 );
  if( !$success ) {

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN

  my @out = split /\n/, $stdout_buffer;
  foreach my $line( @out){print $fh $line,"\n"}
  close($fh);
  $self->_parse_sd($stdout_buffer); # parse split graph data
}

# parse the output of AnalyseDists -Xs
# populate array of hashes, each holding two sets of nodes corresponding to splits
sub _parse_sd {
  my ($self,$sd) = @_;
  my $this_function = (caller(0))[3];
  my $num;
  my @lines =  split /\n/,$sd;
  foreach my $line (@lines){
    next if ($line =~ m/^>\s+\D/);
    if ($line =~ m/^>\s+(\d+)/){$num = $1;next}
    last if ($line =~ m/^\s*\d+\.\d+\s+\:\s+\{\s+\[Split prime fraction\]\s+\}/g );
 #   print "$line\n";
    croak "ERROR [$this_function] Cannot parse split graph line\n$line\n"
      unless ($line =~ m/^\s*\d+\s+\d+\.\d+\s+:\s+\{\s+([\d+\s+]+)\|/g);
    my @foo = split /\s+/, $1; # set 1

lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm  view on Meta::CPAN

      $self->add( {S1=>\@set1,S2=>\@set2,ori=>"SD",type=>$type} );
    }
    else{
    #  print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
    }
  }
}

sub _get_dim {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my $dim = -1 ;
  open my $fh, "<", $self->ifile or die $!;
  while(<$fh>){
    if (m/^>\s+X\s+(\d+)/){$dim = $1;last;}
  }
  croak "ERROR [$this_function] could not parse dimension from input matrix"
    if ($dim == -1);
  close($fh);
  return $dim;
}

lib/Bio/RNA/RNAaliSplit/WrapRNAalifold.pm  view on Meta::CPAN

			is => 'rw',
			isa => 'Path::Class::File',
			predicate => 'has_stk',
			init_arg => undef,
		       );

with 'FileDirUtil';

sub BUILD {
  my $self = shift;
  my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] \$self->ifile not available"
    unless ($self->has_ifile);
  $rnaalifold = can_run('RNAalifold') or
    croak "ERROR [$this_function] RNAalifold not found";
  unless($self->has_odir){
    my $odir_name = "as";
    $self->odir( [$self->ifile->dir,$odir_name] );
  }
  $oodir = $self->odir->subdir("alifold");
  my @created = make_path($oodir, {error => \my $err});
  confess "ERROR [$this_function] could not create output directory $self->oodir"
    if (@$err);
  $self->set_ifilebn;
  $self->run_rnaalifold();
}

sub run_rnaalifold {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my ($out_fn,$out,$alnps_fn,$alnps,$alirnaps_fn,$stk_fn);
  my ($alirnaps,$alidotps_fn,$alidotps,$alifoldstk);
  my $tag = "";
  if ($self->has_ribosum){$tag = ".risobum"}
  if ($self->has_basename){
    $out_fn = $self->bn.$tag."."."alifold.out";
    $alnps_fn = $self->bn.$tag."."."aln.ps";
    $alirnaps_fn = $self->bn.$tag."."."alirna.ps";
    $alidotps_fn = $self->bn.$tag."."."alidot.ps";
    $stk_fn = $self->bn.$tag."."."alifold.stk";

lib/Bio/RNA/RNAaliSplit/WrapRNAalifold.pm  view on Meta::CPAN

  rename "aln.ps", $alnps;
  rename "alirna.ps", $alirnaps;
  rename "alidot.ps", $alidotps;
  rename "RNAalifold_results.stk", $alifoldstk;
  unlink "alifold.out";
}

# parse RNAalifold output
sub _parse_rnaalifold {
  my ($self,$out) = @_;
  my $this_function = (caller(0))[3];
  my @buffer=split(/^/, $out);

  foreach my $i (0..$#buffer){
    next unless ($i == 1); # just parse consensus structure
    unless ($buffer[$i] =~ m/([\(\)\.]+)\s+\(\s*(-?\d+\.\d+)\s+=\s+(-?\d+\.\d+)\s+\+\s+(-?\d+\.\d+)\)\s+\[sci\s+=\s+(-?\d+\.\d+)\]/){
      carp "ERROR [$this_function]  cannot parse RNAalifold output:";
      croak $self->ifile.":\n$buffer[$i]";
    }
    $self->consensus_struc($1);
    $self->consensus_mfe($2);

lib/Bio/RNA/RNAaliSplit/WrapRNAz.pm  view on Meta::CPAN

	      is => 'rw',
	      isa => 'Num',
	      init_arg => undef,
	      documentation => q(Structure conservation index),
	     );

with 'FileDirUtil';

sub BUILD {
  my $self = shift;
  my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] \$self->ifile not available"
    unless ($self->has_ifile);
   $rnaz = can_run('RNAz') or
     croak "ERROR [$this_function] RNAz not found";
  unless($self->has_odir){
    my $odir_name = "as";
    $self->odir( [$self->ifile->dir,$odir_name] );
  }
  $oodir = $self->odir->subdir("rnaz");
  my @created = make_path($oodir, {error => \my $err});
  confess "ERROR [$this_function] could not create output directory $self->oodir"
    if (@$err);
  $self->set_ifilebn;
  $self->run_rnaz();
}

sub run_rnaz {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my ($rnaz_outfilename,$rnaz_out);
  if ($self->has_basename){$rnaz_outfilename = $self->basename.".rnaz.out"}
  elsif ($self->has_ifilebn){$rnaz_outfilename = $self->ifilebn.".rnaz.out"}
  else{$rnaz_outfilename = "rnaz.out"}
  $rnaz_out = file($oodir,$rnaz_outfilename);
  open my $fh, ">", $rnaz_out;
  my $rnaz_cmd = $rnaz." -l -d < ".$self->ifile;
  my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
    run( command => $rnaz_cmd, verbose => 0 );
  if( !$success ) {

lib/Bio/RNA/RNAaliSplit/WrapRNAz.pm  view on Meta::CPAN

  my $stdout_buffer = join "", @$stdout_buf;
  my @out = split /\n/, $stdout_buffer;
  foreach my $line( @out){print $fh $line,"\n"}
  close($fh);
  $self->_parse_rnaz($stdout_buffer);
}

# parse RNAz output
sub _parse_rnaz {
  my ($self,$rnaz) = @_;
  my $this_function = (caller(0))[3];
  my @rnaz=split(/^/, $rnaz);
  my ($N,$identity,$columns,$decValue,$P,$z,$sci,$energy,$strand,
      $covariance,$combPerPair,$meanMFE,$consensusMFE,$consensusSeq,
      $consensusFold, $GCcontent, $ShannonEntropy);
  my @aln=();

  foreach my $i (0..$#rnaz){
    my $line=$rnaz[$i];
    $identity=$1 if ($line=~/Mean pairwise identity:\s*(-?\d+.\d+)/);
    $N=$1 if ($line=~/Sequences:\s*(\d+)/);

lib/Bio/RNA/RNAaliSplit/WrapRscape.pm  view on Meta::CPAN

		 isa => 'Int',
		 predicate => 'has_status',
		 documentation => q(R-scape program status), # 0:OK ; >0:NOTOK
		);


with 'FileDirUtil';

sub BUILD {
  my $self = shift;
  my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] \$self->ifile not available"
    unless ($self->has_ifile);
  $rscape = can_run($exe) or
    croak "ERROR [$this_function] $exe not found";
  unless($self->has_odir){
    my $odir_name = "as";
    $self->odir( [$self->ifile->dir,$odir_name] );
  }
  $oodir = $self->odir->subdir("rscape");
  my @created = make_path($oodir, {error => \my $err});
  confess "ERROR [$this_function] could not create output directory $self->oodir"
    if (@$err);
  $self->set_ifilebn;
  $self->_count_seq();
  if ($self->cseq > 1){ $self->run_rscape() }
}

sub _count_seq {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my $count = 0;
  open my $stk, "<", $self->ifile or croak "ERROR [$this_function] Cannot open Stockholm file $self->ifile for reading";
  while (<$stk>){
    next if (/^#/);
    next if (/^\/\//);
    $count++;
  }
  close ($stk);
  $self->cseq($count);
}

sub run_rscape {
  my $self = shift;
  my $this_function = (caller(0))[3];
  my ($out_fn,$sout_fn,$out,$sout,$sum);
  my ($rscape_out,$rscape_sout,$rscape_sum);
  my $tag = "";
  if ($self->has_statistic){$tag = ".".$self->statistic};

  if ($self->has_basename){
    $out_fn  = $self->basename.$tag."."."rscape.out";
    $sout_fn = $self->basename.$tag."."."rscape.sorted.out";
  }
  elsif ($self->has_ifilebn){

lib/Bio/RNA/RNAaliSplit/WrapRscape.pm  view on Meta::CPAN

#  Method Target_E-val [cov_min,conv_max] [FP | TP True Found | Sen PPV F]
#  RAFS    0.05           [-1.00,0.83]    [0 | 3 11 3 | 27.27 100.00 42.86]
#        left_pos       right_pos        score   E-value
# ------------------------------------------------------------
#*	        20	        36	0.83	0.000328218
#*	        21	        35	0.83	0.000328218
#*	        22	        34	0.33	0.0497359

sub _parse_rscape {
  my ($self,$out) = @_;
  my $this_function = (caller(0))[3];
  my @buffer = ();
  my $parse1 = 0;
  my $parse2 = 0;
  my $nosbp = 0;
  open my $file, "<", $out or croak "ERROR: [$this_function] Cannot open file $out";
  while(<$file>){
    chomp;
    if (m/^#\s+MSA\s+([a-zA-Z0-9_.|]+)\s+nseq\s+(\d+)\s+\(\d+\)\s+alen\s+(\d+)\s+\(\d+\)\s+avgid\s+(\d+\.\d+)\s+\(\d+\.\d+\)\s+nbpairs\s+(\d+)\s+\(\d+\)/g){
      $self->nseq($2);
      $self->alen($3);

lib/Bio/RNA/RNAaliSplit/WrapRscape.pm  view on Meta::CPAN

  }
  else{
    croak "ERROR: [$this_function] ambiguous status when parsing rscape output file. parse1:".eval($parse1)." parse2:".eval($parse2)." This shouldn't happen ...";
    $self->status(3);
    return;
  }
}

sub _check_attributes {
  my ($self,$out) = @_;
  my $this_function = (caller(0))[3];

  $self->has_cseq     ? 1 : croak "ERROR [$this_function] \$self->cseq not set ". $self->ifile;
  $self->has_nseq     ? 1 : croak "ERROR [$this_function] \$self->nseq not set ". $self->ifile;
  $self->has_alen     ? 1 : croak "ERROR [$this_function] \$self->alen not set". $self->ifile;
  $self->has_nbpairs  ? 1 : croak "ERROR [$this_function] \$self->nbpairs not set". $self->ifile;
  $self->has_evalue   ? 1 : croak "ERROR [$this_function] \$self->evalue not set". $self->ifile;
  $self->has_FP       ? 1 : croak "ERROR [$this_function] \$self->FP not set". $self->ifile;
  $self->has_TP       ? 1 : croak "ERROR [$this_function] \$self->TP not set". $self->ifile;
  $self->has_T        ? 1 : croak "ERROR [$this_function] \$self->T not set". $self->ifile;
  $self->has_F        ? 1 : croak "ERROR [$this_function] \$self->F not set". $self->ifile;

scripts/RNAalisplit.pl  view on Meta::CPAN


    print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$rscapeo->statistic,$what,$cs,$aln), "\n";
  }
  else {
    print join "\t",($hint,$rnazo->P,$rnazo->z,$alifoldo->sci,$count,$cs,$aln), "\n";
  }
}

sub make_distance_matrix {
  my ($ASO,$m,$od) = @_;
  my $this_function = (caller(0))[3];
  my ($i,$j,$Dfile,$dHn,$dHx,$dBp,$dHB);
  my @pw_alns = ();
  my @D = ();
  my $check = 1;
  my $dim = $ASO->next_aln->num_sequences;

  # extract all pairwise alignments
  print STDERR "Extracting pairwise alignments ...\n";
  for ($i=1;$i<$dim;$i++){
    for($j=$i+1;$j<=$dim;$j++){



( run in 0.336 second using v1.01-cache-2.11-cpan-a9ef4e587e4 )