Bio-RNA-RNAaliSplit
view release on metacpan or search on metacpan
lib/Bio/RNA/RNAaliSplit.pm view on Meta::CPAN
# -*-CPerl-*-
# Last changed Time-stamp: <2019-08-25 20:54:13 mtw>
# Bio::RNA::RNAaliSplit.pm: Handler for horizontally splitting alignments
package Bio::RNA::RNAaliSplit;
use version; our $VERSION = qv('0.11');
use Carp;
use Data::Dumper;
use Moose;
use Moose::Util::TypeConstraints;
use Path::Class;
use File::Basename;
use IPC::Cmd qw(can_run run);
#use Bio::AlignIO;
use Storable 'dclone';
use File::Path qw(make_path);
use diagnostics;
extends 'Bio::RNA::RNAaliSplit::AliHandler';
has 'alignment_aln' => (
is => 'rw',
isa => 'Path::Class::File',
predicate => 'has_aln_file',
init_arg => undef,
);
has 'alignment_stk' => (
is => 'rw',
isa => 'Path::Class::File',
predicate => 'has_stk_file',
init_arg => undef,
);
has 'hammingdistN' => (
is => 'rw',
isa => 'Num',
default => '-1',
predicate => 'has_hammingN',
init_arg => undef,
);
has 'hammingdistX' => (
is => 'rw',
isa => 'Num',
default => '-1',
predicate => 'has_hammingX',
init_arg => undef,
);
with 'FileDirUtil';
sub BUILD {
my $self = shift;
my $this_function = (caller(0))[3];
confess "ERROR [$this_function] \$self->ifile not available"
unless ($self->has_ifile);
$self->alignment({-file => $self->ifile,
-format => $self->format,
-displayname_flat => 1} );
$self->next_aln($self->alignment->next_aln);
$self->next_aln->set_displayname_safe();
$self->_get_alen();
$self->_get_nrseq();
unless($self->has_odir){
my $odir_name = "as";
$self->odir( [$self->ifile->dir,$odir_name] );
}
my @created = make_path($self->odir, {error => \my $err});
confess "ERROR [$this_function] could not create output directory $self->odir"
if (@$err);
$self->set_ifilebn;
# dump ifile as aln and stk in ClustalW format to odir/input
my $iodir = $self->odir->subdir('input');
mkdir($iodir);
my $ialnfile = file($iodir,$self->ifilebn.".aln");
my $istkfile = file($iodir,$self->ifilebn.".stk");
my $alnio = Bio::AlignIO->new(-file => ">$ialnfile",
-format => "clustalw",
-flush => 0,
-displayname_flat => 1 );
my $stkio = Bio::AlignIO->new(-file => ">$istkfile",
-format => "stockholm",
-flush => 0,
-displayname_flat => 1 );
my $aln2 = $self->next_aln->select_noncont((1..$self->next_aln->num_sequences));
my $stk2 = $self->next_aln->select_noncont((1..$self->next_aln->num_sequences));
$alnio->write_aln($aln2);
$stkio->write_aln($stk2);
$self->alignment_aln($ialnfile);
$self->alignment_stk($istkfile);
# end dump ifile
if ($self->next_aln->num_sequences == 2){ $self->_hamming() }
}
sub dump_subalignment {
my ($self,$alipathsegment,$token,$what) = @_;
my $this_function = (caller(0))[3];
my ($aln,$aln2,$name);
croak "ERROR [$this_function] argument 'token' not provided"
unless (defined($token));
# create output path
my $ids = join "_", @$what;
unless (defined($alipathsegment)){$alipathsegment = "tmp"}
my $oodir = $self->odir->subdir($alipathsegment);
mkdir($oodir);
# create info file
my $oinfofile = file($oodir,$token.".info");
open my $oinfo, ">", $oinfofile or die $!;
foreach my $entry (@$what){
my $key = $entry-1;
my $val = ${$self->next_aln}{_order}->{$key};
print $oinfo join "\t", ($entry, $val, "\n");
}
close($oinfo);
# create subalignment in Clustal and Stockholm format
my $oalifile_clustal = file($oodir,$token.".aln");
my $oalifile_stockholm = file($oodir,$token.".stk");
my $oali_clustal = Bio::AlignIO->new(-file => ">$oalifile_clustal",
-format => "ClustalW",
-flush => 0,
-displayname_flat => 1 );
my $oali_stockholm = Bio::AlignIO->new(-file => ">$oalifile_stockholm",
-format => "Stockholm",
-flush => 0,
-displayname_flat => 1 );
$aln = $self->next_aln->select_noncont(@$what);
$oali_clustal->write_aln( $aln );
$oali_stockholm->write_aln( $aln );
# create subalignment fasta file
my $ofafile = file($oodir,$token.".fa");
my $ofa = Bio::AlignIO->new(-file => ">$ofafile",
-format => "fasta",
-flush => 0,
-displayname_flat => 1 );
$aln2 = $aln->remove_gaps;
$ofa->write_aln( $aln2 );
# extract sequences from alignment and dump to .seq file
# NOTE that these sequences do contain gap symbols, intentionally (!)
# these can then be replaced by Ns to compute eg hamming distance
my $oseqfile = file($oodir,$token.".seq");
open my $seqfile, ">", $oseqfile or die $!;
foreach my $seq ($aln->each_seq) {
print $seqfile $seq->seq,"\n";
}
close($seqfile);
return ( $oalifile_clustal,$oalifile_stockholm );
}
sub _hamming {
my $self = shift;
my $this_function = (caller(0))[3];
my $hamming = -1;
croak "ERROR [$this_function] cannot compute Hamming distance for $self->next_aln->num_sequences sequences"
if ($self->next_aln->num_sequences != 2);
my $aln = $self->next_aln->select_noncont((1,2));
# compute Hamming distance of the aligned sequences, replacing gaps with Ns
my $alnN = dclone($aln);
croak("ERROR [$this_function] cannot replace gaps with Ns")
unless ($alnN->map_chars('-','N') == 1);
my $seq1 = $alnN->get_seq_by_pos(1)->seq;
my $seq2 = $alnN->get_seq_by_pos(2)->seq;
croak "ERROR [$this_function] sequence length differs"
unless(length($seq1)==length($seq2));
my $hammingN = ($seq1 ^ $seq2) =~ tr/\001-\255//;
$self->hammingdistN($hammingN);
# print $self->ifilebn,":\n";
# print ">>s1: $seq1\n";
# print ">>s2: $seq2\n";
# print "** dhN = ".$self->hammingdistN."\n";
# print "+++\n";
}
no Moose;
=head1 NAME
Bio::RNA::RNAaliSplit - Split and deconvolute structural RNA multiple
sequence alignments
=head1 VERSION
Version 0.11
=cut
=head1 SYNOPSIS
This module is a L<Moose> handler for horizontal splitting and
evaluation of structural RNA multiple sequence alignments. It employs
third party tools (RNAalifold, RNAz, R-scape) for classification of
subalignments, each folding into a common consensus structure.
=head1 AUTHOR
Michael T. Wolfinger, C<< <michael at wolfinger.eu> >>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-bio-rna-rnaalisplit at rt.cpan.org>, or through the web
interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-RNAaliSplit>.
I will be notified, and then you'll automatically be notified of
progress on your bug as I make changes.
( run in 1.943 second using v1.01-cache-2.11-cpan-524268b4103 )