Bio-RNA-RNAaliSplit
view release on metacpan or search on metacpan
lib/Bio/RNA/RNAaliSplit/AliFeature.pm view on Meta::CPAN
# -*-CPerl-*-
# Last changed Time-stamp: <2019-04-24 00:47:47 mtw>
#
# Derive features of an alignment, in particular scores to compare
# different alignments of the same sequences
package Bio::RNA::RNAaliSplit::AliFeature;
use Moose;
use namespace::autoclean;
use version; our $VERSION = qv('0.11');
use diagnostics;
use Data::Dumper;
use Carp;
extends 'Bio::RNA::RNAaliSplit::AliHandler';
has 'sop' => ( # sum of pairs score
is => 'rw',
isa => 'Int',
predicate => 'has_sSOP',
init_arg => undef,
);
has '_csp' => ( # column sequence positions
is => 'ro', # read-only
isa => 'ArrayRef',
predicate => 'has_sCSP',
init_arg => undef,
writer => '_cspwriter', # private writer
);
has 'csp_hash' => (
is => 'ro',
isa => 'HashRef',
predicate => 'hash_csp_hash',
init_arg => undef,
writer => '_csp_hash_writer', # private writer 4 ro attribute
);
with 'FileDirUtil';
sub BUILD {
my $self = shift;
my $this_function = (caller(0))[3];
confess "ERROR [$this_function] \$self->ifile not available"
unless ($self->has_ifile);
$self->alignment({-file => $self->ifile,
-format => $self->format,
-displayname_flat => 1} ); # discard position in sequence IDs
$self->next_aln($self->alignment->next_aln);
$self->next_aln->set_displayname_safe();
$self->_get_alen();
$self->_get_nrseq();
$self->set_ifilebn;
# compute sum of pairs score
#$self->compute_sop();
# compute sequence position for each column
$self->_get_column_sequence_positions();
# compute CSP hash
$self->_csp_hash();
}
# Compute Sum of Pairs scoring for an alignment
sub compute_sop {
my $self = shift;
# print "### in ccompute_sop###\n";
my @alignment=();
my $sp=0;
for (my $i=1;$i<=$self->nrseq;$i++){
push @alignment, $self->next_aln->get_seq_by_pos($i)->seq;
}
# print Dumper (\@alignment);
for (my $c=0;$c<eval($self->alen);$c++){ # loop over alignment columns
my $colscore=0;
# print "$c\n";
for (my $i=0;$i<eval($self->nrseq);$i++){
for (my $j=$i+1;$j<eval($self->nrseq);$j++){
my $ci = substr($alignment[$i],$c,1);
my $cj = substr($alignment[$j],$c,1);
if (1) { # if ($self->distancemeasure eq "e"){ # edit distance
if ($ci ne $cj) {$colscore += 1}
}
# print " > $ci$i $cj$j $colscore <\n";
} # end for j
} # end for i
# print " > ---------- <\n";
$sp += $colscore;
} # end for c
$self->sop($sp);
}
sub _get_column_sequence_positions {
my $self = shift;
my @loclist = ();
foreach my $i( 1..$self->nrseq ) {
my @ll=();
my $seq = $self->next_aln->get_seq_by_pos($i);
# print $seq->seq."\n";
$ll[0] = $self->alen;
for (my $j=1;$j<=$self->alen;$j++){
( run in 1.218 second using v1.01-cache-2.11-cpan-5a3173703d6 )