Bio-RNA-Treekin
view release on metacpan or search on metacpan
lib/Bio/RNA/Treekin/Record.pm view on Meta::CPAN
# Bio/RNA/Treekin/Record.pm
# Stores the data of a single Treekin record, i.e. the header information and
# the populations of all minima at each time point.
package Bio::RNA::Treekin::Record;
our $VERSION = '0.05';
use v5.14; # required for non-destructive substitution s///r
use strict;
use warnings;
use Moose; # also exports confess(), which is used for error reporting below
use MooseX::StrictConstructor;
use namespace::autoclean;
use autodie qw(:all);
use Scalar::Util qw(reftype openhandle);
use List::Util qw(first pairmap max uniqnum all);
use Carp qw(croak);
use Bio::RNA::Treekin::PopulationDataRecord;
# Using a record object in string context calls stringify().
use overload '""' => \&stringify;
# Population data rows, one per time point. Supplied to the constructor as
# 'population_data', read through the private accessor _population_data.
has '_population_data' => (
    init_arg => 'population_data',
    is       => 'ro',
    required => 1,
);

# Mandatory, read-only header attributes.
for my $required_attr (qw(
    date
    sequence
    method
    start_time
    stop_time
    temperature
    basename
    time_increment
    degeneracy
    absorbing_state
    states_limit
)) {
    has $required_attr => (is => 'ro', required => 1);
}

# Optional, read-only attributes. Each one comes with a predicate method
# has_<name> telling whether a value was supplied.
for my $optional_attr (qw(
    info
    init_population
    rates_file
    file_index
    cmd
    of_iterations
)) {
    has $optional_attr => (
        is        => 'ro',
        required  => 0,
        predicate => "has_$optional_attr",
    );
}
# Number of population data rows (i.e. time points) stored in this record.
sub population_data_count {
    my $self = shift;
    return scalar @{ $self->_population_data };
}
# Number of states / minima in this simulation.
# The value is taken from the first population data record; all records are
# expected to agree on it. Dies with a stack trace if no population data is
# available.
sub min_count {
    my ($self) = @_;

    my $first_record = $self->population(0);
    if (not defined $first_record) {
        confess 'min_count: no population data present';
    }

    return scalar $first_record->min_count;
}
# List of all minima, i.e. the numbers 1..n with n being the total number of
# minima as reported by min_count().
sub mins {
    my $self = shift;

    my $highest_min = $self->min_count;
    my @all_mins    = (1 .. $highest_min);

    return @all_mins;
}
# Restrict the population data to the selected minima and drop all others.
# Populations are NOT rescaled, so they may no longer sum up to 1.
# Arguments:
#   mins: List of minima to keep. It is sorted numerically and duplicates
#         are removed before it is handed to splice_mins().
# Returns whatever splice_mins() returns.
sub keep_mins {
    my $self = shift;

    my @sorted_mins = sort { $a <=> $b } @_;
    my @kept_mins   = uniqnum @sorted_mins;

    return $self->splice_mins(@kept_mins);
}
# Keep only the population data for the selected minima, remove all other.
# May duplicate and re-order.
# mins: List of mins to keep. Will be used as is.
# Returns itself.
sub splice_mins {
my ($self, @kept_mins) = @_;
my $min_count = $self->min_count;
confess 'Cannot splice, minimum out of bounds'
unless all {$_ >= 1 and $_ <= $min_count} @kept_mins;
# Directly update raw population data here instead of doing tons of
# calls passing the same min array.
my @kept_indices = map {$_ - 1} @kept_mins;
for my $pop_data (@{$self->_population_data}) { # each point in time
lib/Bio/RNA/Treekin/Record.pm view on Meta::CPAN
}
# Read in file.
my ($header_lines_ref, $population_data_lines_ref)
= $class->_read_record_lines($record_handle);
# Parse file.
my @header_args = $class->_parse_header_lines($header_lines_ref);
my @data_args
= $class->_parse_population_data_lines($population_data_lines_ref);
my %args = (@header_args, @data_args);
return $class->$orig(\%args);
};
# Post-construction hook: validates that population data is present and
# finalizes the initial population, if there is one.
sub BUILD {
    my ($self) = @_;

    # Called for its side effect only: min_count() dies if the record does
    # not contain any population data.
    $self->min_count;

    # The min count was not yet known when the initial population values
    # were extracted from the Treekin cmd, so it is set now.
    if ($self->has_init_population) {
        my $init_population = $self->init_population;
        $init_population->set_min_count( $self->min_count );
    }
}
# Convert this record to its textual form: one '# <key>: <value>' line per
# header entry, followed by one line per population data row and, if the
# of_iterations attribute is set, a '# of iterations: <n>' footer line.
# Lines are separated by newlines. Also serves as the handler of the
# overloaded '""' operator.
# Returns the resulting string.
sub stringify {
    my $self = shift;

    # Format header line value of rates file entry: the file name followed
    # by ' (#<file index>)'. The file index is an optional attribute, so it
    # is only appended when set. Interpolating an unset index would raise an
    # "uninitialized" warning and leave a dangling ' (#)' in the output.
    my $make_rates_file_val = sub {
        my $rates_file_val = $self->rates_file;
        $rates_file_val .= ' (#' . $self->file_index . ')'
            if $self->has_file_index;
        return $rates_file_val;
    };

    # Header: optional entries first (only if present), then the mandatory
    # ones. Stored as a flat key/value list to preserve the order.
    my @header_entries = (
        $self->has_rates_file ? ('Rates file' => $make_rates_file_val->()) : (),
        $self->has_info       ? ('Info'       => $self->info)              : (),
        $self->has_cmd        ? ('Cmd'        => $self->cmd)               : (),
        'Date'            => $self->date,
        'Sequence'        => $self->sequence,
        'Method'          => $self->method,
        'Start time'      => $self->start_time,
        'Stop time'       => $self->stop_time,
        'Temperature'     => $self->temperature,
        'Basename'        => $self->basename,
        'Time increment'  => $self->time_increment,
        'Degeneracy'      => $self->degeneracy,
        'Absorbing state' => $self->absorbing_state,
        'States limit'    => $self->states_limit,
    );
    my $header_str = join "\n", pairmap { "# $a: $b" } @header_entries;

    # Population data: each row is converted by interpolating it into a
    # string.
    my $population_str
        = join "\n", map { "$_" } @{ $self->_population_data };

    # Footer (new Treekin versions only).
    my $footer_str = $self->has_of_iterations
                     ? '# of iterations: ' . $self->of_iterations
                     : q{};

    my $self_as_str = $header_str . "\n" . $population_str;
    $self_as_str .= "\n" . $footer_str if $footer_str;

    return $self_as_str;
}
__PACKAGE__->meta->make_immutable;
1; # End of Bio::RNA::Treekin::Record
__END__
=pod
=encoding UTF-8
=head1 NAME
Bio::RNA::Treekin::Record - Parse, query, and manipulate I<Treekin> output.
=head1 SYNOPSIS
use Bio::RNA::Treekin;
=head1 DESCRIPTION
Parses a regular output file of I<Treekin>. Allows querying population data
as well as additional info from the header. New minima can be generated. The
stringification returns, again, a valid I<Treekin> file which can be, e. g.,
visualized using I<Grace>.
=head1 ATTRIBUTES
These attributes of the class allow querying various data from the header of
the input file.
=head2 date
The time and date of the I<Treekin> run.
=head2 sequence
The RNA sequence for which the simulation was computed.
=head2 method
The method used to build the transition matrix as documented for the
C<--method> switch of I<Treekin>.
=head2 start_time
Initial time of the simulation.
=head2 stop_time
Time at which the simulation stops.
( run in 0.413 second using v1.01-cache-2.11-cpan-96521ef73a4 )