Bio-RNA-Treekin

 view release on metacpan or  search on metacpan

lib/Bio/RNA/Treekin/Record.pm  view on Meta::CPAN

# Bio/RNA/Treekin/Record.pm

# Stores the data of a single Treekin record (one output file): the header
# information and the population data of all minima for each time point.
package Bio::RNA::Treekin::Record;
our $VERSION = '0.05';

use v5.14;                          # required for non-destructive subst s///r
use strict;
use warnings;

use Moose;
use MooseX::StrictConstructor;
use namespace::autoclean;

use autodie qw(:all);
use Scalar::Util qw(reftype openhandle);
use List::Util qw(first pairmap max uniqnum all);
use Carp qw(croak);

use Bio::RNA::Treekin::PopulationDataRecord;

use overload '""' => \&stringify;


# Population data rows of this record. Passed to the constructor under the
# name 'population_data'; the methods of this class dereference it as an
# array with one entry per point in time.
has '_population_data' => (
    init_arg => 'population_data',
    required => 1,
    is       => 'ro',
);

# Mandatory, read-only attributes. Each of them is written out as one header
# line when the record is stringified.
has $_ => (is => 'ro', required => 1)
    foreach qw(
                 date
                 sequence
                 method
                 start_time
                 stop_time
                 temperature
                 basename
                 time_increment
                 degeneracy
                 absorbing_state
                 states_limit
            );

# Optional, read-only attributes. Each one gets a has_<name>() predicate
# telling whether a value was supplied.
for my $optional_attribute (qw(
                                 info
                                 init_population
                                 rates_file
                                 file_index
                                 cmd
                                 of_iterations
                            )) {
    has $optional_attribute => (
        predicate => "has_$optional_attribute",
        required  => 0,
        is        => 'ro',
    );
}

# Number of population data rows stored in this record.
# Always returns a single number, regardless of calling context.
sub population_data_count {
    my $self = shift;

    return scalar @{ $self->_population_data };
}

# Number of states / minima in this simulation.
# The value is taken from the first population record only, as all records
# are expected to contain the same number of minima.
# Dies with a stack trace if there is no first population record.
sub min_count {
    my ($self) = @_;

    my $first_record = $self->population(0);
    defined $first_record
        or confess 'min_count: no population data present';

    return scalar $first_record->min_count;
}

# List all minima of this record, i.e. the numbers 1..n, where n is the
# value of min_count(). In scalar context, the number of minima is returned.
sub mins {
    my $self = shift;

    my $last_min = $self->min_count;
    my @minima   = (1 .. $last_min);

    return @minima;
}

# Restrict the population data to the selected minima and drop all others.
# Populations are NOT rescaled, so they may no longer sum up to 1.
# Arguments:
#   mins: List of mins to keep. It is sorted numerically and duplicates are
#         removed before it is handed to splice_mins().
# Returns the return value of splice_mins().
sub keep_mins {
    my $self = shift;

    my @ascending_mins = sort { $a <=> $b } @_;
    my @selected_mins  = uniqnum @ascending_mins;

    return $self->splice_mins(@selected_mins);
}

# Keep only the population data for the selected minima, remove all other.
# May duplicate and re-order.
#   mins: List of mins to keep. Will be used as is.
# Returns itself.
sub splice_mins {
    my ($self, @kept_mins) = @_;

    my $min_count = $self->min_count;
    confess 'Cannot splice, minimum out of bounds'
        unless all {$_ >= 1 and $_ <= $min_count} @kept_mins;

    # Directly update raw population data here instead of doing tons of
    # calls passing the same min array.
    my @kept_indices = map {$_ - 1} @kept_mins;
    for my $pop_data (@{$self->_population_data}) { # each point in time

lib/Bio/RNA/Treekin/Record.pm  view on Meta::CPAN

    }

    # Read in file.
    my ($header_lines_ref, $population_data_lines_ref)
        = $class->_read_record_lines($record_handle);

    # Parse file.
    my @header_args = $class->_parse_header_lines($header_lines_ref);
    my @data_args
        = $class->_parse_population_data_lines($population_data_lines_ref);

    my %args = (@header_args, @data_args);
    return $class->$orig(\%args);
};

# Post-construction hook.
# Queries the min count once right away, then, if an initial population was
# supplied, passes the record's min count on to it.
sub BUILD {
    my ($self) = @_;

    # Force construction despite laziness.
    $self->min_count;

    # The min count was not yet known when the initial population values
    # were extracted from the Treekin cmd, so it is set now.
    if ($self->has_init_population) {
        my $init_population = $self->init_population;
        $init_population->set_min_count( $self->min_count );
    }
}

# Convert the record to its textual form: header lines of the form
# '# <label>: <value>', followed by the stringified population data rows
# (one per line) and, if present, the '# of iterations' footer line.
# Also used as the handler of the overloaded '""' operator.
sub stringify {
    my ($self) = @_;

    my @header_lines;

    # Optional header entries are emitted first, and only when they are set.
    if ($self->has_rates_file) {
        # The rates file entry carries the file index in parentheses.
        my $rates_file_value
            = $self->rates_file . ' (#' . $self->file_index . ')';
        push @header_lines, '# Rates file: ' . $rates_file_value;
    }
    push @header_lines, '# Info: ' . $self->info if $self->has_info;
    push @header_lines, '# Cmd: '  . $self->cmd  if $self->has_cmd;

    # Mandatory header entries: label in the file => accessor providing it.
    my @mandatory_entries = (
        [ 'Date'            => 'date'            ],
        [ 'Sequence'        => 'sequence'        ],
        [ 'Method'          => 'method'          ],
        [ 'Start time'      => 'start_time'      ],
        [ 'Stop time'       => 'stop_time'       ],
        [ 'Temperature'     => 'temperature'     ],
        [ 'Basename'        => 'basename'        ],
        [ 'Time increment'  => 'time_increment'  ],
        [ 'Degeneracy'      => 'degeneracy'      ],
        [ 'Absorbing state' => 'absorbing_state' ],
        [ 'States limit'    => 'states_limit'    ],
    );
    for my $entry (@mandatory_entries) {
        my ($label, $accessor) = @{$entry};
        push @header_lines, "# $label: " . $self->$accessor;
    }

    # Population data, one stringified row per line.
    my @population_lines = map { "$_" } @{ $self->_population_data };

    # The newline after the header is added even without population rows.
    my $record_string = join("\n", @header_lines)
                      . "\n"
                      . join("\n", @population_lines);

    # Footer (new Treekin versions only).
    if ($self->has_of_iterations) {
        $record_string .= "\n" . '# of iterations: ' . $self->of_iterations;
    }

    return $record_string;
}

__PACKAGE__->meta->make_immutable;

1;  # End of Bio::RNA::Treekin::Record


__END__


=pod

=encoding UTF-8

=head1 NAME

Bio::RNA::Treekin::Record - Parse, query, and manipulate I<Treekin> output.

=head1 SYNOPSIS

    use Bio::RNA::Treekin;

=head1 DESCRIPTION

Parses a regular output file of I<Treekin>. Allows querying population data
as well as additional info from the header. New minima can be generated. The
stringification returns, again, a valid I<Treekin> file which can be, e. g.,
visualized using I<Grace>.

=head1 ATTRIBUTES

These attributes of the class allow querying various data from the header of
the input file.

=head2 date

The time and date of the I<Treekin> run.

=head2 sequence

The RNA sequence for which the simulation was computed.

=head2 method

The method used to build the transition matrix as documented for the
C<--method> switch of I<Treekin>.

=head2 start_time

Initial time of the simulation.

=head2 stop_time

Time at which the simulation stops.



( run in 0.413 second using v1.01-cache-2.11-cpan-96521ef73a4 )