Bio-ViennaNGS

 view release on metacpan or  search on metacpan

lib/Bio/ViennaNGS/Expression.pm  view on Meta::CPAN

# -*-CPerl-*-
# Last changed Time-stamp: <2017-06-10 18:59:03 michl>

package Bio::ViennaNGS::Expression;

use Bio::ViennaNGS;
use Moose;
use Carp;
use Data::Dumper;
use Path::Class;
use Bio::ViennaNGS::Bed;
use Bio::ViennaNGS::Util qw(sortbed);
use namespace::autoclean;
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");

# Path of the read count / multicov file. parse_readcounts_bed12()
# stores the file name it was given here; has_readcountfile() reports
# whether a value has been set.
has readcountfile => (
    predicate => 'has_readcountfile',
    is        => 'rw',
);

# Container for the parsed data. Every object starts out with its own
# empty array reference.
has data => (
    default => sub { return [] },
    isa     => 'ArrayRef',
    is      => 'rw',
);

# Number of conditions/samples. parse_readcounts_bed12() sets this to
# the number of columns found beyond the 12 standard BED12 columns.
has conds => (
    predicate => 'has_conds',
    isa       => 'Int',
    is        => 'rw',
);

# Integer feature counter; has_features() reports whether it is set.
# NOTE(review): presumably the number of features read from the
# multicov file -- confirm against the code that assigns it.
has nr_features => (
    predicate => 'has_features',
    isa       => 'Int',
    is        => 'rw',
);

sub parse_readcounts_bed12 {
  my ($self,$file) = @_;
  my @mcData = ();
  my ($i,$n) = (0)x2;
  my $this_function = (caller(0))[3];

  croak "ERROR [$this_function] readcount / multicov file $self->readcountfile not available\n"
    unless (-e $file);
  $self->readcountfile($file);
  open (RC_IN, "< $file") or croak $!;

  while (<RC_IN>){
    $n++;
    chomp;
    # 0:chr|1:start|2:end|3:name|4:score|5:strand
    # 6:thickStart|7:thickEnd|8:itemRgb|9:blockCount|
    # 10:blockSizes|11:blockStarts
    @mcData = split(/\t/);
    my $conditions = (scalar @mcData)-12;  # multicov extends BED12
    $self->conds($conditions);

    # NOTE: Better keep BED12 entries in a hash, generating UUIDs as
    # keys instead of storing the same BED12 entry n times (ie for
    # each sample) in $self->data

    my $bedobj =  Bio::ViennaNGS::Bed->new(chromosome   => $mcData[0],
					   start        => $mcData[1],
					   end          => $mcData[2],
					   name         => $mcData[3],
					   score        => $mcData[4],
					   strand       => $mcData[5],
					   thickStart   => $mcData[6],
					   thickEnd     => $mcData[7],
					   itemRgb      => $mcData[8],
					   blockCount   => $mcData[9],

lib/Bio/ViennaNGS/Expression.pm  view on Meta::CPAN

           C<@featCount>, which is populated from a multicov file by
           C<parse_multicov()>.

Args : C<$sample> is the sample index of C<@{$self-E<gt>data}>. This is
        especially handy if one is only interested in computing
        normalized expression values for a specific sample, rather
        than for all samples in the multicov BED12 file. C<$readlength>
        is the read length of the RNA-seq experiment.

Returns : Returns the mean TPM of the processed sample, which is
          invariant among samples. (TPM models relative molar
          concentration and thus fulfills the invariant average
          criterion.)

=item computeRPKM

Title : computeRPKM

Usage :  C<$obj-E<gt>computeRPKM($sample);>

Function : Computes expression values of each gene/feature present in
           C<$self-E<gt>data> in I<Reads per Kilobase per Million
           Reads (RPKM)>. C<$self-E<gt>data> is a reference to an
           array with one element per sample; each element is a Hash
           of Hashes data structure where keys are feature names and
           values hold a hash that must at least contain length and
           raw read counts.

Returns: Returns the mean RPKM of the processed sample.

=item write_expression_bed12

Title : write_expression_bed12

Usage : C<$obj-E<gt>write_expression_bed12($measure,$dest,$basename);>

Function : Writes normalized expression data to a bedtools multicov
           (multiBamCov)-type BED12 file.

Args : C<$measure> specifies the type in which normalized expression
       data from C<@{$self-E<gt>data}> is dumped. Allowed values are
       'TPM' and 'RPKM'. Corresponding TPM/RPKM values must have been
       computed and inserted into C<@{$self-E<gt>data}> beforehand by
       C<$self-E<gt>computeTPM()> and C<$self-E<gt>computeRPKM()>,
       respectively. C<$dest> and C<$basename> give path and base
       name of the output file, respectively.

Returns : None. The output is a position-sorted extended BED12 file.

=back

=head1 DEPENDENCIES

=over

=item L<Moose>

=item L<Carp>

=item L<Path::Class>

=item L<namespace::autoclean>

=back

=head1 SEE ALSO

=over

=item L<Bio::ViennaNGS>

=item L<Bio::ViennaNGS::Bed>

=item L<Bio::ViennaNGS::Util>

=back

=head1 AUTHOR

Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2015-2017 by Michael T. Wolfinger

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.0 or,
at your option, any later version of Perl 5 you may have available.

This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut



( run in 1.101 second using v1.01-cache-2.11-cpan-f56aa216473 )