Bio-ViennaNGS
view release on metacpan or search on metacpan
lib/Bio/ViennaNGS/Expression.pm view on Meta::CPAN
# -*-CPerl-*-
# Last changed Time-stamp: <2017-06-10 18:59:03 michl>
package Bio::ViennaNGS::Expression;
use Bio::ViennaNGS;
use Moose;
use Carp;
use Data::Dumper;
use Path::Class;
use Bio::ViennaNGS::Bed;
use Bio::ViennaNGS::Util qw(sortbed);
use namespace::autoclean;
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");
# ----------------------------------------------------------------------
# Object attributes
# ----------------------------------------------------------------------

# Path of the read count (bedtools multicov) file. It is recorded by
# parse_readcounts_bed12() after the file has been found to exist.
has readcountfile => (
    predicate => 'has_readcountfile',
    is        => 'rw',
);

# Expression data container: an array reference that starts out empty.
# The POD of this module addresses it by sample index, ie one element
# per sample.
has data => (
    isa     => 'ArrayRef',
    is      => 'rw',
    default => sub { return [] },
);

# Number of conditions (samples). parse_readcounts_bed12() sets it to the
# number of columns found beyond the 12 regular BED12 fields.
has conds => (
    predicate => 'has_conds',
    isa       => 'Int',
    is        => 'rw',
);

# Integer number of features.
# NOTE(review): the predicate is named 'has_features', not
# 'has_nr_features'; where this attribute gets set is not visible in
# this part of the file.
has nr_features => (
    predicate => 'has_features',
    isa       => 'Int',
    is        => 'rw',
);
sub parse_readcounts_bed12 {
my ($self,$file) = @_;
my @mcData = ();
my ($i,$n) = (0)x2;
my $this_function = (caller(0))[3];
croak "ERROR [$this_function] readcount / multicov file $self->readcountfile not available\n"
unless (-e $file);
$self->readcountfile($file);
open (RC_IN, "< $file") or croak $!;
while (<RC_IN>){
$n++;
chomp;
# 0:chr|1:start|2:end|3:name|4:score|5:strand
# 6:thickStart|7:thickEnd|8:itemRgb|9:blockCount|
# 10:blockSizes|11:blockStarts
@mcData = split(/\t/);
my $conditions = (scalar @mcData)-12; # multicov extends BED12
$self->conds($conditions);
# NOTE: Better keep BED12 entries in a hash, generating UUIDs as
# keys instead of storing the same BED12 entry n times (ie for
# each sample) in $self->data
my $bedobj = Bio::ViennaNGS::Bed->new(chromosome => $mcData[0],
start => $mcData[1],
end => $mcData[2],
name => $mcData[3],
score => $mcData[4],
strand => $mcData[5],
thickStart => $mcData[6],
thickEnd => $mcData[7],
itemRgb => $mcData[8],
blockCount => $mcData[9],
lib/Bio/ViennaNGS/Expression.pm view on Meta::CPAN
C<@featCount>, which is populated from a multicov file by
C<parse_multicov()>.
Args : C<$sample> is the sample index of C<@{$self-E<gt>data}>. This is
especially handy if one is only interested in computing
normalized expression values for a specific sample, rather
than all samples in a multicov BED12 file. C<$readlength> is the
read length of the RNA-seq sequencing experiment.
Returns : Returns the mean TPM of the processed sample, which is
invariant among samples. (TPM models relative molar
concentration and thus fulfills the invariant average
criterion.)
=item computeRPKM
Title : computeRPKM
Usage : C<$obj-E<gt>computeRPKM($sample);>
Function : Computes expression values of each gene/feature present in
C<$self-E<gt>data> in I<Reads per Kilobase per Million
Reads (RPKM)>. C<$self-E<gt>data> is a reference to an Array
holding one Hash of Hashes per sample, where keys are feature
names and values hold a hash that must at least contain length
and raw read counts.
Args : C<$sample> is the sample index of C<@{$self-E<gt>data}>.
Returns : Returns the mean RPKM of the processed sample.
=item write_expression_bed12
Title : write_expression_bed12
Usage : C<$obj-E<gt>write_expression_bed12($measure,$dest,$basename);>
Function : Writes normalized expression data to a bedtools multicov
(multiBamCov)-type BED12 file.
Args : C<$measure> specifies the type in which normalized expression
data from C<@{$self-E<gt>data}> is dumped. Allowed values are
'TPM' and 'RPKM'. Corresponding TPM/RPKM values must have been
computed and inserted into C<@{$self-E<gt>data}> beforehand by
C<$self-E<gt>computeTPM()> and C<$self-E<gt>computeRPKM()>,
respectively. C<$dest> and C<$basename> give path and base
name of the output file, respectively.
Returns : None. The output is a position-sorted extended BED12 file.
=back
=head1 DEPENDENCIES
=over
=item L<Moose>
=item L<Carp>
=item L<Path::Class>
=item L<namespace::autoclean>
=back
=head1 SEE ALSO
=over
=item L<Bio::ViennaNGS>
=item L<Bio::ViennaNGS::Bed>
=item L<Bio::ViennaNGS::Util>
=back
=head1 AUTHOR
Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2015-2017 by Michael T. Wolfinger
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.0 or,
at your option, any later version of Perl 5 you may have available.
This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
( run in 1.101 second using v1.01-cache-2.11-cpan-f56aa216473 )