Bio-ViennaNGS
view release on metacpan or search on metacpan
lib/Bio/ViennaNGS/Expression.pm view on Meta::CPAN
croak "ERROR [$this_function]: $dest does not exist\n"
unless (-d $dest);
$bedname = $base_name.".".$item.".multicov.bed12";
$bedname_u = $base_name.".".$item.".multicov.u.bed12";
$outfile = file($dest,$bedname_u);
croak "ERROR [$this_function]: $self->conds not available\n"
unless ($self->has_conds);
croak "ERROR [$this_function]: $self->nr_features not available\n"
unless ($self->has_features);
#print "=====> write_multicov: writing multicov file $bedfile with ".
# eval($self->nr_features)." lines and ".eval($self->conds)." conditions\n";
# check whether each element in @{$self->data} has the same amount of entries
for($i=0;$i<$self->conds;$i++){
my $fc = scalar keys %{$self->data->[$i]}; # of keys in %{$featCount}
#print "condition $i => $fc keys\n";
croak "ERROR [$this_function]: unequal element count in @{$self->data}"
unless($self->nr_features == $fc)
}
open (MULTICOV_OUT, "> $bedname_u") or croak $!;
# use BED12 data stored with condition 0 here, assuming its the same for all conditions
foreach $feat (keys %{${$self->data}[0]} ){
my @mcLine = ();
# retrieve BED12 line first
my $bedo = ${${$self->data}[0]}{$feat}{bed_entry};
my $bedline = $bedo->as_bed_line(12);
push @mcLine, $bedline;
# process multicov values for all samples
for($i=0;$i<$self->conds;$i++){
# print "------------> "; print "processing $i th condition "; print "<-----------\n";
croak "Could not find item $feat in mcSample $i\n"
unless ( defined ${${$self->data}[$i]}{$feat} );
push @mcLine, ${${$self->data}[$i]}{$feat}{$item};
}
#print MULTICOV_OUT join("\t",@mcLine)."\n";
my @bed12 = splice @mcLine,0,1;
print MULTICOV_OUT join("\t",@bed12)."\t";
print MULTICOV_OUT join("\t", map { sprintf "%20.4f", $_ }@mcLine),"\n";
}
close(MULTICOV_OUT);
sortbed($bedname_u,"./",$bedname,1,undef); # sort bed file
}
sub computeTPM {
my ($self,$sample,$rl) = @_;
my ($TPM,$T,$totalTPM) = (0)x3;
my ($i,$meanTPM);
# iterate through $self->data[$i] twice:
# 1. for computing T (denominator in TPM formula)
foreach $i (keys %{${$self->data}[$sample]}){
my $count = ${${$self->data}[$sample]}{$i}{count};
my $length = ${${$self->data}[$sample]}{$i}{length};
#print "count: $count\nlength: $length\n";
$T += $count * $rl / $length;
}
# 2. for computng actual TPM values
foreach $i (keys %{${$self->data}[$sample]}){
my $count = ${${$self->data}[$sample]}{$i}{count};
my $length = ${${$self->data}[$sample]}{$i}{length};
$TPM = 1000000 * $count * $rl/($length * $T);
${${$self->data}[$sample]}{$i}{TPM} = $TPM;
$totalTPM += $TPM;
}
$meanTPM = $totalTPM/$self->nr_features;
#print Dumper(${$self->data}[$sample]);
#print "totalTPM=$totalTPM | meanTPM=$meanTPM\n";
return $meanTPM;
}
sub computeRPKM {
my ($self,$sample) = @_;
my ($R,$length,$count,$totalRPKM) = (0)x4;
my ($i,$meanRPKM);
# iterate through $self->data[$i] twice:
# 1. compute the total number of reads for a sample
foreach $i (keys %{${$self->data}[$sample]}){
$R += ${${$self->data}[$sample]}{$i}{count};
}
# 2. for computing actual RPKM values
foreach $i (keys %{${$self->data}[$sample]}){
$count = ${${$self->data}[$sample]}{$i}{count};
$length = ${${$self->data}[$sample]}{$i}{length};
my $RPKM = ( $count * 1000000000 ) / ( $length * $R );
${${$self->data}[$sample]}{$i}{RPKM} = $RPKM;
$totalRPKM += $RPKM;
}
$meanRPKM = $totalRPKM/$self->nr_features;
return $meanRPKM;
}
sub computeRPKMfromTPM {
my ($self,$sample,$rl) = @_;
my ($R,$T,$length,$count,$totalRPKM) = (0)x5;
my ($i,$meanRPKM);
# iterate through $self->data[$i] twice:
# 1. compute T (denominator in TPM formula) and total number of reads
foreach $i (keys %{${$self->data}[$sample]}){
$count = ${${$self->data}[$sample]}{$i}{count};
$length = ${${$self->data}[$sample]}{$i}{length};
#print "count: $count\nlength: $length\n";
$T += $count * $rl / $length;
$R += $count;
}
# print "T: $T\tR: $R\n";
# 2. compute RPKM from TPM
foreach $i (keys %{${$self->data}[$sample]}){
my $RPKMfromTPM = ${${$self->data}[$sample]}{$i}{TPM} * ( $T * 1000 ) / ( $R * $rl);
${${$self->data}[$sample]}{$i}{RPKMfromTPM} = $RPKMfromTPM;
$totalRPKM += $RPKMfromTPM;
}
$meanRPKM = $totalRPKM/$self->nr_features;
# print Dumper(${$self->data}[$sample]);
return $meanRPKM;
}
__PACKAGE__->meta->make_immutable;
1;
__END__
=head1 NAME
Bio::ViennaNGS::Expression - An object oriented interface for
computing read-count based gene expression as TPM or RPKM
=head1 SYNOPSIS
use Bio::ViennaNGS::Expression;
my $expression = Bio::ViennaNGS::Expression->new();
# parse read counts from an extended BED12 file
$expression->parse_readcounts_bed12("$bed12");
# compute normalized expression of ith sample in Transcript per
# Million (TPM)
$expression->computeTPM($i, $readlength);
# compute normalized expression of ith sample in Reads per Kilobase
# per Million Reads (RPKM)
$expression->computeRPKM($i);
# write extended BED12 file with normalized expression in TPM for
# each condition past the 12th column
$expression->write_expression_bed12("TPM", $dest, $basename);
=head1 DESCRIPTION
This module provides a L<Moose> interface for computation of gene /
transcript expression from read counts.
=head1 METHODS
( run in 0.796 second using v1.01-cache-2.11-cpan-13bb782fe5a )