Bio-ViennaNGS
view release on metacpan or search on metacpan
lib/Bio/ViennaNGS/Expression.pm view on Meta::CPAN
# -*-CPerl-*-
# Last changed Time-stamp: <2017-06-10 18:59:03 michl>
package Bio::ViennaNGS::Expression;
use Bio::ViennaNGS;
use Moose;
use Carp;
use Data::Dumper;
use Path::Class;
use Bio::ViennaNGS::Bed;
use Bio::ViennaNGS::Util qw(sortbed);
use namespace::autoclean;
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");
has 'readcountfile' => (
is => 'rw',
predicate => 'has_readcountfile',
);
has 'data' => (
is => 'rw',
isa => 'ArrayRef',
default => sub { [] },
);
has 'conds' => (
is => 'rw',
isa => 'Int',
predicate => 'has_conds',
);
has 'nr_features' => (
is => 'rw',
isa => 'Int',
predicate => 'has_features',
);
sub parse_readcounts_bed12 {
my ($self,$file) = @_;
my @mcData = ();
my ($i,$n) = (0)x2;
my $this_function = (caller(0))[3];
croak "ERROR [$this_function] readcount / multicov file $self->readcountfile not available\n"
unless (-e $file);
$self->readcountfile($file);
open (RC_IN, "< $file") or croak $!;
while (<RC_IN>){
$n++;
chomp;
# 0:chr|1:start|2:end|3:name|4:score|5:strand
# 6:thickStart|7:thickEnd|8:itemRgb|9:blockCount|
# 10:blockSizes|11:blockStarts
@mcData = split(/\t/);
my $conditions = (scalar @mcData)-12; # multicov extends BED12
$self->conds($conditions);
# NOTE: Better keep BED12 entries in a hash, generating UUIDs as
# keys instead of storing the same BED12 entry n times (ie for
# each sample) in $self->data
my $bedobj = Bio::ViennaNGS::Bed->new(chromosome => $mcData[0],
start => $mcData[1],
end => $mcData[2],
name => $mcData[3],
score => $mcData[4],
strand => $mcData[5],
thickStart => $mcData[6],
thickEnd => $mcData[7],
itemRgb => $mcData[8],
blockCount => $mcData[9],
blockSizes => $mcData[10],
blockStarts => $mcData[11],
);
my $len = $bedobj->length;
my $id = sprintf("%s:%d-%d_%s_%s", $mcData[0],$mcData[1],
$mcData[2],$mcData[3],$mcData[5]);
#print "\$id: $id\n";
for ($i=0;$i<$self->conds;$i++){
${$self->data}[$i]{$id} = {
bed_entry => $bedobj,
length => $len,
count => $mcData[eval(12+$i)],
};
# print "sample $i:\n";
# print Dumper(${$self->data}[$i]);
}
}
$self->nr_features($n);
close(RC_IN);
}
sub write_expression_bed12 {
my ($self,$item,$dest,$base_name) = @_;
my ($bedname,$bedname_u,$outfile,$feat,$i);
my $this_function = (caller(0))[3];
croak "ERROR [$this_function]: $dest does not exist\n"
unless (-d $dest);
$bedname = $base_name.".".$item.".multicov.bed12";
$bedname_u = $base_name.".".$item.".multicov.u.bed12";
$outfile = file($dest,$bedname_u);
croak "ERROR [$this_function]: $self->conds not available\n"
unless ($self->has_conds);
croak "ERROR [$this_function]: $self->nr_features not available\n"
unless ($self->has_features);
#print "=====> write_multicov: writing multicov file $bedfile with ".
# eval($self->nr_features)." lines and ".eval($self->conds)." conditions\n";
# check whether each element in @{$self->data} has the same amount of entries
for($i=0;$i<$self->conds;$i++){
my $fc = scalar keys %{$self->data->[$i]}; # of keys in %{$featCount}
#print "condition $i => $fc keys\n";
croak "ERROR [$this_function]: unequal element count in @{$self->data}"
unless($self->nr_features == $fc)
}
open (MULTICOV_OUT, "> $bedname_u") or croak $!;
# use BED12 data stored with condition 0 here, assuming its the same for all conditions
foreach $feat (keys %{${$self->data}[0]} ){
my @mcLine = ();
# retrieve BED12 line first
my $bedo = ${${$self->data}[0]}{$feat}{bed_entry};
my $bedline = $bedo->as_bed_line(12);
push @mcLine, $bedline;
# process multicov values for all samples
for($i=0;$i<$self->conds;$i++){
# print "------------> "; print "processing $i th condition "; print "<-----------\n";
croak "Could not find item $feat in mcSample $i\n"
unless ( defined ${${$self->data}[$i]}{$feat} );
push @mcLine, ${${$self->data}[$i]}{$feat}{$item};
}
#print MULTICOV_OUT join("\t",@mcLine)."\n";
my @bed12 = splice @mcLine,0,1;
print MULTICOV_OUT join("\t",@bed12)."\t";
print MULTICOV_OUT join("\t", map { sprintf "%20.4f", $_ }@mcLine),"\n";
}
close(MULTICOV_OUT);
sortbed($bedname_u,"./",$bedname,1,undef); # sort bed file
}
sub computeTPM {
my ($self,$sample,$rl) = @_;
my ($TPM,$T,$totalTPM) = (0)x3;
my ($i,$meanTPM);
( run in 1.975 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )