Bio-ViennaNGS

 view release on metacpan or  search on metacpan

lib/Bio/ViennaNGS/Expression.pm  view on Meta::CPAN

# -*-CPerl-*-
# Last changed Time-stamp: <2017-06-10 18:59:03 michl>

package Bio::ViennaNGS::Expression;

use Bio::ViennaNGS;
use Moose;
use Carp;
use Data::Dumper;
use Path::Class;
use Bio::ViennaNGS::Bed;
use Bio::ViennaNGS::Util qw(sortbed);
use namespace::autoclean;
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");

has 'readcountfile' => (
			is => 'rw',
			predicate => 'has_readcountfile',
		       );

has 'data' => (
	       is => 'rw',
	       isa => 'ArrayRef',
	       default => sub { [] },
	      );

has 'conds' => (
		is => 'rw',
		isa => 'Int',
		predicate => 'has_conds',
	       );

has 'nr_features' => (
		      is => 'rw',
		      isa => 'Int',
		      predicate => 'has_features',
		     );

sub parse_readcounts_bed12 {
  my ($self,$file) = @_;
  my @mcData = ();
  my ($i,$n) = (0)x2;
  my $this_function = (caller(0))[3];

  croak "ERROR [$this_function] readcount / multicov file $self->readcountfile not available\n"
    unless (-e $file);
  $self->readcountfile($file);
  open (RC_IN, "< $file") or croak $!;

  while (<RC_IN>){
    $n++;
    chomp;
    # 0:chr|1:start|2:end|3:name|4:score|5:strand
    # 6:thickStart|7:thickEnd|8:itemRgb|9:blockCount|
    # 10:blockSizes|11:blockStarts
    @mcData = split(/\t/);
    my $conditions = (scalar @mcData)-12;  # multicov extends BED12
    $self->conds($conditions);

    # NOTE: Better keep BED12 entries in a hash, generating UUIDs as
    # keys instead of storing the same BED12 entry n times (ie for
    # each sample) in $self->data

    my $bedobj =  Bio::ViennaNGS::Bed->new(chromosome   => $mcData[0],
					   start        => $mcData[1],
					   end          => $mcData[2],
					   name         => $mcData[3],
					   score        => $mcData[4],
					   strand       => $mcData[5],
					   thickStart   => $mcData[6],
					   thickEnd     => $mcData[7],
					   itemRgb      => $mcData[8],
					   blockCount   => $mcData[9],
					   blockSizes   => $mcData[10],
					   blockStarts  => $mcData[11],
					  );
    my $len = $bedobj->length;
    my $id = sprintf("%s:%d-%d_%s_%s", $mcData[0],$mcData[1],
		     $mcData[2],$mcData[3],$mcData[5]);
    #print "\$id: $id\n";
    for ($i=0;$i<$self->conds;$i++){
      ${$self->data}[$i]{$id} = {
				 bed_entry => $bedobj,
				 length    => $len,
				 count     => $mcData[eval(12+$i)],
				};
     # print "sample $i:\n";
     # print Dumper(${$self->data}[$i]);
    }
  }
  $self->nr_features($n);
  close(RC_IN);
}

sub write_expression_bed12 {
  my ($self,$item,$dest,$base_name) = @_;
  my ($bedname,$bedname_u,$outfile,$feat,$i);
  my $this_function = (caller(0))[3];

  croak "ERROR [$this_function]: $dest does not exist\n"
    unless (-d $dest);

  $bedname = $base_name.".".$item.".multicov.bed12";
  $bedname_u = $base_name.".".$item.".multicov.u.bed12";

  $outfile = file($dest,$bedname_u);

  croak "ERROR [$this_function]: $self->conds not available\n"
    unless ($self->has_conds);

  croak "ERROR [$this_function]: $self->nr_features not available\n"
    unless ($self->has_features);

  #print "=====> write_multicov: writing multicov file $bedfile with ".
  #  eval($self->nr_features)."  lines and ".eval($self->conds)." conditions\n";

  # check whether each element in @{$self->data} has the same amount of entries
  for($i=0;$i<$self->conds;$i++){
    my $fc = scalar keys %{$self->data->[$i]}; # of keys in %{$featCount}
    #print "condition $i => $fc keys\n";
    croak "ERROR [$this_function]: unequal element count in @{$self->data}"
      unless($self->nr_features == $fc)
  }

  open (MULTICOV_OUT, "> $bedname_u") or croak $!;

  # use BED12 data stored with condition 0 here, assuming its the same for all conditions
  foreach $feat (keys %{${$self->data}[0]} ){
    my @mcLine = ();
    # retrieve BED12 line first
    my $bedo = ${${$self->data}[0]}{$feat}{bed_entry};
    my $bedline = $bedo->as_bed_line(12);
    push @mcLine, $bedline;

    # process multicov values for all samples
    for($i=0;$i<$self->conds;$i++){
      # print "------------>  ";  print "processing $i th condition ";  print "<-----------\n";
      croak "Could not find item $feat in mcSample $i\n"
	unless ( defined ${${$self->data}[$i]}{$feat} );
      push @mcLine, ${${$self->data}[$i]}{$feat}{$item};

    }
    #print MULTICOV_OUT join("\t",@mcLine)."\n";
    my @bed12 = splice @mcLine,0,1;
    print MULTICOV_OUT join("\t",@bed12)."\t";
    print MULTICOV_OUT join("\t", map { sprintf "%20.4f", $_ }@mcLine),"\n";
  }
  close(MULTICOV_OUT);

  sortbed($bedname_u,"./",$bedname,1,undef);  # sort bed file
}


sub computeTPM {
  my ($self,$sample,$rl) = @_;
  my ($TPM,$T,$totalTPM) = (0)x3;
  my ($i,$meanTPM);



( run in 1.975 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )