Bio-ViennaNGS

 view release on metacpan or  search on metacpan

lib/Bio/ViennaNGS/Expression.pm  view on Meta::CPAN

  croak "ERROR [$this_function]: $dest does not exist\n"
    unless (-d $dest);

  $bedname = $base_name.".".$item.".multicov.bed12";
  $bedname_u = $base_name.".".$item.".multicov.u.bed12";

  $outfile = file($dest,$bedname_u);

  croak "ERROR [$this_function]: $self->conds not available\n"
    unless ($self->has_conds);

  croak "ERROR [$this_function]: $self->nr_features not available\n"
    unless ($self->has_features);

  #print "=====> write_multicov: writing multicov file $bedfile with ".
  #  eval($self->nr_features)."  lines and ".eval($self->conds)." conditions\n";

  # check whether each element in @{$self->data} has the same amount of entries
  for($i=0;$i<$self->conds;$i++){
    my $fc = scalar keys %{$self->data->[$i]}; # of keys in %{$featCount}
    #print "condition $i => $fc keys\n";
    croak "ERROR [$this_function]: unequal element count in @{$self->data}"
      unless($self->nr_features == $fc)
  }

  open (MULTICOV_OUT, "> $bedname_u") or croak $!;

  # use BED12 data stored with condition 0 here, assuming its the same for all conditions
  foreach $feat (keys %{${$self->data}[0]} ){
    my @mcLine = ();
    # retrieve BED12 line first
    my $bedo = ${${$self->data}[0]}{$feat}{bed_entry};
    my $bedline = $bedo->as_bed_line(12);
    push @mcLine, $bedline;

    # process multicov values for all samples
    for($i=0;$i<$self->conds;$i++){
      # print "------------>  ";  print "processing $i th condition ";  print "<-----------\n";
      croak "Could not find item $feat in mcSample $i\n"
	unless ( defined ${${$self->data}[$i]}{$feat} );
      push @mcLine, ${${$self->data}[$i]}{$feat}{$item};

    }
    #print MULTICOV_OUT join("\t",@mcLine)."\n";
    my @bed12 = splice @mcLine,0,1;
    print MULTICOV_OUT join("\t",@bed12)."\t";
    print MULTICOV_OUT join("\t", map { sprintf "%20.4f", $_ }@mcLine),"\n";
  }
  close(MULTICOV_OUT);

  sortbed($bedname_u,"./",$bedname,1,undef);  # sort bed file
}


sub computeTPM {
  my ($self,$sample,$rl) = @_;
  my ($TPM,$T,$totalTPM) = (0)x3;
  my ($i,$meanTPM);

  # iterate through $self->data[$i] twice:
  # 1. for computing T (denominator in TPM formula)
  foreach $i (keys %{${$self->data}[$sample]}){
    my $count  = ${${$self->data}[$sample]}{$i}{count};
    my $length =  ${${$self->data}[$sample]}{$i}{length};
    #print "count: $count\nlength: $length\n";
    $T += $count * $rl / $length;
  }

  # 2. for computng actual TPM values
  foreach $i (keys %{${$self->data}[$sample]}){
    my $count  = ${${$self->data}[$sample]}{$i}{count};
    my $length =  ${${$self->data}[$sample]}{$i}{length};
    $TPM = 1000000 * $count * $rl/($length * $T);
    ${${$self->data}[$sample]}{$i}{TPM} = $TPM;
    $totalTPM += $TPM;
  }

  $meanTPM = $totalTPM/$self->nr_features;

  #print Dumper(${$self->data}[$sample]);
  #print "totalTPM=$totalTPM | meanTPM=$meanTPM\n";

  return $meanTPM;
}

sub computeRPKM {
  my ($self,$sample) = @_;
  my ($R,$length,$count,$totalRPKM) = (0)x4;
  my ($i,$meanRPKM);

  # iterate through $self->data[$i] twice:
  # 1. compute the total number of reads for a sample
  foreach $i (keys %{${$self->data}[$sample]}){
    $R += ${${$self->data}[$sample]}{$i}{count};
  }

  # 2. for computing actual RPKM values
  foreach $i (keys %{${$self->data}[$sample]}){
    $count  = ${${$self->data}[$sample]}{$i}{count};
    $length = ${${$self->data}[$sample]}{$i}{length};
    my $RPKM = ( $count * 1000000000 ) / ( $length * $R );
    ${${$self->data}[$sample]}{$i}{RPKM} = $RPKM;
    $totalRPKM += $RPKM;
  }
  $meanRPKM = $totalRPKM/$self->nr_features;
  return $meanRPKM;
}

sub computeRPKMfromTPM {
  my ($self,$sample,$rl) = @_;
  my ($R,$T,$length,$count,$totalRPKM) = (0)x5;
  my ($i,$meanRPKM);

  # iterate through $self->data[$i] twice:
  # 1. compute T (denominator in TPM formula) and total number of reads
  foreach $i (keys %{${$self->data}[$sample]}){
    $count  = ${${$self->data}[$sample]}{$i}{count};
    $length =  ${${$self->data}[$sample]}{$i}{length};
    #print "count: $count\nlength: $length\n";
    $T += $count * $rl / $length;
    $R += $count;
  }
 # print "T: $T\tR: $R\n";

  # 2. compute RPKM from TPM
  foreach $i (keys %{${$self->data}[$sample]}){
     my $RPKMfromTPM = ${${$self->data}[$sample]}{$i}{TPM} * ( $T * 1000 ) / ( $R * $rl);
     ${${$self->data}[$sample]}{$i}{RPKMfromTPM} =  $RPKMfromTPM;
     $totalRPKM += $RPKMfromTPM;
  }
  $meanRPKM = $totalRPKM/$self->nr_features;

 # print Dumper(${$self->data}[$sample]);

  return $meanRPKM;
}

__PACKAGE__->meta->make_immutable;

1;
__END__


=head1 NAME

Bio::ViennaNGS::Expression - An object oriented interface for
computing read-count based gene expression as TPM or RPKM

=head1 SYNOPSIS

  use Bio::ViennaNGS::Expression;

  my $expression = Bio::ViennaNGS::Expression->new();

  # parse read counts from an extended BED12 file
  $expression->parse_readcounts_bed12("$bed12");

  # compute normalized expression of ith sample in Transcript per
  # Million (TPM)
  $expression->computeTPM($i, $readlength);

  # compute normalized expression of ith sample in Reads per Kilobase
  # per Million Reads (RPKM)
  $expression->computeRPKM($i);

  # write extended BED12 file with normalized expression in TPM for
  # each condition past the 12th column
  $expression->write_expression_bed12("TPM", $dest, $basename);

=head1 DESCRIPTION

This module provides a L<Moose> interface for computation of gene /
transcript expression from read counts.

=head1 METHODS



( run in 0.796 second using v1.01-cache-2.11-cpan-13bb782fe5a )