AI-TensorFlow-Libtensorflow

 view release on metacpan or  search on metacpan

lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod  view on Meta::CPAN

        Interval->new( -seq_id => 'chr11',
            -start => 1,
            -end   => $hg_db->length('chr11') )->resize( $hg_db->length('chr11') ) );
}

my $target_interval = Interval->new( -seq_id => 'chr11',
    -start => 35_082_742 +  1, # BioPerl is 1-based
    -end   => 35_197_430 );

say "Target interval: $target_interval with length @{[ $target_interval->length ]}";

die "Target interval is not $model_central_base_pairs_length bp long"
    unless $target_interval->length == $model_central_base_pairs_length;

say "Target sequence is ", seq_info extract_sequence( $hg_db, $target_interval );


say "";


my $resized_interval = $target_interval->resize( $model_sequence_length );
say "Resized interval: $resized_interval with length @{[ $resized_interval->length ]}";

die "resize() is not working properly!" unless $resized_interval->length == $model_sequence_length;

my $seq = extract_sequence( $hg_db, $resized_interval );

say "Resized sequence is ", seq_info($seq);

my $sequence_one_hot = one_hot_dna( $seq )->dummy(-1);

say $sequence_one_hot->info; undef;

use Devel::Timer;
my $t = Devel::Timer->new;

$t->mark('prediction of sequence');

my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );

$t->mark('End of prediction of sequence');

p $predictions;

$t->mark('END');
$t->report();

my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
say $predictions_p->info; undef;

my @tracks = (
    [ 'DNASE:CD14-positive monocyte female' =>   41 => $predictions_p->slice('(41)') ],
    [ 'DNASE:keratinocyte female'           =>   42 => $predictions_p->slice('(42)') ],
    [ 'CHIP:H3K27ac:keratinocyte female'    =>  706 => $predictions_p->slice('(706)')],
    [ 'CAGE:Keratinocyte - epidermal'       => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
);

use PDL::Graphics::Gnuplot;

my $plot_output_path = 'enformer-target-interval-tracks.png';
my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );

$gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );

$gp->options(
    offsets => [ graph => "0.01, 0, 0, 0" ],
    lmargin => "at screen 0.05",
);

my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);

my @tics_opts = (mirror => 0, out => 1);

for my $i (0..$#tracks) {
    my ($title, $id, $y) = @{$tracks[$i]};
    $gp->plot( {
            title => $title,
            border => [2],
            ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
            ( $i == $#tracks
                ? ( xtics => { format => '%.3f', @tics_opts } )
                : ( xtics => 0 ) ),
            ( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : ()  ),

        },
        with => 'filledcurves',
        #'lc' => '#086eb5',

        # $x scaled by 1e7; filled curve between $y and the x-axis
        $x / 1e7, $y, pdl(0)
    );
}

$gp->end_multi;

$gp->close;

if( IN_IPERL ) {
    IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
}

# Some code that could be used for working with variants.
1 if <<'COMMENT';

use Bio::DB::HTS::VCF;

my $clinvar_tbi_path = "${clinvar_path}.tbi";
unless( -f $clinvar_tbi_path ) {
    system( qw(tabix), $clinvar_path );
}
my $v = Bio::DB::HTS::VCF->new( filename => $clinvar_path );
$v->num_variants

COMMENT

undef;

use Filesys::DiskUsage qw/du/;

my $total = du( { 'human-readable' => 1, dereference => 1 },
    $model_archive_path, $model_base, $new_model_base,

lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod  view on Meta::CPAN


  use Devel::Timer;
  my $t = Devel::Timer->new;
  
  $t->mark('prediction of sequence');
  
  my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );
  
  $t->mark('End of prediction of sequence');
  
  p $predictions;
  
  $t->mark('END');
  $t->report();

B<STREAM (STDERR)>:

=begin html

<span style="display:inline-block;margin-left:1em;"><pre style="display: block"><code><span style="color: #cc66cc;">AI::TensorFlow::Libtensorflow::Tensor</span><span style=""> </span><span style="color: #33ccff;">{</span><span style="">
    </span><span style="color: #6666cc;">Type           </span><span style=""> </span><span style="color: #cc66cc;">FLOAT</span><span style="">
    </span><span style="color: #6666cc;">Dims           </span><span style=""> </span><span style="color: #33ccff;">[</span><span style=""> </span><span style="color: #ff6633;">1</span><span style=""> </span><span style="color: #ff6633;">896</span><s...
    </span><span style="color: #6666cc;">NumDims        </span><span style=""> </span><span style="color: #ff6633;">3</span><span style="">
    </span><span style="color: #6666cc;">ElementCount   </span><span style=""> </span><span style="color: #ff6633;">4760448</span><span style="">
</span><span style="color: #33ccff;">}</span><span style="">


Devel::Timer Report -- Total time: 14.5641 secs
Interval  Time    Percent
----------------------------------------------
01 -&gt; 02  14.5634  100.00%  prediction of sequence -&gt; End of prediction of sequence
02 -&gt; 03  0.0007   0.00%  End of prediction of sequence -&gt; END
00 -&gt; 01  0.0000   0.00%  INIT -&gt; prediction of sequence
</span></code></pre></span>

=end html

Now we turn the C<TFTensor> output into a PDL ndarray.

  my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
  say $predictions_p->info; undef;

B<STREAM (STDOUT)>:

  PDL: Float D [5313,896]

=head2 Plot predicted tracks

These predictions can be plotted 

  my @tracks = (
      [ 'DNASE:CD14-positive monocyte female' =>   41 => $predictions_p->slice('(41)') ],
      [ 'DNASE:keratinocyte female'           =>   42 => $predictions_p->slice('(42)') ],
      [ 'CHIP:H3K27ac:keratinocyte female'    =>  706 => $predictions_p->slice('(706)')],
      [ 'CAGE:Keratinocyte - epidermal'       => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
  );
  
  use PDL::Graphics::Gnuplot;
  
  my $plot_output_path = 'enformer-target-interval-tracks.png';
  my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );
  
  $gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );
  
  $gp->options(
      offsets => [ graph => "0.01, 0, 0, 0" ],
      lmargin => "at screen 0.05",
  );
  
  my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);
  
  my @tics_opts = (mirror => 0, out => 1);
  
  for my $i (0..$#tracks) {
      my ($title, $id, $y) = @{$tracks[$i]};
      $gp->plot( {
              title => $title,
              border => [2],
              ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
              ( $i == $#tracks
                  ? ( xtics => { format => '%.3f', @tics_opts } )
                  : ( xtics => 0 ) ),
              ( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : ()  ),
  
          },
          with => 'filledcurves',
          #'lc' => '#086eb5',
  
          # $x scaled by 1e7; filled curve between $y and the x-axis
          $x / 1e7, $y, pdl(0)
      );
  }
  
  $gp->end_multi;
  
  $gp->close;
  
  if( IN_IPERL ) {
      IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
  }

B<DISPLAY>:

=for html <span style="display:inline-block;margin-left:1em;"><p><img						src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA+gAAAMgCAIAAAA/et9qAAAgAElEQVR4nOzdd2AUVeIH8Ddb0jshBAIEpSo1GjoIpyAgCOqd3uGdoGBBUQQFRUVBRbkTf9gOBQucqFiwUhSSgJQYCCSBkJBAet1k...

=head2 Parts of the original notebook that fall outside the scope

In the orignal notebook, there are several more steps that have not been ported here:

=over

=item 1.

"Compute contribution scores":

This task requires implementing C<@tf.function> to compile gradients.

=item 2.

"Predict the effect of a genetic variant" and "Score multiple variants":



( run in 1.194 second using v1.01-cache-2.11-cpan-5a3173703d6 )