AI-TensorFlow-Libtensorflow
view release on metacpan or search on metacpan
lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN
Interval->new( -seq_id => 'chr11',
-start => 1,
-end => $hg_db->length('chr11') )->resize( $hg_db->length('chr11') ) );
}
my $target_interval = Interval->new( -seq_id => 'chr11',
-start => 35_082_742 + 1, # BioPerl is 1-based
-end => 35_197_430 );
say "Target interval: $target_interval with length @{[ $target_interval->length ]}";
die "Target interval is not $model_central_base_pairs_length bp long"
unless $target_interval->length == $model_central_base_pairs_length;
say "Target sequence is ", seq_info extract_sequence( $hg_db, $target_interval );
say "";
my $resized_interval = $target_interval->resize( $model_sequence_length );
say "Resized interval: $resized_interval with length @{[ $resized_interval->length ]}";
die "resize() is not working properly!" unless $resized_interval->length == $model_sequence_length;
my $seq = extract_sequence( $hg_db, $resized_interval );
say "Resized sequence is ", seq_info($seq);
my $sequence_one_hot = one_hot_dna( $seq )->dummy(-1);
say $sequence_one_hot->info; undef;
use Devel::Timer;
my $t = Devel::Timer->new;
$t->mark('prediction of sequence');
my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );
$t->mark('End of prediction of sequence');
p $predictions;
$t->mark('END');
$t->report();
my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
say $predictions_p->info; undef;
my @tracks = (
[ 'DNASE:CD14-positive monocyte female' => 41 => $predictions_p->slice('(41)') ],
[ 'DNASE:keratinocyte female' => 42 => $predictions_p->slice('(42)') ],
[ 'CHIP:H3K27ac:keratinocyte female' => 706 => $predictions_p->slice('(706)')],
[ 'CAGE:Keratinocyte - epidermal' => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
);
use PDL::Graphics::Gnuplot;
my $plot_output_path = 'enformer-target-interval-tracks.png';
my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );
$gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );
$gp->options(
offsets => [ graph => "0.01, 0, 0, 0" ],
lmargin => "at screen 0.05",
);
my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);
my @tics_opts = (mirror => 0, out => 1);
for my $i (0..$#tracks) {
my ($title, $id, $y) = @{$tracks[$i]};
$gp->plot( {
title => $title,
border => [2],
ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
( $i == $#tracks
? ( xtics => { format => '%.3f', @tics_opts } )
: ( xtics => 0 ) ),
( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : () ),
},
with => 'filledcurves',
#'lc' => '#086eb5',
# $x scaled by 1e7; filled curve between $y and the x-axis
$x / 1e7, $y, pdl(0)
);
}
$gp->end_multi;
$gp->close;
if( IN_IPERL ) {
IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
}
# Some code that could be used for working with variants.
1 if <<'COMMENT';
use Bio::DB::HTS::VCF;
my $clinvar_tbi_path = "${clinvar_path}.tbi";
unless( -f $clinvar_tbi_path ) {
system( qw(tabix), $clinvar_path );
}
my $v = Bio::DB::HTS::VCF->new( filename => $clinvar_path );
$v->num_variants
COMMENT
undef;
use Filesys::DiskUsage qw/du/;
my $total = du( { 'human-readable' => 1, dereference => 1 },
$model_archive_path, $model_base, $new_model_base,
lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN
use Devel::Timer;
my $t = Devel::Timer->new;
$t->mark('prediction of sequence');
my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );
$t->mark('End of prediction of sequence');
p $predictions;
$t->mark('END');
$t->report();
B<STREAM (STDERR)>:
=begin html
<span style="display:inline-block;margin-left:1em;"><pre style="display: block"><code><span style="color: #cc66cc;">AI::TensorFlow::Libtensorflow::Tensor</span><span style=""> </span><span style="color: #33ccff;">{</span><span style="">
</span><span style="color: #6666cc;">Type </span><span style=""> </span><span style="color: #cc66cc;">FLOAT</span><span style="">
</span><span style="color: #6666cc;">Dims </span><span style=""> </span><span style="color: #33ccff;">[</span><span style=""> </span><span style="color: #ff6633;">1</span><span style=""> </span><span style="color: #ff6633;">896</span><s...
</span><span style="color: #6666cc;">NumDims </span><span style=""> </span><span style="color: #ff6633;">3</span><span style="">
</span><span style="color: #6666cc;">ElementCount </span><span style=""> </span><span style="color: #ff6633;">4760448</span><span style="">
</span><span style="color: #33ccff;">}</span><span style="">
Devel::Timer Report -- Total time: 14.5641 secs
Interval Time Percent
----------------------------------------------
01 -> 02 14.5634 100.00% prediction of sequence -> End of prediction of sequence
02 -> 03 0.0007 0.00% End of prediction of sequence -> END
00 -> 01 0.0000 0.00% INIT -> prediction of sequence
</span></code></pre></span>
=end html
Now we turn the C<TFTensor> output into a PDL ndarray.
my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
say $predictions_p->info; undef;
B<STREAM (STDOUT)>:
PDL: Float D [5313,896]
=head2 Plot predicted tracks
These predictions can be plotted
my @tracks = (
[ 'DNASE:CD14-positive monocyte female' => 41 => $predictions_p->slice('(41)') ],
[ 'DNASE:keratinocyte female' => 42 => $predictions_p->slice('(42)') ],
[ 'CHIP:H3K27ac:keratinocyte female' => 706 => $predictions_p->slice('(706)')],
[ 'CAGE:Keratinocyte - epidermal' => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
);
use PDL::Graphics::Gnuplot;
my $plot_output_path = 'enformer-target-interval-tracks.png';
my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );
$gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );
$gp->options(
offsets => [ graph => "0.01, 0, 0, 0" ],
lmargin => "at screen 0.05",
);
my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);
my @tics_opts = (mirror => 0, out => 1);
for my $i (0..$#tracks) {
my ($title, $id, $y) = @{$tracks[$i]};
$gp->plot( {
title => $title,
border => [2],
ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
( $i == $#tracks
? ( xtics => { format => '%.3f', @tics_opts } )
: ( xtics => 0 ) ),
( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : () ),
},
with => 'filledcurves',
#'lc' => '#086eb5',
# $x scaled by 1e7; filled curve between $y and the x-axis
$x / 1e7, $y, pdl(0)
);
}
$gp->end_multi;
$gp->close;
if( IN_IPERL ) {
IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
}
B<DISPLAY>:
=for html <span style="display:inline-block;margin-left:1em;"><p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA+gAAAMgCAIAAAA/et9qAAAgAElEQVR4nOzdd2AUVeIH8Ddb0jshBAIEpSo1GjoIpyAgCOqd3uGdoGBBUQQFRUVBRbkTf9gOBQucqFiwUhSSgJQYCCSBkJBAet1k...
=head2 Parts of the original notebook that fall outside the scope
In the orignal notebook, there are several more steps that have not been ported here:
=over
=item 1.
"Compute contribution scores":
This task requires implementing C<@tf.function> to compile gradients.
=item 2.
"Predict the effect of a genetic variant" and "Score multiple variants":
( run in 1.194 second using v1.01-cache-2.11-cpan-5a3173703d6 )