AI-TensorFlow-Libtensorflow
view release on metacpan or search on metacpan
lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN
die "Status $_[0]: " . $_[0]->Message
unless $_[0]->GetCode == AI::TensorFlow::Libtensorflow::Status::OK;
return;
}
AssertOK($s);
And create helpers for converting between C<PDL> ndarrays and C<TFTensor> ndarrays.
use PDL;
use AI::TensorFlow::Libtensorflow::DataType qw(FLOAT);
use FFI::Platypus::Memory qw(memcpy);
use FFI::Platypus::Buffer qw(scalar_to_pointer);
sub FloatPDLTOTFTensor {
my ($p) = @_;
return AI::TensorFlow::Libtensorflow::Tensor->New(
FLOAT, [ reverse $p->dims ], $p->get_dataref, sub { undef $p }
);
}
sub FloatTFTensorToPDL {
my ($t) = @_;
my $pdl = zeros(float,reverse( map $t->Dim($_), 0..$t->NumDims-1 ) );
memcpy scalar_to_pointer( ${$pdl->get_dataref} ),
scalar_to_pointer( ${$t->Data} ),
$t->ByteSize;
$pdl->upd_data;
$pdl;
}
=head2 Download model and data
=over
=item *
L<Enformer model|https://tfhub.dev/deepmind/enformer/1> from
> Avsec Ž, Agarwal V, Visentin D, Ledsam JR, Grabska-Barwinska A, Taylor KR, Assael Y, Jumper J, Kohli P, Kelley DR. Effective gene expression prediction from sequence by integrating long-range interactions. I<Nat Methods>. 2021 Oct;B<18(10)>:1196...
=item *
L<Human target dataset|https://github.com/calico/basenji/tree/master/manuscripts/cross2020> from
> Kelley DR. Cross-species regulatory sequence activity prediction. I<PLoS Comput Biol>. 2020 Jul 20;B<16(7)>:e1008050. doi: L<10.1371/journal.pcbi.1008050|https://doi.org/10.1371/journal.pcbi.1008050>. PMID: L<32687525|https://pubmed.ncbi.nlm.nih....
=item *
L<UCSC hg38 genome|https://www.ncbi.nlm.nih.gov/assembly/GCA_000001405.15>. More info at L<http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/>; L<Genome Reference Consortium Human Build 38|https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26/>...
> Schneider VA, Graves-Lindsay T, Howe K, Bouk N, Chen HC, Kitts PA, Murphy TD, Pruitt KD, Thibaud-Nissen F, Albracht D, Fulton RS, Kremitzki M, Magrini V, Markovic C, McGrath S, Steinberg KM, Auger K, Chow W, Collins J, Harden G, Hubbard T, Pelan ...
=item *
L<ClinVar|https://www.ncbi.nlm.nih.gov/clinvar/> file
> Landrum MJ, Lee JM, Benson M, Brown GR, Chao C, Chitipiralla S, Gu B, Hart J, Hoffman D, Jang W, Karapetyan K, Katz K, Liu C, Maddipatla Z, Malheiro A, McDaniel K, Ovetsky M, Riley G, Zhou G, Holmes JB, Kattman BL, Maglott DR. ClinVar: improving ...
=back
# Model handle
my $model_uri = URI->new( 'https://tfhub.dev/deepmind/enformer/1' );
$model_uri->query_form( 'tf-hub-format' => 'compressed' );
my $model_base = substr( $model_uri->path, 1 ) =~ s,/,_,gr;
my $model_archive_path = "${model_base}.tar.gz";
my $model_sequence_length = 393_216; # bp
# Human targets from Basenji2 dataset
my $targets_uri = URI->new('https://raw.githubusercontent.com/calico/basenji/master/manuscripts/cross2020/targets_human.txt');
my $targets_path = 'targets_human.txt';
# Human reference genome
my $hg_uri = URI->new("http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz");
my $hg_gz_path = "hg38.fa.gz";
# From http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/md5sum.txt
my $hg_md5_digest = "1c9dcaddfa41027f17cd8f7a82c7293b";
my $clinvar_uri = URI->new('https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz');
my $clinvar_path = 'clinvar.vcf.gz';
my $http = HTTP::Tiny->new;
for my $download ( [ $model_uri => $model_archive_path ],
[ $targets_uri => $targets_path ],
[ $hg_uri => $hg_gz_path ],
[ $clinvar_uri => $clinvar_path ],) {
my ($uri, $path) = @$download;
say "Downloading $uri to $path";
next if -e $path;
$http->mirror( $uri, $path );
}
B<STREAM (STDOUT)>:
Downloading https://tfhub.dev/deepmind/enformer/1?tf-hub-format=compressed to deepmind_enformer_1.tar.gz
Downloading https://raw.githubusercontent.com/calico/basenji/master/manuscripts/cross2020/targets_human.txt to targets_human.txt
Downloading http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz to hg38.fa.gz
Downloading https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz to clinvar.vcf.gz
Now we
=over
=item 1.
extract the saved model so that we can load it and
=item 2.
check the MD5 sum of the reference genome to make sure it was downloaded correctly.
=back
use Archive::Extract;
$Archive::Extract::DEBUG = 1;
$Archive::Extract::PREFER_BIN = 1; # for the larger model, prefer bin
if( ! -e $model_base ) {
( run in 1.083 second using v1.01-cache-2.11-cpan-39bf76dae61 )