AI-TensorFlow-Libtensorflow
view release on metacpan or search on metacpan
lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN
my $encoder = append(float(0), identity(float(length($from_alphabet)-1)) );
say "Encoder is\n", $encoder->info, $encoder if $SHOW_ENCODER;
my $encoded = $encoder->index( $p->dummy(0) );
return $encoded;
}
####
{
say "Testing one-hot encoding:\n";
my $onehot_test_seq = "ACGTNtgcan";
my $test_encoded = one_hot_dna( $onehot_test_seq );
$SHOW_ENCODER = 0;
say "One-hot encoding of sequence '$onehot_test_seq' is:";
say $test_encoded->info, $test_encoded;
}
package Interval {
use Bio::Location::Simple ();
use parent qw(Bio::Location::Simple);
sub center {
my $self = shift;
my $center = int( ($self->start + $self->end ) / 2 );
my $delta = ($self->start + $self->end ) % 2;
return $center + $delta;
}
sub resize {
my ($self, $width) = @_;
my $new_interval = $self->clone;
my $center = $self->center;
my $half = int( ($width-1) / 2 );
my $offset = ($width-1) % 2;
$new_interval->start( $center - $half - $offset );
$new_interval->end( $center + $half );
return $new_interval;
}
use overload '""' => \&_op_stringify;
sub _op_stringify { sprintf "%s:%s", $_[0]->seq_id // "(no sequence)", $_[0]->to_FTstring }
}
#####
{
say "Testing interval resizing:\n";
sub _debug_resize {
my ($interval, $to, $msg) = @_;
my $resized_interval = $interval->resize($to);
die "Wrong interval size for $interval --($to)--> $resized_interval"
unless $resized_interval->length == $to;
say sprintf "Interval: %s -> %s, length %2d : %s",
$interval,
$resized_interval, $resized_interval->length,
$msg;
}
for my $interval_spec ( [4, 8], [5, 8], [5, 9], [6, 9]) {
my ($start, $end) = @$interval_spec;
my $test_interval = Interval->new( -seq_id => 'chr11', -start => $start, -end => $end );
say sprintf "Testing interval %s with length %d", $test_interval, $test_interval->length;
say "-----";
for(0..5) {
my $base = $test_interval->length;
my $to = $base + $_;
_debug_resize $test_interval, $to, "$base -> $to (+ $_)";
}
say "";
}
}
undef;
use Bio::DB::HTS::Faidx;
my $hg_db = Bio::DB::HTS::Faidx->new( $hg_bgz_path );
sub extract_sequence {
my ($db, $interval) = @_;
my $chrom_length = $db->length($interval->seq_id);
my $trimmed_interval = $interval->clone;
$trimmed_interval->start( List::Util::max( $interval->start, 1 ) );
$trimmed_interval->end( List::Util::min( $interval->end , $chrom_length ) );
# Bio::DB::HTS::Faidx is 0-based for both start and end points
my $seq = $db->get_sequence2_no_length(
$trimmed_interval->seq_id,
$trimmed_interval->start - 1,
$trimmed_interval->end - 1,
);
my $pad_upstream = 'N' x List::Util::max( -($interval->start-1), 0 );
my $pad_downstream = 'N' x List::Util::max( $interval->end - $chrom_length, 0 );
return join '', $pad_upstream, $seq, $pad_downstream;
}
sub seq_info {
my ($seq, $n) = @_;
$n ||= 10;
if( length $seq > $n ) {
sprintf "%s...%s (length %d)", uc substr($seq, 0, $n), uc substr($seq, -$n), length $seq;
} else {
sprintf "%s (length %d)", uc $seq, length $seq;
}
}
####
{
say "Testing sequence extraction:";
say "1 base: ", seq_info
extract_sequence( $hg_db,
Interval->new( -seq_id => 'chr11',
-start => 35_082_742 + 1,
-end => 35_082_742 + 1 ) );
say "3 bases: ", seq_info
extract_sequence( $hg_db,
Interval->new( -seq_id => 'chr11',
-start => 1,
lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN
[1 0 0 0]
[0 0 0 0]
]
B<RESULT>:
1
Note that in the above, the PDL ndarray's
=over
=item *
first dimension is 4 which matches the last dimension of the input C<TFTensor>;
=item *
second dimension is the sequence length which matches the penultimate dimension of the input C<TFTensor>.
=back
Now we need a way to deal with the sequence interval. We're going to use 1-based coordinates as BioPerl does. In fact, we'll extend a BioPerl class.
package Interval {
use Bio::Location::Simple ();
use parent qw(Bio::Location::Simple);
sub center {
my $self = shift;
my $center = int( ($self->start + $self->end ) / 2 );
my $delta = ($self->start + $self->end ) % 2;
return $center + $delta;
}
sub resize {
my ($self, $width) = @_;
my $new_interval = $self->clone;
my $center = $self->center;
my $half = int( ($width-1) / 2 );
my $offset = ($width-1) % 2;
$new_interval->start( $center - $half - $offset );
$new_interval->end( $center + $half );
return $new_interval;
}
use overload '""' => \&_op_stringify;
sub _op_stringify { sprintf "%s:%s", $_[0]->seq_id // "(no sequence)", $_[0]->to_FTstring }
}
#####
{
say "Testing interval resizing:\n";
sub _debug_resize {
my ($interval, $to, $msg) = @_;
my $resized_interval = $interval->resize($to);
die "Wrong interval size for $interval --($to)--> $resized_interval"
unless $resized_interval->length == $to;
say sprintf "Interval: %s -> %s, length %2d : %s",
$interval,
$resized_interval, $resized_interval->length,
$msg;
}
for my $interval_spec ( [4, 8], [5, 8], [5, 9], [6, 9]) {
my ($start, $end) = @$interval_spec;
my $test_interval = Interval->new( -seq_id => 'chr11', -start => $start, -end => $end );
say sprintf "Testing interval %s with length %d", $test_interval, $test_interval->length;
say "-----";
for(0..5) {
my $base = $test_interval->length;
my $to = $base + $_;
_debug_resize $test_interval, $to, "$base -> $to (+ $_)";
}
say "";
}
}
undef;
B<STREAM (STDOUT)>:
Testing interval resizing:
Testing interval chr11:4..8 with length 5
-----
Interval: chr11:4..8 -> chr11:4..8, length 5 : 5 -> 5 (+ 0)
Interval: chr11:4..8 -> chr11:3..8, length 6 : 5 -> 6 (+ 1)
Interval: chr11:4..8 -> chr11:3..9, length 7 : 5 -> 7 (+ 2)
Interval: chr11:4..8 -> chr11:2..9, length 8 : 5 -> 8 (+ 3)
Interval: chr11:4..8 -> chr11:2..10, length 9 : 5 -> 9 (+ 4)
Interval: chr11:4..8 -> chr11:1..10, length 10 : 5 -> 10 (+ 5)
Testing interval chr11:5..8 with length 4
-----
Interval: chr11:5..8 -> chr11:5..8, length 4 : 4 -> 4 (+ 0)
Interval: chr11:5..8 -> chr11:5..9, length 5 : 4 -> 5 (+ 1)
Interval: chr11:5..8 -> chr11:4..9, length 6 : 4 -> 6 (+ 2)
Interval: chr11:5..8 -> chr11:4..10, length 7 : 4 -> 7 (+ 3)
Interval: chr11:5..8 -> chr11:3..10, length 8 : 4 -> 8 (+ 4)
Interval: chr11:5..8 -> chr11:3..11, length 9 : 4 -> 9 (+ 5)
Testing interval chr11:5..9 with length 5
-----
Interval: chr11:5..9 -> chr11:5..9, length 5 : 5 -> 5 (+ 0)
Interval: chr11:5..9 -> chr11:4..9, length 6 : 5 -> 6 (+ 1)
Interval: chr11:5..9 -> chr11:4..10, length 7 : 5 -> 7 (+ 2)
Interval: chr11:5..9 -> chr11:3..10, length 8 : 5 -> 8 (+ 3)
Interval: chr11:5..9 -> chr11:3..11, length 9 : 5 -> 9 (+ 4)
Interval: chr11:5..9 -> chr11:2..11, length 10 : 5 -> 10 (+ 5)
Testing interval chr11:6..9 with length 4
-----
Interval: chr11:6..9 -> chr11:6..9, length 4 : 4 -> 4 (+ 0)
Interval: chr11:6..9 -> chr11:6..10, length 5 : 4 -> 5 (+ 1)
Interval: chr11:6..9 -> chr11:5..10, length 6 : 4 -> 6 (+ 2)
Interval: chr11:6..9 -> chr11:5..11, length 7 : 4 -> 7 (+ 3)
Interval: chr11:6..9 -> chr11:4..11, length 8 : 4 -> 8 (+ 4)
Interval: chr11:6..9 -> chr11:4..12, length 9 : 4 -> 9 (+ 5)
use Bio::DB::HTS::Faidx;
my $hg_db = Bio::DB::HTS::Faidx->new( $hg_bgz_path );
sub extract_sequence {
my ($db, $interval) = @_;
my $chrom_length = $db->length($interval->seq_id);
my $trimmed_interval = $interval->clone;
( run in 1.224 second using v1.01-cache-2.11-cpan-0bd6704ced7 )