start results from the CPAN

AI-TensorFlow-Libtensorflow

        undef,
        $s
    );
    AssertOK($s);

    return $outputs_t[0];
};

undef;

use PDL;

our $SHOW_ENCODER = 1;

sub one_hot_dna {
    my ($seq) = @_;

    my $from_alphabet = "NACGT";
    my $to_alphabet   = pack "C*", 0..length($from_alphabet)-1;

    # sequences from UCSC genome have both uppercase and lowercase bases
    my $from_alphabet_tr = $from_alphabet . lc $from_alphabet;
    my $to_alphabet_tr   = $to_alphabet x 2;

    my $p = zeros(byte, bytes::length($seq));
    my $p_dataref = $p->get_dataref;
    ${ $p_dataref } = $seq;
    eval "tr/$from_alphabet_tr/$to_alphabet_tr/" for ${ $p_dataref };
    $p->upd_data;

    my $encoder = append(float(0), identity(float(length($from_alphabet)-1)) );
    say "Encoder is\n", $encoder->info, $encoder if $SHOW_ENCODER;

    my $encoded  = $encoder->index( $p->dummy(0) );

    return $encoded;
}

####

{

say "Testing one-hot encoding:\n";

my $onehot_test_seq = "ACGTNtgcan";
my $test_encoded = one_hot_dna( $onehot_test_seq );
$SHOW_ENCODER = 0;

say "One-hot encoding of sequence '$onehot_test_seq' is:";
say $test_encoded->info, $test_encoded;

}

package Interval {
    use Bio::Location::Simple ();

    use parent qw(Bio::Location::Simple);

    sub center {
        my $self = shift;
        my $center = int( ($self->start + $self->end ) / 2 );
        my $delta = ($self->start + $self->end ) % 2;
        return $center + $delta;
    }

    sub resize {
        my ($self, $width) = @_;
        my $new_interval = $self->clone;

        my $center = $self->center;
        my $half   = int( ($width-1) / 2 );
        my $offset = ($width-1) % 2;

        $new_interval->start( $center - $half - $offset );
        $new_interval->end(   $center + $half  );

        return $new_interval;
    }

    use overload '""' => \&_op_stringify;

    sub _op_stringify { sprintf "%s:%s", $_[0]->seq_id // "(no sequence)", $_[0]->to_FTstring }
}

#####

{

say "Testing interval resizing:\n";
sub _debug_resize {
    my ($interval, $to, $msg) = @_;

    my $resized_interval = $interval->resize($to);

    die "Wrong interval size for $interval --($to)--> $resized_interval"
        unless $resized_interval->length == $to;

    say sprintf "Interval: %s -> %s, length %2d : %s",
        $interval,
        $resized_interval, $resized_interval->length,
        $msg;
}

for my $interval_spec ( [4, 8], [5, 8], [5, 9], [6, 9]) {
    my ($start, $end) = @$interval_spec;
    my $test_interval = Interval->new( -seq_id => 'chr11', -start => $start, -end => $end );
    say sprintf "Testing interval %s with length %d", $test_interval, $test_interval->length;
    say "-----";
    for(0..5) {
        my $base = $test_interval->length;
        my $to = $base + $_;
        _debug_resize $test_interval, $to, "$base -> $to (+ $_)";
    }
    say "";
}

}

undef;

use Bio::DB::HTS::Faidx;

my $hg_db = Bio::DB::HTS::Faidx->new( $hg_bgz_path );

sub extract_sequence {
    my ($db, $interval) = @_;

    my $chrom_length = $db->length($interval->seq_id);

    my $trimmed_interval = $interval->clone;
    $trimmed_interval->start( List::Util::max( $interval->start, 1               ) );
    $trimmed_interval->end(   List::Util::min( $interval->end  , $chrom_length   ) );

    # Bio::DB::HTS::Faidx is 0-based for both start and end points
    my $seq = $db->get_sequence2_no_length(
        $trimmed_interval->seq_id,
        $trimmed_interval->start - 1,
        $trimmed_interval->end   - 1,
    );

    my $pad_upstream   = 'N' x List::Util::max( -($interval->start-1), 0 );
    my $pad_downstream = 'N' x List::Util::max( $interval->end - $chrom_length, 0 );

    return join '', $pad_upstream, $seq, $pad_downstream;
}

sub seq_info {
    my ($seq, $n) = @_;
    $n ||= 10;
    if( length $seq > $n ) {
        sprintf "%s...%s (length %d)", uc substr($seq, 0, $n), uc substr($seq, -$n), length $seq;
    } else {
        sprintf "%s (length %d)", uc $seq, length $seq;
    }
}

####

{

say "Testing sequence extraction:";

say "1 base: ",   seq_info
    extract_sequence( $hg_db,
        Interval->new( -seq_id => 'chr11',
            -start => 35_082_742 + 1,
            -end   => 35_082_742 + 1 ) );

say "3 bases: ",  seq_info
    extract_sequence( $hg_db,
        Interval->new( -seq_id => 'chr11',
            -start => 1,
            -end   => 1 )->resize(3) );

say "5 bases: ", seq_info
    extract_sequence( $hg_db,
        Interval->new( -seq_id => 'chr11',
            -start => $hg_db->length('chr11'),
            -end   => $hg_db->length('chr11') )->resize(5) );

say "chr11 is of length ", $hg_db->length('chr11');
say "chr11 bases: ", seq_info
    extract_sequence( $hg_db,
        Interval->new( -seq_id => 'chr11',
            -start => 1,
            -end   => $hg_db->length('chr11') )->resize( $hg_db->length('chr11') ) );
}

my $target_interval = Interval->new( -seq_id => 'chr11',
    -start => 35_082_742 +  1, # BioPerl is 1-based
    -end   => 35_197_430 );

say "Target interval: $target_interval with length @{[ $target_interval->length ]}";

die "Target interval is not $model_central_base_pairs_length bp long"
    unless $target_interval->length == $model_central_base_pairs_length;

say "Target sequence is ", seq_info extract_sequence( $hg_db, $target_interval );


say "";


my $resized_interval = $target_interval->resize( $model_sequence_length );
say "Resized interval: $resized_interval with length @{[ $resized_interval->length ]}";

die "resize() is not working properly!" unless $resized_interval->length == $model_sequence_length;

my $seq = extract_sequence( $hg_db, $resized_interval );

say "Resized sequence is ", seq_info($seq);

my $sequence_one_hot = one_hot_dna( $seq )->dummy(-1);

say $sequence_one_hot->info; undef;

use Devel::Timer;
my $t = Devel::Timer->new;

$t->mark('prediction of sequence');

my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );

$t->mark('End of prediction of sequence');

p $predictions;

$t->mark('END');
$t->report();

my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
say $predictions_p->info; undef;

my @tracks = (
    [ 'DNASE:CD14-positive monocyte female' =>   41 => $predictions_p->slice('(41)') ],
    [ 'DNASE:keratinocyte female'           =>   42 => $predictions_p->slice('(42)') ],
    [ 'CHIP:H3K27ac:keratinocyte female'    =>  706 => $predictions_p->slice('(706)')],
    [ 'CAGE:Keratinocyte - epidermal'       => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
);

use PDL::Graphics::Gnuplot;

my $plot_output_path = 'enformer-target-interval-tracks.png';
my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );

$gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );

$gp->options(
    offsets => [ graph => "0.01, 0, 0, 0" ],
    lmargin => "at screen 0.05",
);

my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);

my @tics_opts = (mirror => 0, out => 1);

for my $i (0..$#tracks) {
    my ($title, $id, $y) = @{$tracks[$i]};
    $gp->plot( {
            title => $title,
            border => [2],
            ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
            ( $i == $#tracks
                ? ( xtics => { format => '%.3f', @tics_opts } )
                : ( xtics => 0 ) ),
            ( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : ()  ),

        },
        with => 'filledcurves',
        #'lc' => '#086eb5',

        # $x scaled by 1e7; filled curve between $y and the x-axis
        $x / 1e7, $y, pdl(0)
    );
}

$gp->end_multi;

$gp->close;

if( IN_IPERL ) {
    IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
}

# Some code that could be used for working with variants.
1 if <<'COMMENT';

use Bio::DB::HTS::VCF;

my $clinvar_tbi_path = "${clinvar_path}.tbi";
unless( -f $clinvar_tbi_path ) {
    system( qw(tabix), $clinvar_path );
}
my $v = Bio::DB::HTS::VCF->new( filename => $clinvar_path );
$v->num_variants

COMMENT

undef;

use Filesys::DiskUsage qw/du/;

my $total = du( { 'human-readable' => 1, dereference => 1 },
    $model_archive_path, $model_base, $new_model_base,

    $targets_path,

    $hg_gz_path,
    $hg_bgz_path, $hg_bgz_fai_path,

    $clinvar_path,

    $plot_output_path,

lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubEnformerGeneExprPredModel.pod view on Meta::CPAN

  say "One-hot encoding of sequence '$onehot_test_seq' is:";
  say $test_encoded->info, $test_encoded;
  
  }

B<STREAM (STDOUT)>:

  Testing one-hot encoding:
  
  Encoder is
  PDL: Float D [5,4]
  [
   [0 1 0 0 0]
   [0 0 1 0 0]
   [0 0 0 1 0]
   [0 0 0 0 1]
  ]
  
  One-hot encoding of sequence 'ACGTNtgcan' is:
  PDL: Float D [4,10]
  [
   [1 0 0 0]
   [0 1 0 0]
   [0 0 1 0]
   [0 0 0 1]
   [0 0 0 0]
   [0 0 0 1]
   [0 0 1 0]
   [0 1 0 0]
   [1 0 0 0]
   [0 0 0 0]
  ]

B<RESULT>:

  1

Note that in the above, the PDL ndarray's

=over

=item *

first dimension is 4 which matches the last dimension of the input C<TFTensor>;

=item *

second dimension is the sequence length which matches the penultimate dimension of the input C<TFTensor>.

=back

Now we need a way to deal with the sequence interval. We're going to use 1-based coordinates as BioPerl does. In fact, we'll extend a BioPerl class.

  package Interval {
      use Bio::Location::Simple ();
  
      use parent qw(Bio::Location::Simple);
  
      sub center {
          my $self = shift;
          my $center = int( ($self->start + $self->end ) / 2 );
          my $delta = ($self->start + $self->end ) % 2;
          return $center + $delta;
      }
  
      sub resize {
          my ($self, $width) = @_;
          my $new_interval = $self->clone;
  
          my $center = $self->center;
          my $half   = int( ($width-1) / 2 );
          my $offset = ($width-1) % 2;
  
          $new_interval->start( $center - $half - $offset );
          $new_interval->end(   $center + $half  );
  
          return $new_interval;
      }
  
      use overload '""' => \&_op_stringify;
  
      sub _op_stringify { sprintf "%s:%s", $_[0]->seq_id // "(no sequence)", $_[0]->to_FTstring }
  }
  
  #####
  
  {
  
  say "Testing interval resizing:\n";
  sub _debug_resize {
      my ($interval, $to, $msg) = @_;
  
      my $resized_interval = $interval->resize($to);
  
      die "Wrong interval size for $interval --($to)--> $resized_interval"
          unless $resized_interval->length == $to;
  
      say sprintf "Interval: %s -> %s, length %2d : %s",
          $interval,
          $resized_interval, $resized_interval->length,
          $msg;
  }
  
  for my $interval_spec ( [4, 8], [5, 8], [5, 9], [6, 9]) {
      my ($start, $end) = @$interval_spec;
      my $test_interval = Interval->new( -seq_id => 'chr11', -start => $start, -end => $end );
      say sprintf "Testing interval %s with length %d", $test_interval, $test_interval->length;
      say "-----";
      for(0..5) {
          my $base = $test_interval->length;
          my $to = $base + $_;
          _debug_resize $test_interval, $to, "$base -> $to (+ $_)";
      }
      say "";
  }
  
  }
  
  undef;

B<STREAM (STDOUT)>:

  Testing interval resizing:
  
  Testing interval chr11:4..8 with length 5
  -----
  Interval: chr11:4..8 -> chr11:4..8, length  5 : 5 -> 5 (+ 0)
  Interval: chr11:4..8 -> chr11:3..8, length  6 : 5 -> 6 (+ 1)
  Interval: chr11:4..8 -> chr11:3..9, length  7 : 5 -> 7 (+ 2)
  Interval: chr11:4..8 -> chr11:2..9, length  8 : 5 -> 8 (+ 3)
  Interval: chr11:4..8 -> chr11:2..10, length  9 : 5 -> 9 (+ 4)
  Interval: chr11:4..8 -> chr11:1..10, length 10 : 5 -> 10 (+ 5)
  
  Testing interval chr11:5..8 with length 4
  -----
  Interval: chr11:5..8 -> chr11:5..8, length  4 : 4 -> 4 (+ 0)
  Interval: chr11:5..8 -> chr11:5..9, length  5 : 4 -> 5 (+ 1)
  Interval: chr11:5..8 -> chr11:4..9, length  6 : 4 -> 6 (+ 2)
  Interval: chr11:5..8 -> chr11:4..10, length  7 : 4 -> 7 (+ 3)
  Interval: chr11:5..8 -> chr11:3..10, length  8 : 4 -> 8 (+ 4)
  Interval: chr11:5..8 -> chr11:3..11, length  9 : 4 -> 9 (+ 5)
  
  Testing interval chr11:5..9 with length 5
  -----
  Interval: chr11:5..9 -> chr11:5..9, length  5 : 5 -> 5 (+ 0)
  Interval: chr11:5..9 -> chr11:4..9, length  6 : 5 -> 6 (+ 1)
  Interval: chr11:5..9 -> chr11:4..10, length  7 : 5 -> 7 (+ 2)
  Interval: chr11:5..9 -> chr11:3..10, length  8 : 5 -> 8 (+ 3)
  Interval: chr11:5..9 -> chr11:3..11, length  9 : 5 -> 9 (+ 4)
  Interval: chr11:5..9 -> chr11:2..11, length 10 : 5 -> 10 (+ 5)
  
  Testing interval chr11:6..9 with length 4
  -----
  Interval: chr11:6..9 -> chr11:6..9, length  4 : 4 -> 4 (+ 0)
  Interval: chr11:6..9 -> chr11:6..10, length  5 : 4 -> 5 (+ 1)
  Interval: chr11:6..9 -> chr11:5..10, length  6 : 4 -> 6 (+ 2)
  Interval: chr11:6..9 -> chr11:5..11, length  7 : 4 -> 7 (+ 3)
  Interval: chr11:6..9 -> chr11:4..11, length  8 : 4 -> 8 (+ 4)
  Interval: chr11:6..9 -> chr11:4..12, length  9 : 4 -> 9 (+ 5)
  


  use Bio::DB::HTS::Faidx;
  
  my $hg_db = Bio::DB::HTS::Faidx->new( $hg_bgz_path );
  
  sub extract_sequence {
      my ($db, $interval) = @_;
  
      my $chrom_length = $db->length($interval->seq_id);
  
      my $trimmed_interval = $interval->clone;
      $trimmed_interval->start( List::Util::max( $interval->start, 1               ) );
      $trimmed_interval->end(   List::Util::min( $interval->end  , $chrom_length   ) );
  
      # Bio::DB::HTS::Faidx is 0-based for both start and end points
      my $seq = $db->get_sequence2_no_length(
          $trimmed_interval->seq_id,
          $trimmed_interval->start - 1,
          $trimmed_interval->end   - 1,
      );
  
      my $pad_upstream   = 'N' x List::Util::max( -($interval->start-1), 0 );
      my $pad_downstream = 'N' x List::Util::max( $interval->end - $chrom_length, 0 );
  
      return join '', $pad_upstream, $seq, $pad_downstream;
  }
  
  sub seq_info {
      my ($seq, $n) = @_;
      $n ||= 10;
      if( length $seq > $n ) {
          sprintf "%s...%s (length %d)", uc substr($seq, 0, $n), uc substr($seq, -$n), length $seq;
      } else {
          sprintf "%s (length %d)", uc $seq, length $seq;
      }
  }
  
  ####
  
  {
  
  say "Testing sequence extraction:";
  
  say "1 base: ",   seq_info
      extract_sequence( $hg_db,
          Interval->new( -seq_id => 'chr11',
              -start => 35_082_742 + 1,
              -end   => 35_082_742 + 1 ) );
  
  say "3 bases: ",  seq_info
      extract_sequence( $hg_db,
          Interval->new( -seq_id => 'chr11',
              -start => 1,
              -end   => 1 )->resize(3) );
  
  say "5 bases: ", seq_info
      extract_sequence( $hg_db,
          Interval->new( -seq_id => 'chr11',
              -start => $hg_db->length('chr11'),
              -end   => $hg_db->length('chr11') )->resize(5) );
  
  say "chr11 is of length ", $hg_db->length('chr11');
  say "chr11 bases: ", seq_info
      extract_sequence( $hg_db,
          Interval->new( -seq_id => 'chr11',
              -start => 1,
              -end   => $hg_db->length('chr11') )->resize( $hg_db->length('chr11') ) );
  }

B<STREAM (STDOUT)>:

  Testing sequence extraction:
  1 base: G (length 1)
  3 bases: NNN (length 3)
  5 bases: NNNNN (length 5)
  chr11 is of length 135086622
  chr11 bases: NNNNNNNNNN...NNNNNNNNNN (length 135086622)

B<RESULT>:

  1

Now we can use the same target interval that is used in the example notebook which recreates part of L<figure 1|https://www.nature.com/articles/s41592-021-01252-x/figures/1> from the Enformer paper.

  my $target_interval = Interval->new( -seq_id => 'chr11',
      -start => 35_082_742 +  1, # BioPerl is 1-based
      -end   => 35_197_430 );
  
  say "Target interval: $target_interval with length @{[ $target_interval->length ]}";
  
  die "Target interval is not $model_central_base_pairs_length bp long"
      unless $target_interval->length == $model_central_base_pairs_length;
  
  say "Target sequence is ", seq_info extract_sequence( $hg_db, $target_interval );
  
  
  say "";
  
  
  my $resized_interval = $target_interval->resize( $model_sequence_length );
  say "Resized interval: $resized_interval with length @{[ $resized_interval->length ]}";
  
  die "resize() is not working properly!" unless $resized_interval->length == $model_sequence_length;
  
  my $seq = extract_sequence( $hg_db, $resized_interval );
  
  say "Resized sequence is ", seq_info($seq);

B<STREAM (STDOUT)>:

  Target interval: chr11:35082743..35197430 with length 114688
  Target sequence is GGTGGCAGCC...ATCTCCTTTT (length 114688)
  
  Resized interval: chr11:34943479..35336694 with length 393216
  Resized sequence is ACTAGTTCTA...GGCCCAAATC (length 393216)

B<RESULT>:

  1

To prepare the input we have to one-hot encode this resized sequence and give it a dummy dimension at the end to indicate that it is is a batch with a single sequence. Then we can turn the PDL ndarray into a C<TFTensor> and pass it to our prediction ...

  my $sequence_one_hot = one_hot_dna( $seq )->dummy(-1);
  
  say $sequence_one_hot->info; undef;

B<STREAM (STDOUT)>:

  PDL: Float D [4,393216,1]


  use Devel::Timer;
  my $t = Devel::Timer->new;
  
  $t->mark('prediction of sequence');
  
  my $predictions = $predict_on_batch->( $session, FloatPDLTOTFTensor( $sequence_one_hot ) );
  
  $t->mark('End of prediction of sequence');
  
  p $predictions;
  
  $t->mark('END');
  $t->report();

B<STREAM (STDERR)>:

=begin html

<span style="display:inline-block;margin-left:1em;"><pre style="display: block"><code><span style="color: #cc66cc;">AI::TensorFlow::Libtensorflow::Tensor</span><span style=""> </span><span style="color: #33ccff;">{</span><span style="">
    </span><span style="color: #6666cc;">Type           </span><span style=""> </span><span style="color: #cc66cc;">FLOAT</span><span style="">
    </span><span style="color: #6666cc;">Dims           </span><span style=""> </span><span style="color: #33ccff;">[</span><span style=""> </span><span style="color: #ff6633;">1</span><span style=""> </span><span style="color: #ff6633;">896</span><s...
    </span><span style="color: #6666cc;">NumDims        </span><span style=""> </span><span style="color: #ff6633;">3</span><span style="">
    </span><span style="color: #6666cc;">ElementCount   </span><span style=""> </span><span style="color: #ff6633;">4760448</span><span style="">
</span><span style="color: #33ccff;">}</span><span style="">


Devel::Timer Report -- Total time: 14.5641 secs
Interval  Time    Percent
----------------------------------------------
01 -&gt; 02  14.5634  100.00%  prediction of sequence -&gt; End of prediction of sequence
02 -&gt; 03  0.0007   0.00%  End of prediction of sequence -&gt; END
00 -&gt; 01  0.0000   0.00%  INIT -&gt; prediction of sequence
</span></code></pre></span>

=end html

Now we turn the C<TFTensor> output into a PDL ndarray.

  my $predictions_p = FloatTFTensorToPDL($predictions)->slice(',,(0)');
  say $predictions_p->info; undef;

B<STREAM (STDOUT)>:

  PDL: Float D [5313,896]

=head2 Plot predicted tracks

These predictions can be plotted 

  my @tracks = (
      [ 'DNASE:CD14-positive monocyte female' =>   41 => $predictions_p->slice('(41)') ],
      [ 'DNASE:keratinocyte female'           =>   42 => $predictions_p->slice('(42)') ],
      [ 'CHIP:H3K27ac:keratinocyte female'    =>  706 => $predictions_p->slice('(706)')],
      [ 'CAGE:Keratinocyte - epidermal'       => 4799 => log10(1 + $predictions_p->slice('(4799)')) ],
  );
  
  use PDL::Graphics::Gnuplot;
  
  my $plot_output_path = 'enformer-target-interval-tracks.png';
  my $gp = gpwin('pngcairo', font => ",10", output => $plot_output_path, size => [10,2. * @tracks], aa => 2 );
  
  $gp->multiplot( layout => [1, scalar @tracks], title => $target_interval );
  
  $gp->options(
      offsets => [ graph => "0.01, 0, 0, 0" ],
      lmargin => "at screen 0.05",
  );
  
  my $x = zeroes($predictions_p->dim(1))->xlinvals($target_interval->start, $target_interval->end);
  
  my @tics_opts = (mirror => 0, out => 1);
  
  for my $i (0..$#tracks) {
      my ($title, $id, $y) = @{$tracks[$i]};
      $gp->plot( {
              title => $title,
              border => [2],
              ytics => { @tics_opts, locations => [ ceil(($y->max-$y->min)/2)->sclr ] },
              ( $i == $#tracks
                  ? ( xtics => { format => '%.3f', @tics_opts } )
                  : ( xtics => 0 ) ),
              ( $i == $#tracks ? ( xlabel => 'location ({/Symbol \264}10^7 bases)' ) : ()  ),
  
          },
          with => 'filledcurves',
          #'lc' => '#086eb5',
  
          # $x scaled by 1e7; filled curve between $y and the x-axis
          $x / 1e7, $y, pdl(0)
      );
  }
  
  $gp->end_multi;
  
  $gp->close;
  
  if( IN_IPERL ) {
      IPerl->png( bytestream => path($plot_output_path)->slurp_raw );
  }

B<DISPLAY>:

=for html <span style="display:inline-block;margin-left:1em;"><p><img						src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA+gAAAMgCAIAAAA/et9qAAAgAElEQVR4nOzdd2AUVeIH8Ddb0jshBAIEpSo1GjoIpyAgCOqd3uGdoGBBUQQFRUVBRbkTf9gOBQucqFiwUhSSgJQYCCSBkJBAet1k...

=head2 Parts of the original notebook that fall outside the scope

In the orignal notebook, there are several more steps that have not been ported here:

=over

=item 1.

"Compute contribution scores":

This task requires implementing C<@tf.function> to compile gradients.

=item 2.

"Predict the effect of a genetic variant" and "Score multiple variants":

The first task is possible, but the second task requires loading a pre-processing pipeline for scikit-learn and unfortunately this pipeline is stored as a pickle file that is valid for an older version of scikit-learn (version 0.23.2) and as such its...

=back

  # Some code that could be used for working with variants.
  1 if <<'COMMENT';
  
  use Bio::DB::HTS::VCF;

( run in 1.395 second using v1.01-cache-2.11-cpan-c966e8aa7e8 )