AI-TensorFlow-Libtensorflow

 view release on metacpan or  search on metacpan

lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubCenterNetObjDetect.pod  view on Meta::CPAN


my %images_for_test_to_uri = (
    "beach_scene" => 'https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image2.jpg?raw=true',
);

my @image_names = sort keys %images_for_test_to_uri;
my $h = HTML::Tiny->new;

my $image_name = 'beach_scene';
if( IN_IPERL ) {
    IPerl->html(
        $h->a( { href => $images_for_test_to_uri{$image_name} },
            $h->img({
                src => $images_for_test_to_uri{$image_name},
                alt => $image_name,
                width => '100%',
            })
        ),
    );
}

sub load_image_to_pdl {
    my ($uri, $image_size) = @_;

    my $http = HTTP::Tiny->new;
    my $response = $http->get( $uri );
    die "Could not fetch image from $uri" unless $response->{success};
    say "Downloaded $uri";

    my $img = Imager->new;
    $img->read( data => $response->{content} );

    # Create PDL ndarray from Imager data in-memory.
    my $data;
    $img->write( data => \$data, type => 'raw' )
        or die "could not write ". $img->errstr;

    die "Image does not have 3 channels, it has @{[ $img->getchannels ]} channels"
        if $img->getchannels != 3;

    # $data is packed as PDL->dims == [w,h] with RGB pixels
    my $pdl_raw = zeros(byte, $img->getchannels, $img->getwidth, $img->getheight);
    ${ $pdl_raw->get_dataref } = $data;
    $pdl_raw->upd_data;

    $pdl_raw;
}

my @pdl_images = map {
    load_image_to_pdl(
        $images_for_test_to_uri{$_},
        $model_name_to_params{$model_name}{image_size}
    );
} ($image_names[0]);

my $pdl_image_batched = cat(@pdl_images);
my $t = Uint8PDLTOTFTensor($pdl_image_batched);

die "There should be 4 dimensions" unless $pdl_image_batched->ndims == 4;

die "With the final dimension of length 1" unless $pdl_image_batched->dim(3) == 1;

p $pdl_image_batched;
p $t;

my $RunSession = sub {
    my ($session, $t) = @_;
    my @outputs_t;

    my @keys = keys %{ $outputs{out} };
    my @values = $outputs{out}->@{ @keys };
    $session->Run(
        undef,
        [ values %{$outputs{in} } ], [$t],
        \@values, \@outputs_t,
        undef,
        undef,
        $s
    );
    AssertOK($s);

    return { mesh \@keys, \@outputs_t };
};

undef;

my $tftensor_output_by_name = $RunSession->($session, $t);

my %pdl_output_by_name = map {
    $_ => FloatTFTensorToPDL( $tftensor_output_by_name->{$_} )
} keys $tftensor_output_by_name->%*;

undef;

my $min_score_thresh = 0.30;

my $which_detect = which( $pdl_output_by_name{detection_scores} > $min_score_thresh );

my %subset;

$subset{detection_boxes}   = $pdl_output_by_name{detection_boxes}->dice('X', $which_detect);
$subset{detection_classes} = $pdl_output_by_name{detection_classes}->dice($which_detect);
$subset{detection_scores}  = $pdl_output_by_name{detection_scores}->dice($which_detect);

$subset{detection_class_labels}->@* = map { $labels_map{$_} } $subset{detection_classes}->list;

p %subset;

use PDL::Graphics::Gnuplot;

my $plot_output_path = 'objects-detected.png';
my $gp = gpwin('pngcairo', font => ",12", output => $plot_output_path, aa => 2, size => [10] );

my @qual_cmap = ('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6');

$gp->options(
    map {
        my $idx = $_;
        my $lc_rgb = $qual_cmap[ $subset{detection_classes}->slice("($idx)")->squeeze % @qual_cmap ];

        my $box_corners_yx_norm = $subset{detection_boxes}->slice([],$idx,[0,0,0]);

lib/AI/TensorFlow/Libtensorflow/Manual/Notebook/InferenceUsingTFHubCenterNetObjDetect.pod  view on Meta::CPAN

  my $image_name = 'beach_scene';
  if( IN_IPERL ) {
      IPerl->html(
          $h->a( { href => $images_for_test_to_uri{$image_name} },
              $h->img({
                  src => $images_for_test_to_uri{$image_name},
                  alt => $image_name,
                  width => '100%',
              })
          ),
      );
  }

=head2 Download the test image and transform it into suitable input data

We now fetch the image and prepare it to be in the needed format by using C<Imager>. Note that this model does not need the input image to be of a certain size so no resizing or padding is required.

Then we turn the C<Imager> data into a C<PDL> ndarray. Since we just need the 3 channels of the image as they are, they can be stored directly in a C<PDL> ndarray of type C<byte>.

The reason why we need to concatenate the C<PDL> ndarrays here despite the model only taking a single image at a time is to get an ndarray with four (4) dimensions with the last C<PDL> dimension of size one (1).

  sub load_image_to_pdl {
      my ($uri, $image_size) = @_;
  
      my $http = HTTP::Tiny->new;
      my $response = $http->get( $uri );
      die "Could not fetch image from $uri" unless $response->{success};
      say "Downloaded $uri";
  
      my $img = Imager->new;
      $img->read( data => $response->{content} );
  
      # Create PDL ndarray from Imager data in-memory.
      my $data;
      $img->write( data => \$data, type => 'raw' )
          or die "could not write ". $img->errstr;
  
      die "Image does not have 3 channels, it has @{[ $img->getchannels ]} channels"
          if $img->getchannels != 3;
  
      # $data is packed as PDL->dims == [w,h] with RGB pixels
      my $pdl_raw = zeros(byte, $img->getchannels, $img->getwidth, $img->getheight);
      ${ $pdl_raw->get_dataref } = $data;
      $pdl_raw->upd_data;
  
      $pdl_raw;
  }
  
  my @pdl_images = map {
      load_image_to_pdl(
          $images_for_test_to_uri{$_},
          $model_name_to_params{$model_name}{image_size}
      );
  } ($image_names[0]);
  
  my $pdl_image_batched = cat(@pdl_images);
  my $t = Uint8PDLTOTFTensor($pdl_image_batched);
  
  die "There should be 4 dimensions" unless $pdl_image_batched->ndims == 4;
  
  die "With the final dimension of length 1" unless $pdl_image_batched->dim(3) == 1;
  
  p $pdl_image_batched;
  p $t;

=head2 Run the model for inference

We can use the C<Run> method to run the session and get the multiple output C<TFTensor>s. The following uses the names in C<$outputs> mapping to help process the multiple outputs more easily.

  my $RunSession = sub {
      my ($session, $t) = @_;
      my @outputs_t;
  
      my @keys = keys %{ $outputs{out} };
      my @values = $outputs{out}->@{ @keys };
      $session->Run(
          undef,
          [ values %{$outputs{in} } ], [$t],
          \@values, \@outputs_t,
          undef,
          undef,
          $s
      );
      AssertOK($s);
  
      return { mesh \@keys, \@outputs_t };
  };
  
  undef;



  my $tftensor_output_by_name = $RunSession->($session, $t);
  
  my %pdl_output_by_name = map {
      $_ => FloatTFTensorToPDL( $tftensor_output_by_name->{$_} )
  } keys $tftensor_output_by_name->%*;
  
  undef;

=head2 Results summary

Then we use a score threshold to select the objects of interest.

  my $min_score_thresh = 0.30;
  
  my $which_detect = which( $pdl_output_by_name{detection_scores} > $min_score_thresh );
  
  my %subset;
  
  $subset{detection_boxes}   = $pdl_output_by_name{detection_boxes}->dice('X', $which_detect);
  $subset{detection_classes} = $pdl_output_by_name{detection_classes}->dice($which_detect);
  $subset{detection_scores}  = $pdl_output_by_name{detection_scores}->dice($which_detect);
  
  $subset{detection_class_labels}->@* = map { $labels_map{$_} } $subset{detection_classes}->list;
  
  p %subset;

The following uses the bounding boxes and class label information to draw boxes and labels on top of the image using Gnuplot.

  use PDL::Graphics::Gnuplot;



( run in 0.559 second using v1.01-cache-2.11-cpan-140bd7fdf52 )