App-RecordStream

 view release on metacpan or  search on metacpan

lib/App/RecordStream/Operation/annotate.pm  view on Meta::CPAN

package App::RecordStream::Operation::annotate;

our $VERSION = "4.0.25";

use strict;

use base qw(App::RecordStream::Operation);

use App::RecordStream::Executor::Getopt;
use App::RecordStream::Executor;
use App::RecordStream::Record;

sub init {
  my $this = shift;
  my $args = shift;

  my $key_groups  = App::RecordStream::KeyGroups->new();

  my $executor_options = App::RecordStream::Executor::Getopt->new();
  my $spec = {
    $executor_options->arguments(),
    'keys|k=s' => sub { $key_groups->add_groups($_[1]); },
  };

  $this->parse_options($args, $spec, ['bundling']);

  my $expression = $executor_options->get_string($args);
  my $executor = App::RecordStream::Executor->new(<<"  CODE");
    $expression
      ; # Safe from a trailing comment in \$expression
    \$r
  CODE

  if ( ! $key_groups->has_any_group() ) {
    die "Must specify at least one --key, maybe you want recs-xform instead?\n";
  }

  $this->{'EXECUTOR'}    = $executor;
  $this->{'KEYGROUP'}    = $key_groups;
  $this->{'ANNOTATIONS'} = {};
}

sub accept_record {
  my $this   = shift;
  my $record = shift;

  my $specs = $this->{'KEYGROUP'}->get_keyspecs_for_record($record);

  my @values;
  foreach my $key (sort @$specs) {
    my $value = ${$record->guess_key_from_spec($key)};
    push @values, $value;
  }

  # Join keys with the ASCII record separator character (30)
  my $synthetic_key = join(chr(30), @values);

  if ( exists $this->{'ANNOTATIONS'}->{$synthetic_key} ) {
    $this->apply_annotation($synthetic_key, $record);
    $this->push_record($record);
    return 1;
  }

  my $executor = $this->{'EXECUTOR'};

  my $store = {};

  my $hash = create_recorder({$record->as_hash()}, $store);

  my $new_record = App::RecordStream::Record->new($hash);

  my $returned_record = $executor->execute_code($new_record);

  $this->{'ANNOTATIONS'}->{$synthetic_key} = $store;

  $this->push_record($returned_record);

  return 1;
}

sub apply_annotation {
  my $this           = shift;
  my $annotation_key = shift;
  my $record         = shift;

  my $stores = $this->{'ANNOTATIONS'}->{$annotation_key};

  foreach my $keyspec (keys %$stores) {
    my $value = $stores->{$keyspec};
    ${$record->guess_key_from_spec($keyspec)} = $value;
  }
}

sub add_help_types {
  my $this = shift;
  $this->use_help_type('snippet');
  $this->use_help_type('keyspecs');
  $this->use_help_type('keygroups');
  $this->use_help_type('keys');
}

sub usage {
  my $this = shift;

  my $options = [
    App::RecordStream::Executor::options_help(),
    ['keys', 'Keys to match records by, maybe specified multiple times, may be a keygroup or keyspec'],
  ];

  my $args_string = $this->options_string($options);

  return <<USAGE;
Usage: recs-annotate <args> <expr> [<files>]
   __FORMAT_TEXT__
   <expr> is evaluated as perl on each record of input (or records from
   <files>) with \$r set to a App::RecordStream::Record object and \$line set
   to the current line number (starting at 1).  Records are analyzed for
   changes, those changes are applied to each successive record that matches
   --keys

   Only use this script if you have --keys fields that are repeated, otherwise
   recs-xform will be faster
   __FORMAT_TEXT__

IMPORTANT SNIPPET NOTE
   __FORMAT_TEXT__
   Because of the way annotations are recorded, you cannot use UNSHIFT or
   SPLICE on array refs that already exist in the record you are modifiying.
   Additionally, deletes, removes, unshifts, and other 'removing' operations
   will not apply to later records.  If you need this behavior, consider using
   recs-xform
   __FORMAT_TEXT__

$args_string

Examples:
   # Annotate records with IPs with hostnames, only doing lookup once
   ... | recs-annotate --key ip '{{hostname}} = `host {{ip}}`'

   # Record md5sums of files
   ... | recs-annotate --key filename '{{md5}} = `md5sum {{filename}}`'

   # Add url contents to records
   ... | recs-annotate --key url '{{contents}} = `curl {{url}}`'
USAGE
}

sub create_recorder {
  my $data            = shift;
  my $store           = shift;



( run in 0.558 second using v1.01-cache-2.11-cpan-39bf76dae61 )