App-RecordStream
view release on metacpan or search on metacpan
lib/App/RecordStream/Operation/annotate.pm view on Meta::CPAN
package App::RecordStream::Operation::annotate;
our $VERSION = "4.0.25";
use strict;
use base qw(App::RecordStream::Operation);
use App::RecordStream::Executor::Getopt;
use App::RecordStream::Executor;
use App::RecordStream::Record;
sub init {
my $this = shift;
my $args = shift;
my $key_groups = App::RecordStream::KeyGroups->new();
my $executor_options = App::RecordStream::Executor::Getopt->new();
my $spec = {
$executor_options->arguments(),
'keys|k=s' => sub { $key_groups->add_groups($_[1]); },
};
$this->parse_options($args, $spec, ['bundling']);
my $expression = $executor_options->get_string($args);
my $executor = App::RecordStream::Executor->new(<<" CODE");
$expression
; # Safe from a trailing comment in \$expression
\$r
CODE
if ( ! $key_groups->has_any_group() ) {
die "Must specify at least one --key, maybe you want recs-xform instead?\n";
}
$this->{'EXECUTOR'} = $executor;
$this->{'KEYGROUP'} = $key_groups;
$this->{'ANNOTATIONS'} = {};
}
sub accept_record {
my $this = shift;
my $record = shift;
my $specs = $this->{'KEYGROUP'}->get_keyspecs_for_record($record);
my @values;
foreach my $key (sort @$specs) {
my $value = ${$record->guess_key_from_spec($key)};
push @values, $value;
}
# Join keys with the ASCII record separator character (30)
my $synthetic_key = join(chr(30), @values);
if ( exists $this->{'ANNOTATIONS'}->{$synthetic_key} ) {
$this->apply_annotation($synthetic_key, $record);
$this->push_record($record);
return 1;
}
my $executor = $this->{'EXECUTOR'};
my $store = {};
my $hash = create_recorder({$record->as_hash()}, $store);
my $new_record = App::RecordStream::Record->new($hash);
my $returned_record = $executor->execute_code($new_record);
$this->{'ANNOTATIONS'}->{$synthetic_key} = $store;
$this->push_record($returned_record);
return 1;
}
sub apply_annotation {
my $this = shift;
my $annotation_key = shift;
my $record = shift;
my $stores = $this->{'ANNOTATIONS'}->{$annotation_key};
foreach my $keyspec (keys %$stores) {
my $value = $stores->{$keyspec};
${$record->guess_key_from_spec($keyspec)} = $value;
}
}
sub add_help_types {
my $this = shift;
$this->use_help_type('snippet');
$this->use_help_type('keyspecs');
$this->use_help_type('keygroups');
$this->use_help_type('keys');
}
sub usage {
my $this = shift;
my $options = [
App::RecordStream::Executor::options_help(),
['keys', 'Keys to match records by, maybe specified multiple times, may be a keygroup or keyspec'],
];
my $args_string = $this->options_string($options);
return <<USAGE;
Usage: recs-annotate <args> <expr> [<files>]
__FORMAT_TEXT__
<expr> is evaluated as perl on each record of input (or records from
<files>) with \$r set to a App::RecordStream::Record object and \$line set
to the current line number (starting at 1). Records are analyzed for
changes, those changes are applied to each successive record that matches
--keys
Only use this script if you have --keys fields that are repeated, otherwise
recs-xform will be faster
__FORMAT_TEXT__
IMPORTANT SNIPPET NOTE
__FORMAT_TEXT__
Because of the way annotations are recorded, you cannot use UNSHIFT or
SPLICE on array refs that already exist in the record you are modifiying.
Additionally, deletes, removes, unshifts, and other 'removing' operations
will not apply to later records. If you need this behavior, consider using
recs-xform
__FORMAT_TEXT__
$args_string
Examples:
# Annotate records with IPs with hostnames, only doing lookup once
... | recs-annotate --key ip '{{hostname}} = `host {{ip}}`'
# Record md5sums of files
... | recs-annotate --key filename '{{md5}} = `md5sum {{filename}}`'
# Add url contents to records
... | recs-annotate --key url '{{contents}} = `curl {{url}}`'
USAGE
}
sub create_recorder {
my $data = shift;
my $store = shift;
( run in 0.558 second using v1.01-cache-2.11-cpan-39bf76dae61 )