App-RecordStream
view release on metacpan or search on metacpan
lib/App/RecordStream/Operation/topn.pm view on Meta::CPAN
package App::RecordStream::Operation::topn;
our $VERSION = "4.0.25";
use strict;
use warnings;
use base qw(App::RecordStream::Operation);
sub init {
my $this = shift;
my $args = shift;
my $top = 10;
my $value_delimiter = "9t%7Oz%]";
my $key_groups = App::RecordStream::KeyGroups->new();
my $spec = {
"key|k=s" => sub { $key_groups->add_groups($_[1]); },
"topn|n=i" => \$top,
"delimiter=s" => \$value_delimiter,
};
$this->parse_options($args, $spec);
die "Must at least specify --topn <value>" unless $top;
$this->{'KEY_GROUPS'} = $key_groups;
$this->{'NUM'} = $top;
$this->{'DELIM'} = $value_delimiter,
$this->{'PRIOR_KEY_VALUES'} = "";
}
sub init_keys {
my $this = shift;
my $record = shift;
$this->{'KEYS'} = $this->{'KEY_GROUPS'}->get_keyspecs($record);
}
sub accept_record {
my $this = shift;
my $record = shift;
if ( ! $this->{'KEYS'} ) {
$this->init_keys($record);
}
my $current_key_values = "";
foreach my $k ( @{$this->{'KEYS'}} ) {
$current_key_values .= ${$record->guess_key_from_spec( $k )} . $this->{'DELIM'};
}
$this->{'NUM_SEEN'}->{$current_key_values}++;
if( $this->{'NUM_SEEN'}->{$current_key_values} <= $this->{'NUM'} ) {
$this->push_record($record);
}
return 1;
}
sub add_help_types {
my $this = shift;
$this->use_help_type('keyspecs');
$this->use_help_type('keygroups');
$this->use_help_type('keys');
}
sub usage
{
my $this = shift;
my $options = [
['key <keyspec>', 'Comma separated list of fields. May be specified multiple times. May be a keyspec or keygroup, see \'--help-keys\' for more'],
['topn | -n <number>', 'Number of records to output. Default is 10.'],
['delimiter <string>', 'String used internally to delimit values when performing a topn on a keyspec that inlcudeds multiple keys. This value defaults to "9t%7Oz%]" which may - under unusual and bizarre corner cases - cause false positive key ma...
];
my $args_string = $this->options_string($options);
return <<USAGE;
Usage: recs-topn <args> [<files>]
__FORMAT_TEXT__
Outputs the top n records from input stream or from <files>. You may
segment the input stream based on a list of keys such that unique values
of keys are treated as distinct input streams. This enables
top n listings per value groupings. The key values need not be contiguous
in the input record stream.
__FORMAT_TEXT__
$args_string
Examples:
Output just the top 5 records
cat records | recs-topn -n=5
(this is equivalent to executing "cat records | recs-grep '\$line < 5'")
Output just 10 records for each area
cat records | recs-sort --key area | recs-topn -n=10 --key area
Output the top 10 longest running queries per area and priority level
cat records | recs-sort --key area,priority,runtime=-n | recs-topn -n=10 --key area,priority
USAGE
}
1;
( run in 0.972 second using v1.01-cache-2.11-cpan-39bf76dae61 )