App-RecordStream
view release on metacpan or search on metacpan
lib/App/RecordStream/Operation/normalizetime.pm view on Meta::CPAN
use strict;
use base qw(App::RecordStream::Operation);
use Date::Manip qw (ParseDate UnixDate ParseDateDelta Delta_Format);
# Parse and validate the command-line options for this operation and store
# the resulting configuration on the operation object.
#
# Arguments:
#   $this - the operation object; its parse_options method fills the spec
#   $args - the argument list, handed to parse_options unchanged
#
# Options:
#   --key|-k        key spec of the field holding the date/time (required)
#   --threshold|-n  bucket size: a positive number of seconds, or a delta
#                   string that Date::Manip can parse (required)
#   --strict|-s     flag, stored as STRICT
#   --epoch|-e      flag, stored as EPOCH
#
# Dies when --key or --threshold is missing, when the threshold is neither a
# number nor a parsable delta, or when the threshold is not greater than zero.
sub init {
    my $this = shift;
    my $args = shift;

    my $key;
    my $threshold;
    my $strict;
    my $epoch;

    my $spec = {
        "key|k=s"       => \$key,
        "strict|s"      => \$strict,
        "epoch|e"       => \$epoch,
        "threshold|n=s" => \$threshold,
    };

    $this->parse_options($args, $spec);

    # Test for presence rather than truth so that a field literally named
    # "0" is still accepted as a key.
    die('Must specify --key') unless ( defined $key && length $key );
    die('Must specify --threshold')
        unless ( defined $threshold && length $threshold );

    # A plain non-negative decimal number: digits with an optional fraction,
    # or a bare fraction. Strings such as "." or "1.2.3" do not qualify.
    my $number_re = qr/\A(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\z/;

    # if threshold is not a number, assume it is a parsable delta string
    if ( $threshold !~ $number_re ) {
        my $delta = ParseDateDelta($threshold);
        $threshold = Delta_Format($delta, 0, '%st');

        unless ( defined $threshold && $threshold =~ $number_re ) {
            die "Threshold passed isn't a number or parsable, "
                . "see perldoc Date::Manip for parseable formats\n";
        }
    }

    # accept_record divides by the threshold, so a zero value (for example
    # "0" or "0.0") has to be rejected here instead of failing per record.
    die "Threshold must be greater than zero\n" unless ( $threshold > 0 );

    # Replace every slash of a nested key spec so the generated field name
    # (n_<sanitized key>) contains none.
    my $sanitized_key = $key;
    $sanitized_key =~ s!/!_!g;

    $this->{'KEY'}           = $key;
    $this->{'SANITIZED_KEY'} = $sanitized_key;
    $this->{'STRICT'}        = $strict;
    $this->{'EPOCH'}         = $epoch;
    $this->{'THRESHOLD'}     = $threshold;
}
# Normalize the time value of one record and push the record downstream.
#
# The value found at the configured key is converted to epoch seconds (it is
# used as-is when EPOCH is set, otherwise it is parsed with Date::Manip) and
# rounded down to a multiple of THRESHOLD. The result is stored on the record
# under "n_<sanitized key>".
#
# Unless STRICT is set, a record whose bucket directly follows the most
# recently started bucket is assigned to that earlier bucket instead of
# starting a new one.
#
# Arguments:
#   $this   - the operation object configured by init
#   $record - the record to process; it is modified in place
#
# Returns 1. Dies when the value cannot be parsed as a date.
sub accept_record {
    my $this   = shift;
    my $record = shift;

    my $key                    = $this->{'KEY'};
    my $threshold              = $this->{'THRESHOLD'};
    my $strict                 = $this->{'STRICT'};
    my $sanitized_key          = $this->{'SANITIZED_KEY'};
    my $prior_normalized_value = $this->{'PRIOR_NORMALIZED_VALUE'};

    my $value = ${$record->guess_key_from_spec($key)};

    my $time = $value;
    if ( ! $this->{'EPOCH'} ) {
        $time = UnixDate( ParseDate( $value ), "%s" );

        # Test for "no result" rather than truth: epoch second 0
        # (1970-01-01 00:00:00 UTC) is a valid parsed time.
        die "I can't understand Key: $key, with value: $value"
            unless ( defined $time && length $time );
    }

    # Start of the bucket containing $time, and of the bucket before it.
    my $normalized_time_cur_period   = int( $time / $threshold ) * $threshold;
    my $normalized_time_prior_period = $normalized_time_cur_period - $threshold;

    my $normalized_time;
    if ( !$strict
        && defined( $prior_normalized_value )
        && $prior_normalized_value == $normalized_time_prior_period )
    {
        # Non-strict: extend the previously started bucket. The remembered
        # value is deliberately left unchanged, so a bucket is extended by at
        # most one period.
        $normalized_time = $prior_normalized_value;
    }
    else {
        $normalized_time = $normalized_time_cur_period;
        $this->{'PRIOR_NORMALIZED_VALUE'} = $normalized_time_cur_period;
    }

    $record->{"n_$sanitized_key"} = $normalized_time;
    $this->push_record($record);

    return 1;
}
# Register the help sections offered by this operation: the shared
# 'keyspecs' help is enabled, and a 'full' help type backed by full_help is
# added.
sub add_help_types {
    my ($this) = @_;

    $this->use_help_type('keyspecs');

    # Name, handler and one-line description of the extra help section.
    my @full_help_type = (
        'full',
        \&full_help,
        'Indepth description of normalization alogrithm',
    );

    $this->add_help_type(@full_help_type);
}
sub full_help {
print <<FULL_HELP;
Full Help
This recs processor will generate normalized versions of date/time values and
add this value as another attribute to the record stream. Used in conjunction
with recs-collate you can aggregate information over the normalized time. For
example if you use
recs-normalized -k date --n 1 | recs-collate -k n_date -a firstrec
then this picks a single record from a stream to serve in placement of lots of
records which are close to each other in time.
The normalized time value generated depends on whether or not you are using
strict normalization or not. The default is to use non-strict.
The use of the optional --epoch argument indicates that the date/time values
are expressed in epoch seconds. This argument both speeds up the execution of
an invocation (due to avoiding the expensive perl Date:Manip executions) and is
required for correctness when the values are epoch seconds.
1. When using strict normalization then time is chunked up into fixed segments
of --threshold seconds in each segment with the first segment occurring on
January 1st 1970 at 0:00. So if the threshold is 60 seconds then the following
record stream would be produced
( run in 0.629 second using v1.01-cache-2.11-cpan-39bf76dae61 )