App-JobLog
view release on metacpan or search on metacpan
lib/App/JobLog/TimeGrammar.pm view on Meta::CPAN
package App::JobLog::TimeGrammar;
$App::JobLog::TimeGrammar::VERSION = '1.042';
# ABSTRACT: parse natural (English) language time expressions
use Exporter 'import';
our @EXPORT = qw(
parse
daytime
);
use Modern::Perl;
use DateTime;
use Class::Autouse qw(
App::JobLog::Log
);
use Carp 'croak';
use autouse 'App::JobLog::Config' => qw(
log
sunday_begins_week
pay_period_length
start_pay_period
DIRECTORY
);
use autouse 'App::JobLog::Time' => qw(
now
today
tz
);
no if $] >= 5.018, warnings => "experimental::smartmatch";
# some variables we need visible inside the date parsing regex
# %matches holds a complete parsing
# %buffer, as its name suggests, is a temporary buffer
# $d1 and $d2 are the starting and ending dates
our ( %matches, %buffer, $d1, $d2 );
# buffers for numeric month, day, or year
our ( $b1, $b2 );
# holds time of day information
our $time_buffer;
# static maps for translating month and day names to numbers
my ( %month_abbr, %day_abbr );
# the master date parsing regex
my $re = qr{
\A \s*+ (?: (?&ever) | (?&span) ) \s*+ \Z
(?(DEFINE)
(?<ever> (?: all | always | ever | (?:(?:the \s++)? (?: entire | whole ) \s++ )? log ) (?{ $matches{ever} = 1 }) )
(?<span>
((?&date)) (?{ $d1 = $^N; stow($d1) })
(?: (?&span_divider) ((?&date)) (?{ $d2 = $^N; stow($d2) }) )?
)
(?<span_divider> \s*+ (?: -++ | \b(?: through | thru | to | till?+ | until )\b ) \s*+)
(?<at> at | @ )
(?<at_time>
(?{ $time_buffer = undef })
(?: (?: \s++ | \s*+ (?&at) \s*+ ) (?&time))?
)
(?<at_time_on> (?:(?&at) \s++)? (?&time) \s++ on \s++ )
(?<date>
(?{ (%buffer, $b1, $b2, $time_buffer) = ()})
lib/App/JobLog/TimeGrammar.pm view on Meta::CPAN
if ( my $value = $h->{$key} ) {
next if $value =~ /\d/;
$value = lc $value;
if ( $value =~ /^p/ ) {
croak 'pay period not defined'
unless defined start_pay_period;
$h->{$key} = 'pay';
}
else {
$h->{$key} = substr $value, 0, 3;
}
}
}
for ( $h->{modifier} || '' ) {
when (/beg/) { $h->{modifier} = 'beginning' }
when (/end/) { $h->{modifier} = 'end' }
when (/las/) { $h->{modifier} = 'last' }
when (/thi/) { $h->{modifier} = 'this' }
when (/nex/) { $h->{modifier} = 'next' }
}
}
}
# whether the particular date expression refers to a fixed
# rather than relative date
sub is_fixed {
my $h = shift;
return 1
if exists $h->{year};
if ( $h->{type} eq 'verbal' ) {
if ( exists $h->{modifier} ) {
return 1 if $h->{modifier} =~ /this|last|next/;
}
if ( exists $h->{day} ) {
return 1 if $h->{day} =~ /yes|tod|tom/;
}
}
return 0;
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
App::JobLog::TimeGrammar - parse natural (English) language time expressions
=head1 VERSION
version 1.042
=head1 SYNOPSIS
#!/usr/bin/perl
use Modern::Perl;
use DateTime;
use App::JobLog::Time qw(tz);
use App::JobLog::TimeGrammar qw(parse);
# for demonstration purposes we modify "today"
$App::JobLog::Time::today =
DateTime->new( year => 2011, month => 2, day => 17, time_zone => tz );
for my $phrase ( 'Monday until the end of the week', 'Tuesday at 9:00 p.m.' ) {
my ( $start, $end, $endpoints ) = parse($phrase);
say $phrase;
say "$start - $end; both endpoints specified? "
. ( $endpoints ? 'yes' : 'no' );
}
produces
Monday until the end of the week
2011-02-14T00:00:00 - 2011-02-20T23:59:59; both endpoints specified? yes
Tuesday at 9:00 p.m.
2011-02-08T21:00:00 - 2011-02-15T23:59:59; both endpoints specified? no
=head1 DESCRIPTION
C<App::JobLog::TimeGrammar> converts natural language time expressions into pairs of
C<DateTime> objects representing intervals. This requires disambiguating ambiguous
terms such as 'yesterday', whose interpretation varies from day to day, and 'Friday', whose
interpretation must be fixed by some frame of reference. The heuristic used by this code
is to look first for a fixed date, either a fully specified date such as 2011/2/17 or
one fixed relative to the current moment such as 'now'. If such a date is present in the time
expression it determines the context for the other date, if it is present. Otherwise
it is assumed that the closest appropriate pair of dates immediately before the current
moment are intended.
Given a pair consisting of fixed and an ambiguous date, we assume the ambiguous date has the
sense such that it is ordered correctly relative to the fixed date and the interval between
them is minimized.
If the time expression provides no time of day, such as 8:00, it is assumed that the first moment
intended is the first second of the first day and the last moment is the last second of the second
day. If no second date is provided the endpoint of the interval will be the last moment of the single
date specified. If a larger time period such as week, month, or year is specified, e.g., 'last week', the
first moment is the first second in the period and the last moment is the last second.
If you wish to parse a single date, not an interval, you can ignore the second date, though you should
check the third value returned by C<parse>, whether an interval was parsed.
C<parse> will croak if it cannot parse the expression given.
=head2 Time Grammar
The following is a semi-formal BNF grammar of time understood by C<App::JobLog::TimeGrammar>. In this
formalization C<s> represents whitespace, C<d> represents a digit, and C<\\n> represents a back reference
to the nth item in parenthesis in the given rule. After the first three rules the rules are alphabetized
to facilitate finding them.
<expression> = s* ( <ever> | <span> ) s*
<ever> = "all" | "always" | "ever" | [ [ "the" s ] ( "entire" | "whole" ) s ] "log"
<span> = <date> [ <span_divider> <date> ]
( run in 0.306 second using v1.01-cache-2.11-cpan-0f795438458 )