App-JobLog

 view release on metacpan or  search on metacpan

lib/App/JobLog/TimeGrammar.pm  view on Meta::CPAN

package App::JobLog::TimeGrammar;
$App::JobLog::TimeGrammar::VERSION = '1.042';
# ABSTRACT: parse natural (English) language time expressions


use Exporter 'import';
our @EXPORT = qw(
  parse
  daytime
);

use Modern::Perl;
use DateTime;
use Class::Autouse qw(
  App::JobLog::Log
);
use Carp 'croak';
use autouse 'App::JobLog::Config' => qw(
  log
  sunday_begins_week
  pay_period_length
  start_pay_period
  DIRECTORY
);
use autouse 'App::JobLog::Time' => qw(
  now
  today
  tz
);
no if $] >= 5.018, warnings => "experimental::smartmatch";

# some variables we need visible inside the date parsing regex
# %matches holds a complete parsing
# %buffer, as its name suggests, is a temporary buffer
# $d1 and $d2 are the starting and ending dates
our ( %matches, %buffer, $d1, $d2 );

# buffers for numeric month, day, or year
our ( $b1, $b2 );

# holds time of day information
our $time_buffer;

# static maps for translating month and day names to numbers
my ( %month_abbr, %day_abbr );

# the master date parsing regex
my $re = qr{
    \A \s*+ (?: (?&ever) | (?&span) ) \s*+ \Z

    (?(DEFINE)

     (?<ever> (?: all | always | ever | (?:(?:the \s++)? (?: entire | whole ) \s++ )? log ) (?{ $matches{ever} = 1 }) )
     
     (?<span>
      ((?&date)) (?{ $d1 = $^N; stow($d1) })
      (?: (?&span_divider) ((?&date)) (?{ $d2 = $^N; stow($d2) }) )?
     )

     (?<span_divider> \s*+ (?: -++ | \b(?: through | thru | to | till?+ | until )\b ) \s*+)

     (?<at> at | @ )

     (?<at_time> 
       (?{ $time_buffer = undef })
       (?: (?: \s++ | \s*+ (?&at) \s*+ ) (?&time))? 
     )

     (?<at_time_on> (?:(?&at) \s++)? (?&time) \s++ on \s++ )

     (?<date>
      (?{ (%buffer, $b1, $b2, $time_buffer) = ()})

lib/App/JobLog/TimeGrammar.pm  view on Meta::CPAN

            if ( my $value = $h->{$key} ) {
                next if $value =~ /\d/;
                $value = lc $value;
                if ( $value =~ /^p/ ) {
                    croak 'pay period not defined'
                      unless defined start_pay_period;
                    $h->{$key} = 'pay';
                }
                else {
                    $h->{$key} = substr $value, 0, 3;
                }
            }
        }
        for ( $h->{modifier} || '' ) {
            when (/beg/) { $h->{modifier} = 'beginning' }
            when (/end/) { $h->{modifier} = 'end' }
            when (/las/) { $h->{modifier} = 'last' }
            when (/thi/) { $h->{modifier} = 'this' }
            when (/nex/) { $h->{modifier} = 'next' }
        }
    }
}

# whether the particular date expression refers to a fixed
# rather than relative date
sub is_fixed {
    my $h = shift;
    return 1
      if exists $h->{year};
    if ( $h->{type} eq 'verbal' ) {
        if ( exists $h->{modifier} ) {
            return 1 if $h->{modifier} =~ /this|last|next/;
        }
        if ( exists $h->{day} ) {
            return 1 if $h->{day} =~ /yes|tod|tom/;
        }
    }
    return 0;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

App::JobLog::TimeGrammar - parse natural (English) language time expressions

=head1 VERSION

version 1.042

=head1 SYNOPSIS

  #!/usr/bin/perl
  
  use Modern::Perl;
  use DateTime;
  use App::JobLog::Time qw(tz);
  use App::JobLog::TimeGrammar qw(parse);
  
  # for demonstration purposes we modify "today"
  $App::JobLog::Time::today =
    DateTime->new( year => 2011, month => 2, day => 17, time_zone => tz );

  for my $phrase ( 'Monday until the end of the week', 'Tuesday at 9:00 p.m.' ) {
      my ( $start, $end, $endpoints ) = parse($phrase);
      say $phrase;
      say "$start - $end; both endpoints specified? "
        . ( $endpoints ? 'yes' : 'no' );
  }

produces

  Monday until the end of the week
  2011-02-14T00:00:00 - 2011-02-20T23:59:59; both endpoints specified? yes
  Tuesday at 9:00 p.m.
  2011-02-08T21:00:00 - 2011-02-15T23:59:59; both endpoints specified? no

=head1 DESCRIPTION

C<App::JobLog::TimeGrammar> converts natural language time expressions into pairs of
C<DateTime> objects representing intervals. This requires disambiguating ambiguous
terms such as 'yesterday', whose interpretation varies from day to day, and 'Friday', whose
interpretation must be fixed by some frame of reference. The heuristic used by this code
is to look first for a fixed date, either a fully specified date such as 2011/2/17 or
one fixed relative to the current moment such as 'now'. If such a date is present in the time
expression it determines the context for the other date, if it is present. Otherwise
it is assumed that the closest appropriate pair of dates immediately before the current
moment are intended.

Given a pair consisting of fixed and an ambiguous date, we assume the ambiguous date has the
sense such that it is ordered correctly relative to the fixed date and the interval between
them is minimized.

If the time expression provides no time of day, such as 8:00, it is assumed that the first moment
intended is the first second of the first day and the last moment is the last second of the second
day. If no second date is provided the endpoint of the interval will be the last moment of the single
date specified. If a larger time period such as week, month, or year is specified, e.g., 'last week', the
first moment is the first second in the period and the last moment is the last second.

If you wish to parse a single date, not an interval, you can ignore the second date, though you should
check the third value returned by C<parse>, whether an interval was parsed.

C<parse> will croak if it cannot parse the expression given.

=head2 Time Grammar

The following is a semi-formal BNF grammar of time understood by C<App::JobLog::TimeGrammar>. In this
formalization C<s> represents whitespace, C<d> represents a digit, and C<\\n> represents a back reference
to the nth item in parenthesis in the given rule. After the first three rules the rules are alphabetized
to facilitate finding them.

              <expression> = s* ( <ever> | <span> ) s*
                    <ever> = "all" | "always" | "ever" | [ [ "the" s ] ( "entire" | "whole" ) s ] "log"
                    <span> = <date> [ <span_divider> <date> ]
 



( run in 0.306 second using v1.01-cache-2.11-cpan-0f795438458 )