view release on metacpan or search on metacpan
Changelog.ini view on Meta::CPAN
- Add 'Int $date ($text)' tests to French.t, German.t and Hebrew.t.
EOT
[V 2.00]
Date=2015-10-13T20:43:00
Comments= <<EOT
- Re-write using Marpa::R2, which uses the Gedcom grammar to define dates.
- Remove methods: parse_approximate_date(), parse_date_escape(), parse_datetime(),
parse_date_period(), parse_date_range(), parse_date_value(), parse_interpreted_date(),
debug(), method_index(), months_in_gregorian() and style().
- Add methods: parse(), calendar(), error().
- See the FAQ for the format of the value returned by parse().
- See the FAQ for various extensions to the Gedcom grammar.
- Accept Unicode input.
- Add support for French and German dates.
- Accept date escapes with or without '@#d' ... '@'.
- Accept dates and escapes in any case.
- Remove all reference to DateTime.
- Remove code which supplied a default day and/or month when they were missing.
- Add scripts/synopsis.pl.
- Rewrite all tests. See t/English.t, etc.
- Fix code which was correct but accidentally so. This was used, $self -> recce -> read(\$date),
instead of $self -> recce -> read(\$self -> date). Luckily, $date was correct, but after now
removing commas differently, it wasn't.
- Add 'Int $date ($text)' tests to French.t, German.t and Hebrew.t.
2.00 2015-10-13T20:43:00
- Re-write using Marpa::R2, which uses the Gedcom grammar to define dates.
- Remove methods: parse_approximate_date(), parse_date_escape(), parse_datetime(),
parse_date_period(), parse_date_range(), parse_date_value(), parse_interpreted_date(),
debug(), method_index(), months_in_gregorian() and style().
- Add methods: parse(), calendar(), error().
- See the FAQ for the format of the value returned by parse().
- See the FAQ for various extensions to the Gedcom grammar.
- Accept Unicode input.
- Add support for French and German dates.
- Accept date escapes with or without '@#d' ... '@'.
- Accept dates and escapes in any case.
- Remove all reference to DateTime.
- Remove code which supplied a default day and/or month when they were missing.
- Add scripts/synopsis.pl.
- Rewrite all tests. See t/English.t, etc.
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
use Types::Standard qw/Any ArrayRef Bool Int HashRef Str/;
has bnf =>
(
default => sub{return ''},
is => 'rw',
isa => Str,
required => 0,
);
has _calendar =>
(
default => sub{return 'Gregorian'},
is => 'rw',
isa => Str,
required => 0,
);
has canonical =>
(
default => sub{return 0},
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
minlevel => $self -> minlevel,
utf8 => 1,
}
);
}
# Initialize the action class via global variables - Yuk!
# The point is that we don't create an action instance.
# Marpa creates one but we can't get our hands on it.
$Genealogy::Gedcom::Date::Actions::calendar = $self -> clean_calendar;
$Genealogy::Gedcom::Date::Actions::logger = $self -> logger;
$self -> bnf
(
<<'END_OF_GRAMMAR'
:default ::= action => [values]
lexeme default = latm => 1 # Longest Acceptable Token Match.
# Rules, in top-down order (more-or-less).
:start ::= gedcom_date
gedcom_date ::= date
| lds_ord_date
date ::= calendar_escape calendar_date
calendar_escape ::=
calendar_escape ::= calendar_name action => calendar_name # ($t1)
| ('@#d') calendar_name ('@') action => calendar_name # "
| ('@#D') calendar_name ('@') action => calendar_name # "
calendar_date ::= gregorian_date action => gregorian_date # ($t1)
| julian_date action => julian_date # ($t1)
| french_date action => french_date # ($t1)
| german_date action => german_date # ($t1)
| hebrew_date action => hebrew_date # ($t1)
gregorian_date ::= day gregorian_month gregorian_year
| gregorian_month gregorian_year
| gregorian_year_bce
| gregorian_year
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
before ~ 'bef':i
| 'before':i
between ~ 'bet':i
| 'between':i
calculated ~ 'cal':i
| 'calculated':i
calendar_name ~ 'french r':i
| 'frenchr':i
| 'german':i
| 'gregorian':i
| 'hebrew':i
| 'julian':i
date_text ~ [^)\x{0a}\x{0b}\x{0c}\x{0d}]+
digit ~ [0-9]
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
$date[$i] = $self -> canonical_date($$result[$i]);
$date[$i] = $$result[$i]{flag} ? $date[$i] ? "$$result[$i]{flag} $date[$i]" : $$result[$i]{flag} : $date[$i];
}
return $date[1] ? "$date[0] $date[1]" : $date[0];
} # End of canonical_form.
# ------------------------------------------------
sub clean_calendar
{
my($self) = @_;
my($calendar) = $self -> _calendar;
$calendar =~ s/\@\#d(.+)\@/$1/; # Zap gobbledegook if present.
$calendar = ucfirst lc $calendar;
return $self -> _calendar($calendar);
} # End of clean_calendar.
# --------------------------------------------------
sub compare
{
my($self, $other) = @_;
my($result_1) = $self -> result;
my($date_1) = $self -> normalize_date($#$result_1 < 0 ? {} : $$result_1[0]);
my($result_2) = $other -> result;
my($date_2) = $self -> normalize_date($#$result_2 < 0 ? {} : $$result_2[0]);
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
return $result;
} # End of parse.
# --------------------------------------------------
sub process_ambiguous
{
my($self) = @_;
my($calendar) = $self -> clean_calendar;
my(%count) =
(
AND => 0,
BET => 0,
FROM => 0,
TO => 0,
);
my($result) = [];
my($item);
while (my $value = $self -> recce -> value)
{
$value = $self -> decode_result($$value);
for $item (@$value)
{
if ($$item{kind} eq 'Calendar')
{
$calendar = $$item{type};
next;
}
if ($calendar eq $$item{type})
{
# We have to allow for the fact that when 'From .. To' or 'Between ... And'
# are used, both dates are ambiguous, and we end up with double the number
# of elements in the arrayref compared to what's expected.
if (exists $$item{flag} && exists $count{$$item{flag} })
{
if ($count{$$item{flag} } == 0)
{
$count{$$item{flag} }++;
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
if ( ($$result[0]{flag} eq 'AND') && ($$result[1]{flag} eq 'BET') )
{
($$result[0], $$result[1]) = ($$result[1], $$result[0]);
}
elsif ( ($$result[0]{flag} eq 'TO') && ($$result[1]{flag} eq 'FROM') )
{
($$result[0], $$result[1]) = ($$result[1], $$result[0]);
}
}
# Reset the calendar. Note: The 'next' above skips this statement.
$calendar = $self -> clean_calendar;
}
}
return $result;
} # End of process_ambiguous.
# --------------------------------------------------
sub process_unambiguous
{
my($self) = @_;
my($calendar) = $self -> clean_calendar;
my($result) = [];
my($value) = $self -> recce -> value;
$value = $self -> decode_result($$value);
if ($#$value == 0)
{
$value = $$value[0];
if ($$value{type} =~ /^(?:$calendar|Phrase)$/)
{
$$result[0] = $value;
}
else
{
$result = [$value];
}
}
elsif ($#$value == 2)
{
$result = [$$value[0], $$value[1] ];
}
elsif ($#$value == 3)
{
$result = [$$value[1], $$value[3] ];
}
elsif ($$value[0]{kind} eq 'Calendar')
{
$calendar = $$value[0]{type};
if ($calendar eq $$value[1]{type})
{
$result = [$$value[1] ];
}
}
elsif ( ($$value[0]{type} eq $calendar) && ($$value[1]{type} eq $calendar) )
{
$result = $value;
}
return $result;
} # End of process_unambiguous.
# --------------------------------------------------
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
empty.
Returns a date string (or the empty string) normalized in various ways:
=over 4
=item o If Gregorian (in any form) was in the original string, it is discarded
This is done because it's the default.
=item o If any other calendar escape was in the original string, it is preserved
And it's output in all caps.
And as a special case, 'FRENCHR' is returned as 'FRENCH R'.
=item o If About, etc were in the orginal string, they are discarded
This means the C<flag> key in the hashref is ignored.
=back
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
=head1 Extensions to the Gedcom specification
This chapter lists exactly how this code differs from the Gedcom spec.
=over 4
=item o Input may be in Unicode
=item o Input may be in any case
=item o Input may omit calendar escapes when the date is unambigous
=item o Any of the following tokens may be used
=over 4
=item o abt, about, circa
=item o aft, after
=item o and
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
This means missing values (day, month, bce) are never fabricated. These keys only appear in the
hashref if such a token was found in the input.
Keys:
=over 4
=item o bce
If the input contains any (case-insensitive) BCE indicator, under any calendar escape, the C<bce>
key will hold the exact indicator.
=item o canonical => $string
L</parse([%args])> calls L</canonical_date($hashref)> to populate this key.
=item o day => $integer
If the input contains a day, then the C<day> key will be present.
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
=back
=item o kind => 'Date' or 'Phrase'
The C<kind> key is always present, and always takes the value 'Date' or 'Phrase'.
If the value is 'Phrase', see the C<phrase> and C<type> keys.
During processing, there can be another - undocumented - element in the arrayref. It represents
the calendar escape, and in that case C<kind> takes the value 'Calendar'. This element is discarded
before the final arrayref is returned to the caller.
=item o month => $string
If the input contains a month, then the C<month> key will be present. The case of $string will be
exactly whatever was in the input.
=item o phrase => "($string)"
If the input contains a date phrase, then the C<phrase> key will be present. The case of $string
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
=item o year => $integer
If the input contains a year, then the C<year> key is present.
If the year contains a suffix (/00), see also the C<suffix> key, above. This means the value of
the C<year> key is never "$integer/$two_digits".
=back
=head2 When should I use a calendar escape?
=over 4
=item o In theory, for every non-Gregorian date
In practice, if the month name is unique to a specific language, then the escape is not needed,
since L<Marpa::R2> and this code automatically handle ambiguity.
Likewise, if you use a Gregorian year in the form 1700/01, then the calendar escape is obvious.
The escape is, of course, always inserted into the values returned by the C<canonical> pair of
methods when they process non-Gregorian dates. That makes their output compatible with
other software. And no matter what case you use specifying the calendar escape, it is always
output in upper-case.
=item o When you wish to force the code to provide an unambiguous result
All Gregorian and Julian dates are ambiguous, unless they use the year format 1700/01.
So, to resolve the ambiguity, add the calendar escape.
=back
=head2 Why is '@' escaped with '\' when L<Data::Dumper::Concise>'s C<Dumper()> prints things?
That's just how that module handles '@'.
=head2 Does this module accept Unicode?
Yes.
See t/German.t for sample code.
=head2 Can I change the default calendar?
No. It is always Gregorian.
=head2 Are dates massaged before being processed?
Yes. Commas are replaced by spaces.
=head2 French month names
See L</Extensions to the Gedcom specification>.
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
Consider the possibility that the parse ends without a C<successful> parse, but the input is the
prefix of some input that C<can> lead to a successful parse.
Marpa is not reporting a problem during the read(), because you can add more to the input string,
and Marpa does not know that you do not plan to do this.
=item o You tried to enter the German month name 'Mär' via the shell
Read more about this by running 'perl scripts/parse.pl -h', where it discusses '-d'.
=item o You mistyped the calendar escape
Check: Are any of these valid?
=over 4
=item o @#FRENCH@
=item o @#JULIAN@
=item o @#djulian
lib/Genealogy/Gedcom/Date.pm view on Meta::CPAN
=item o Julian
=item o @#dJULIAN@
=back
Yes, the last 3 are accepted by this module, and the last one is accepted by other software.
=item o The date is in American format (month day year)
=item o You used a Julian calendar with a Gregorian year
Dates - such as 1900/01 - which do not fit the Gedcom definition of a Julian year, are filtered
out.
=back
=head1 See Also
L<File::Bom::Utils>.
lib/Genealogy/Gedcom/Date/Actions.pm view on Meta::CPAN
package Genealogy::Gedcom::Date::Actions;
use strict;
use warnings;
use Data::Dumper::Concise; # For Dumper().
our $calendar;
our $logger;
our $verbose = 0;
our $VERSION = '2.10';
# ------------------------------------------------
sub about_date
lib/Genealogy/Gedcom/Date/Actions.pm view on Meta::CPAN
$$t5{flag} = 'BET';
my($t6) = $$t4[1][0];
$$t6{flag} = 'AND';
if (ref $$t2[0] eq 'HASH')
{
$t1 = $$t2[0];
}
else
{
$t1 = {kind => 'Calendar', type => $calendar};
}
if (ref $$t4[0] eq 'HASH')
{
$t3 = $$t4[0];
}
else
{
$t3 = {kind => 'Calendar', type => $calendar};
}
$t1 = [$t1, $t5, $t3, $t6];
return $t1;
} # End of between_date.
# ------------------------------------------------
lib/Genealogy/Gedcom/Date/Actions.pm view on Meta::CPAN
my($t3) = $$t2[1];
$t3 = $$t3[0] if (ref $t3 eq 'ARRAY');
$$t3{flag} = 'CAL';
return [$$t2[0], $t3];
} # End of calculated_date.
# ------------------------------------------------
sub calendar_name
{
my($cache, $t1) = @_;
print STDERR '#=== calendar_name() action: ', Dumper($t1) if ($verbose);
$t1 =~ s/\@\#d(.+)\@/$1/; # Zap gobbledegook if present.
$t1 = ucfirst lc $t1;
return
{
kind => 'Calendar',
type => $t1,
};
} # End of calendar_name.
# ------------------------------------------------
sub date_phrase
{
my($cache, $t1) = @_;
print STDERR '#=== date_phrase() action: ', Dumper($t1) if ($verbose);
return
lib/Genealogy/Gedcom/Date/Actions.pm view on Meta::CPAN
{
my($cache, $t1, $t2) = @_;
print STDERR '#=== from_date() action: ', Dumper($t1), Dumper($t2) if ($verbose);
my($t3) = $$t2[0];
$t2 = $$t2[1];
$t2 = $$t2[0] if (ref $t2 eq 'ARRAY');
$$t2{flag} = 'FROM';
# Is there a calendar hash present?
if (ref $t3 eq 'HASH')
{
$t2 = [$t3, $t2];
}
return $t2;
} # End of from_date.
lib/Genealogy/Gedcom/Date/Actions.pm view on Meta::CPAN
{
my($cache, $t1, $t2) = @_;
print STDERR '#=== to_date() action: ', Dumper($t1), Dumper($t2) if ($verbose);
my($t3) = $$t2[0];
$t2 = $$t2[1];
$t2 = $$t2[0] if (ref $t2 eq 'ARRAY');
$$t2{flag} = 'TO';
# Is there a calendar hash present?
if (ref $t3 eq 'HASH')
{
$t2 = [$t3, $t2];
}
return $t2;
} # End of to_date.