Genealogy-Gedcom-Date

 view release on metacpan or  search on metacpan

Changelog.ini  view on Meta::CPAN

- Add 'Int $date ($text)' tests to French.t, German.t and Hebrew.t.
EOT

[V 2.00]
Date=2015-10-13T20:43:00
Comments= <<EOT
- Re-write using Marpa::R2, which uses the Gedcom grammar to define dates.
- Remove methods: parse_approximate_date(), parse_date_escape(), parse_datetime(),
parse_date_period(), parse_date_range(), parse_date_value(), parse_interpreted_date(),
debug(), method_index(), months_in_gregorian() and style().
- Add methods: parse(), calendar(), error().
- See the FAQ for the format of the value returned by parse().
- See the FAQ for various extensions to the Gedcom grammar.
- Accept Unicode input.
- Add support for French and German dates.
- Accept date escapes with or without '@#d' ... '@'.
- Accept dates and escapes in any case.
- Remove all reference to DateTime.
- Remove code which supplied a default day and/or month when they were missing.
- Add scripts/synopsis.pl.
- Rewrite all tests. See t/English.t, etc.

Changes  view on Meta::CPAN

	- Fix code which was correct but accidentally so. This was used, $self -> recce -> read(\$date),
		instead of $self -> recce -> read(\$self -> date). Luckily, $date was correct, but after now
		removing commas differently, it wasn't.
	- Add 'Int $date ($text)' tests to French.t, German.t and Hebrew.t.

2.00  2015-10-13T20:43:00
	- Re-write using Marpa::R2, which uses the Gedcom grammar to define dates.
	- Remove methods: parse_approximate_date(), parse_date_escape(), parse_datetime(),
		parse_date_period(), parse_date_range(), parse_date_value(), parse_interpreted_date(),
		debug(), method_index(), months_in_gregorian() and style().
	- Add methods: parse(), calendar(), error().
	- See the FAQ for the format of the value returned by parse().
	- See the FAQ for various extensions to the Gedcom grammar.
	- Accept Unicode input.
	- Add support for French and German dates.
	- Accept date escapes with or without '@#d' ... '@'.
	- Accept dates and escapes in any case.
	- Remove all reference to DateTime.
	- Remove code which supplied a default day and/or month when they were missing.
	- Add scripts/synopsis.pl.
	- Rewrite all tests. See t/English.t, etc.

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

use Types::Standard qw/Any ArrayRef Bool Int HashRef Str/;

has bnf =>
(
	default  => sub{return ''},
	is       => 'rw',
	isa      => Str,
	required => 0,
);

has _calendar =>
(
	default  => sub{return 'Gregorian'},
	is       => 'rw',
	isa      => Str,
	required => 0,
);

has canonical =>
(
	default  => sub{return 0},

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

				minlevel       => $self -> minlevel,
				utf8           => 1,
			}
		);
	}

	# Initialize the action class via global variables - Yuk!
	# The point is that we don't create an action instance.
	# Marpa creates one but we can't get our hands on it.

	$Genealogy::Gedcom::Date::Actions::calendar = $self -> clean_calendar;
	$Genealogy::Gedcom::Date::Actions::logger   = $self -> logger;

	$self -> bnf
	(
<<'END_OF_GRAMMAR'

:default				::= action => [values]

lexeme default			= latm => 1		# Longest Acceptable Token Match.

# Rules, in top-down order (more-or-less).

:start					::= gedcom_date

gedcom_date				::= date
							| lds_ord_date

date					::= calendar_escape calendar_date

calendar_escape			::=
calendar_escape			::= calendar_name 					action => calendar_name		# ($t1)
							| ('@#d') calendar_name ('@')	action => calendar_name		#   "
							| ('@#D') calendar_name ('@')	action => calendar_name		#   "

calendar_date			::= gregorian_date					action => gregorian_date	# ($t1)
							| julian_date					action => julian_date		# ($t1)
							| french_date					action => french_date		# ($t1)
							| german_date					action => german_date		# ($t1)
							| hebrew_date					action => hebrew_date		# ($t1)

gregorian_date			::= day gregorian_month gregorian_year
							| gregorian_month gregorian_year
							| gregorian_year_bce
							| gregorian_year

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN


before					~ 'bef':i
							| 'before':i

between					~ 'bet':i
							| 'between':i

calculated				~ 'cal':i
							| 'calculated':i

calendar_name			~ 'french r':i
							| 'frenchr':i
							| 'german':i
							| 'gregorian':i
							| 'hebrew':i
							| 'julian':i

date_text				~ [^)\x{0a}\x{0b}\x{0c}\x{0d}]+

digit					~ [0-9]

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

		$date[$i] = $self -> canonical_date($$result[$i]);
		$date[$i] = $$result[$i]{flag} ? $date[$i] ? "$$result[$i]{flag} $date[$i]" : $$result[$i]{flag} : $date[$i];
	}

	return $date[1] ? "$date[0] $date[1]" : $date[0];

} # End of canonical_form.

# ------------------------------------------------

sub clean_calendar
{
	my($self)     = @_;
	my($calendar) = $self -> _calendar;
	$calendar     =~ s/\@\#d(.+)\@/$1/; # Zap gobbledegook if present.
	$calendar     = ucfirst lc $calendar;

	return $self -> _calendar($calendar);

} # End of clean_calendar.

# --------------------------------------------------

sub compare
{
	my($self, $other)	= @_;
	my($result_1)		= $self -> result;
	my($date_1)			= $self -> normalize_date($#$result_1 < 0 ? {} : $$result_1[0]);
	my($result_2)		= $other -> result;
	my($date_2)			= $self -> normalize_date($#$result_2 < 0 ? {} : $$result_2[0]);

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN


	return $result;

} # End of parse.

# --------------------------------------------------

sub process_ambiguous
{
	my($self)     = @_;
	my($calendar) = $self -> clean_calendar;
	my(%count)    =
	(
		AND  => 0,
		BET  => 0,
		FROM => 0,
		TO   => 0,
	);
	my($result) = [];

	my($item);

	while (my $value = $self -> recce -> value)
	{
		$value = $self -> decode_result($$value);

		for $item (@$value)
		{
			if ($$item{kind} eq 'Calendar')
			{
				$calendar = $$item{type};

				next;
			}

			if ($calendar eq $$item{type})
			{
				# We have to allow for the fact that when 'From .. To' or 'Between ... And'
				# are used, both dates are ambiguous, and we end up with double the number
				# of elements in the arrayref compared to what's expected.

				if (exists $$item{flag} && exists $count{$$item{flag} })
				{
					if ($count{$$item{flag} } == 0)
					{
						$count{$$item{flag} }++;

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

				if ( ($$result[0]{flag} eq 'AND') && ($$result[1]{flag} eq 'BET') )
				{
					($$result[0], $$result[1]) = ($$result[1], $$result[0]);
				}
				elsif ( ($$result[0]{flag} eq 'TO') && ($$result[1]{flag} eq 'FROM') )
				{
					($$result[0], $$result[1]) = ($$result[1], $$result[0]);
				}
			}

			# Reset the calendar. Note: The 'next' above skips this statement.

			$calendar = $self -> clean_calendar;
		}
	}

	return $result;

} # End of process_ambiguous.

# --------------------------------------------------

sub process_unambiguous
{
	my($self)     = @_;
	my($calendar) = $self -> clean_calendar;
	my($result)   = [];
	my($value)    = $self -> recce -> value;
	$value        = $self -> decode_result($$value);

	if ($#$value == 0)
	{
		$value = $$value[0];

		if ($$value{type} =~ /^(?:$calendar|Phrase)$/)
		{
			$$result[0] = $value;
		}
		else
		{
			$result = [$value];
		}
	}
	elsif ($#$value == 2)
	{
		$result = [$$value[0], $$value[1] ];
	}
	elsif ($#$value == 3)
	{
		$result = [$$value[1], $$value[3] ];
	}
	elsif ($$value[0]{kind} eq 'Calendar')
	{
		$calendar = $$value[0]{type};

		if ($calendar eq $$value[1]{type})
		{
			$result = [$$value[1] ];
		}
	}
	elsif ( ($$value[0]{type} eq $calendar) && ($$value[1]{type} eq $calendar) )
	{
		$result = $value;
	}

	return $result;

} # End of process_unambiguous.

# --------------------------------------------------

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

empty.

Returns a date string (or the empty string) normalized in various ways:

=over 4

=item o If Gregorian (in any form) was in the original string, it is discarded

This is done because it's the default.

=item o If any other calendar escape was in the original string, it is preserved

And it's output in all caps.

And as a special case, 'FRENCHR' is returned as 'FRENCH R'.

=item o If About, etc were in the orginal string, they are discarded

This means the C<flag> key in the hashref is ignored.

=back

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

=head1 Extensions to the Gedcom specification

This chapter lists exactly how this code differs from the Gedcom spec.

=over 4

=item o Input may be in Unicode

=item o Input may be in any case

=item o Input may omit calendar escapes when the date is unambigous

=item o Any of the following tokens may be used

=over 4

=item o abt, about, circa

=item o aft, after

=item o and

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN


This means missing values (day, month, bce) are never fabricated. These keys only appear in the
hashref if such a token was found in the input.

Keys:

=over 4

=item o bce

If the input contains any (case-insensitive) BCE indicator, under any calendar escape, the C<bce>
key will hold the exact indicator.

=item o canonical => $string

L</parse([%args])> calls L</canonical_date($hashref)> to populate this key.

=item o day => $integer

If the input contains a day, then the C<day> key will be present.

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN


=back

=item o kind => 'Date' or 'Phrase'

The C<kind> key is always present, and always takes the value 'Date' or 'Phrase'.

If the value is 'Phrase', see the C<phrase> and C<type> keys.

During processing, there can be another - undocumented - element in the arrayref. It represents
the calendar escape, and in that case C<kind> takes the value 'Calendar'. This element is discarded
before the final arrayref is returned to the caller.

=item o month => $string

If the input contains a month, then the C<month> key will be present. The case of $string will be
exactly whatever was in the input.

=item o phrase => "($string)"

If the input contains a date phrase, then the C<phrase> key will be present. The case of $string

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN


=item o year => $integer

If the input contains a year, then the C<year> key is present.

If the year contains a suffix (/00), see also the C<suffix> key, above. This means the value of
the C<year> key is never "$integer/$two_digits".

=back

=head2 When should I use a calendar escape?

=over 4

=item o In theory, for every non-Gregorian date

In practice, if the month name is unique to a specific language, then the escape is not needed,
since L<Marpa::R2> and this code automatically handle ambiguity.

Likewise, if you use a Gregorian year in the form 1700/01, then the calendar escape is obvious.

The escape is, of course, always inserted into the values returned by the C<canonical> pair of
methods when they process non-Gregorian dates. That makes their output compatible with
other software. And no matter what case you use specifying the calendar escape, it is always
output in upper-case.

=item o When you wish to force the code to provide an unambiguous result

All Gregorian and Julian dates are ambiguous, unless they use the year format 1700/01.

So, to resolve the ambiguity, add the calendar escape.

=back

=head2 Why is '@' escaped with '\' when L<Data::Dumper::Concise>'s C<Dumper()> prints things?

That's just how that module handles '@'.

=head2 Does this module accept Unicode?

Yes.

See t/German.t for sample code.

=head2 Can I change the default calendar?

No. It is always Gregorian.

=head2 Are dates massaged before being processed?

Yes. Commas are replaced by spaces.

=head2 French month names

See L</Extensions to the Gedcom specification>.

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

Consider the possibility that the parse ends without a C<successful> parse, but the input is the
prefix of some input that C<can> lead to a successful parse.

Marpa is not reporting a problem during the read(), because you can add more to the input string,
and Marpa does not know that you do not plan to do this.

=item o You tried to enter the German month name 'Mär' via the shell

Read more about this by running 'perl scripts/parse.pl -h', where it discusses '-d'.

=item o You mistyped the calendar escape

Check: Are any of these valid?

=over 4

=item o @#FRENCH@

=item o @#JULIAN@

=item o @#djulian

lib/Genealogy/Gedcom/Date.pm  view on Meta::CPAN

=item o Julian

=item o @#dJULIAN@

=back

Yes, the last 3 are accepted by this module, and the last one is accepted by other software.

=item o The date is in American format (month day year)

=item o You used a Julian calendar with a Gregorian year

Dates - such as 1900/01 - which do not fit the Gedcom definition of a Julian year, are filtered
out.

=back

=head1 See Also

L<File::Bom::Utils>.

lib/Genealogy/Gedcom/Date/Actions.pm  view on Meta::CPAN

package Genealogy::Gedcom::Date::Actions;

use strict;
use warnings;

use Data::Dumper::Concise; # For Dumper().

our $calendar;

our $logger;

our $verbose = 0;

our $VERSION = '2.10';

# ------------------------------------------------

sub about_date

lib/Genealogy/Gedcom/Date/Actions.pm  view on Meta::CPAN

	$$t5{flag} = 'BET';
	my($t6)    = $$t4[1][0];
	$$t6{flag} = 'AND';

	if (ref $$t2[0] eq 'HASH')
	{
		$t1 = $$t2[0];
	}
	else
	{
		$t1 = {kind => 'Calendar', type => $calendar};
	}

	if (ref $$t4[0] eq 'HASH')
	{
		$t3 = $$t4[0];
	}
	else
	{
		$t3 = {kind => 'Calendar', type => $calendar};
	}

	$t1 = [$t1, $t5, $t3, $t6];

	return $t1;

} # End of between_date.

# ------------------------------------------------

lib/Genealogy/Gedcom/Date/Actions.pm  view on Meta::CPAN

	my($t3)    = $$t2[1];
	$t3        = $$t3[0] if (ref $t3 eq 'ARRAY');
	$$t3{flag} = 'CAL';

	return [$$t2[0], $t3];

} # End of calculated_date.

# ------------------------------------------------

sub calendar_name
{
	my($cache, $t1) = @_;

	print STDERR '#=== calendar_name() action: ', Dumper($t1) if ($verbose);

	$t1 =~ s/\@\#d(.+)\@/$1/; # Zap gobbledegook if present.
	$t1 = ucfirst lc $t1;

	return
	{
		kind => 'Calendar',
		type => $t1,
	};

} # End of calendar_name.

# ------------------------------------------------

sub date_phrase
{
	my($cache, $t1) = @_;

	print STDERR '#=== date_phrase() action: ', Dumper($t1) if ($verbose);

	return

lib/Genealogy/Gedcom/Date/Actions.pm  view on Meta::CPAN

{
	my($cache, $t1, $t2) = @_;

	print STDERR '#=== from_date() action: ', Dumper($t1), Dumper($t2) if ($verbose);

	my($t3)    = $$t2[0];
	$t2        = $$t2[1];
	$t2        = $$t2[0] if (ref $t2 eq 'ARRAY');
	$$t2{flag} = 'FROM';

	# Is there a calendar hash present?

	if (ref $t3 eq 'HASH')
	{
		$t2 = [$t3, $t2];
	}

	return $t2;

} # End of from_date.

lib/Genealogy/Gedcom/Date/Actions.pm  view on Meta::CPAN

{
	my($cache, $t1, $t2) = @_;

	print STDERR '#=== to_date() action: ', Dumper($t1), Dumper($t2) if ($verbose);

	my($t3)    = $$t2[0];
	$t2        = $$t2[1];
	$t2        = $$t2[0] if (ref $t2 eq 'ARRAY');
	$$t2{flag} = 'TO';

	# Is there a calendar hash present?

	if (ref $t3 eq 'HASH')
	{
		$t2 = [$t3, $t2];
	}

	return $t2;

} # End of to_date.



( run in 0.683 second using v1.01-cache-2.11-cpan-5dc5da66d9d )