DateTime-Format-Genealogy
view release on metacpan or search on metacpan
lib/DateTime/Format/Genealogy.pm view on Meta::CPAN
C<DateTime::Format::Genealogy> is a Perl module designed to parse genealogy-style date formats and convert them into L<DateTime> objects.
It uses L<Genealogy::Gedcom::Date> to parse dates commonly found in genealogical records while also handling date ranges and approximate dates.
use DateTime::Format::Genealogy;
my $dtg = DateTime::Format::Genealogy->new();
# ...
=head1 SUBROUTINES/METHODS
=head2 new
Creates a DateTime::Format::Genealogy object.
=cut
sub new
{
my $class = shift;
# Handle hash or hashref arguments
my %args;
if((@_ == 1) && (ref $_[0] eq 'HASH')) {
# If the first argument is a hash reference, dereference it
%args = %{$_[0]};
} elsif((@_ % 2) == 0) {
# If there is an even number of arguments, treat them as key-value pairs
%args = @_;
} else {
# If there is an odd number of arguments, treat it as an error
carp(__PACKAGE__, ': Invalid arguments passed to new()');
return;
}
if(!defined($class)) {
# FIXME: this only works when no arguments are given
$class = __PACKAGE__;
} elsif(Scalar::Util::blessed($class)) {
# If $class is an object, clone it with new arguments
return bless { %{$class}, %args }, ref($class);
}
# Return the blessed object
return bless { %args }, $class;
}
=head2 parse_datetime($string)
Given a date,
runs it through L<Genealogy::Gedcom::Date> to create a L<DateTime> object.
If a date range is given, return a two-element array in array context, or undef in scalar context
Returns undef if the date can't be parsed,
is before AD100,
is just a year or,
if it is an approximate date starting with "c", "ca" or "abt".
Can be called as a class or object method.
my $dt = DateTime::Format::Genealogy->new()->parse_datetime('25 Dec 2022');
Recognizes GEDCOM calendar escapes such as @#DJULIAN@, @#DHEBREW@, and @#DFRENCH R@,
converting them to DateTime objects when the appropriate calendar modules are installed.
Mandatory arguments:
=over 4
=item * C<date>
The date to be parsed.
=back
Optional arguments:
=over 4
=item * C<quiet>
Set to fail silently if there is an error with the date.
=item * C<strict>
More strictly enforce the Gedcom standard,
for example,
don't allow long month names.
=back
=cut
sub parse_datetime {
my $self = shift;
if(!ref($self)) {
if(scalar(@_)) {
return(__PACKAGE__->new()->parse_datetime(@_));
}
return(__PACKAGE__->new()->parse_datetime($self));
} elsif(ref($self) eq 'HASH') {
return(__PACKAGE__->new()->parse_datetime($self));
}
my $params = Params::Get::get_params('date', @_);
if((!ref($params->{'date'})) && (my $date = $params->{'date'})) {
my $quiet = $params->{'quiet'};
# Detect GEDCOM calendar escape
my $calendar_type = 'DGREGORIAN';
if ($date =~ s/^@#D([A-Z ]+?)@\s*//) {
$calendar_type = 'D' . uc($1); # normalise
}
# TODO: Needs much more sanity checking
if(($date =~ /^bef\s/i) || ($date =~ /^aft\s/i) || ($date =~ /^abt\s/i)) {
Carp::carp("$date is invalid, need an exact date to create a DateTime")
unless($quiet);
return;
}
if($date =~ /^31\s+Nov/) {
Carp::carp("$date is invalid, there are only 30 days in November");
return;
}
if($date =~ /^\s*(.+\d\d)\s*\-\s*(.+\d\d)\s*$/) {
if($date =~ /^(\d{4})\-(\d{2})\-(\d{2})$/) {
my $month = ucfirst($short_month_names[$2 - 1]);
Carp::carp("Changing date '$date' to '$3 $month $1'") unless($quiet);
$date = "$3 $month $1";
} else {
Carp::carp("Changing date '$date' to 'bet $1 and $2'") unless($quiet);
$date = "bet $1 and $2";
}
}
if($date =~ /^bet (.+) and (.+)/i) {
if(wantarray) {
return $self->parse_datetime($1), $self->parse_datetime($2);
}
return;
}
my $strict = $params->{'strict'};
if((!$strict) && ($date =~ /^from (.+) to (.+)/i)) {
if(wantarray) {
return $self->parse_datetime($1), $self->parse_datetime($2);
}
return;
}
if($date !~ /^\d{3,4}$/) {
if($strict) {
if($date !~ /^(\d{1,2})\s+([A-Z]{3})\s+(\d{3,4})$/i) {
Carp::carp("Unparseable date $date - often because the month name isn't 3 letters") unless($quiet);
return;
}
} elsif($date =~ /^(\d{1,2})\s+([A-Z]{4,}+)\.?\s+(\d{3,4})$/i) {
# FIXME: Doesn't include sept
# if(my $abbrev = $month_names_to_short{lc($2)}) {
# $abbrev = ucfirst($abbrev);
if(my $abbrev = $months{ucfirst(lc($2))}) {
$date = "$1 $abbrev $3";
} elsif($2 eq 'Janv') {
# I've seen a tree that uses some French months
$date = "$1 Jan $3";
} elsif($2 eq 'Juli') {
$date = "$1 Jul $3";
} else {
Carp::carp("Unparseable date $date - often because the month name isn't 3 letters") unless($quiet);
return;
}
} elsif($date =~ /^(\d{1,2})\s+Mai\s+(\d{3,4})$/i) {
# I've seen a tree that uses some French months
$date = "$1 May $2";
} elsif($date =~ /^(\d{1,2})\s+Août\s+(\d{3,4})$/i) {
# I've seen a tree that uses some French months
$date = "$1 Aug $2";
} elsif($date =~ /^(\d{1,2})\-([A-Z]{3})\-(\d{3,4})$/i) {
# 29-Aug-1938
$date = "$1 $2 $3";
}
my $dfn = $self->{'dfn'};
if(!defined($dfn)) {
$self->{'dfn'} = $dfn = DateTime::Format::Natural->new();
}
if(($date =~ /^\d/) && (my $d = $self->_date_parser_cached($date))) {
# D:T:Natural doesn't seem to work before AD100
return if($date =~ /\s\d{1,2}$/);
my $rc = $dfn->parse_datetime($d->{'canonical'});
if($rc && $calendar_type ne 'DGREGORIAN') {
return _convert_calendar($rc, $calendar_type, $quiet);
}
return $rc;
}
if(($date !~ /^(Abt|ca?)/i) && ($date =~ /^[\w\s,]+$/)) {
# ACOM exports full month names and non-standard format dates e.g. U.S. format MMM, DD YYYY
# TODO: allow that when not in strict mode
if(my $rc = $dfn->parse_datetime($date)) {
if($dfn->success()) {
return $rc;
}
Carp::carp($dfn->error()) unless($quiet);
} else {
Carp::carp("Can't parse date '$date'") unless($quiet);
}
}
}
return; # undef
}
Carp::croak('Usage: ', __PACKAGE__, '::parse_datetime(date => $date)');
}
# Parse Gedcom format dates
# Genealogy::Gedcom::Date is expensive, so cache results
sub _date_parser_cached
{
my $self = shift;
my $params = Params::Get::get_params('date', @_);
my $date = $params->{'date'};
Carp::croak('Usage: _date_parser_cached(date => $date)') unless defined $date;
# Check and return if the date has already been parsed and cached
return $self->{'all_dates'}{$date} if exists $self->{'all_dates'}{$date};
# Initialize the date parser if not already set
my $date_parser = $self->{'date_parser'} ||= Genealogy::Gedcom::Date->new();
# Parse the date
my $parsed_date;
eval {
$parsed_date = $date_parser->parse(date => $date);
};
# Check for errors
if(my $error = $date_parser->error()) {
Carp::carp("$date: '$error'") unless $self->{'quiet'};
return;
}
# Cache and return the first parsed date if it's an array reference
if((ref($parsed_date) eq 'ARRAY') && @{$parsed_date}) {
return $self->{'all_dates'}{$date} = $parsed_date->[0];
}
return;
}
sub _convert_calendar {
my ($dt, $calendar_type, $quiet) = @_;
if($calendar_type eq 'DJULIAN') {
# In a Gedcom, DJULIAN refers to a date in the Julian calendar format, using the @#DJULIAN@ escape to indicate it
# Approximate historical offset
my $offset_days = _julian_to_gregorian_offset($dt->year);
return $dt->clone->add(days => $offset_days);
} elsif ($calendar_type eq 'DHEBREW') {
eval {
require DateTime::Calendar::Hebrew;
my $h = DateTime::Calendar::Hebrew->new(
year => $dt->year,
month => $dt->month,
day => $dt->day
);
return DateTime->from_object(object => $h);
};
Carp::carp("Hebrew calendar conversion failed: $@") if $@ && !$quiet;
} elsif ($calendar_type =~ /FRENCH R/) {
eval {
require DateTime::Calendar::FrenchRevolutionary;
my $f = DateTime::Calendar::FrenchRevolutionary->new(
year => $dt->year,
month => $dt->month,
day => $dt->day
);
return DateTime->from_object(object => $f);
};
Carp::carp("French Republican calendar conversion failed: $@") if $@ && !$quiet;
} else { # e.g DROMAN
Carp::carp("Calendar type $calendar_type not supported") unless $quiet;
}
return $dt;
}
sub _julian_to_gregorian_offset {
my $year = $_[0];
# The gap widened over centuries:
# 10 days from 5 Oct 1582 to 28 Feb 1700
# 11 days from 1 Mar 1700 to 28 Feb 1800
# 12 days from 1 Mar 1800 to 28 Feb 1900
# 13 days from 1 Mar 1900 onwards
return 10 if $year < 1700;
return 11 if $year < 1800;
return 12 if $year < 1900;
return 13;
}
1;
=head1 AUTHOR
Nigel Horne, C<< <njh at bandsman.co.uk> >>
=head1 BUGS
Please report any bugs or feature requests to the author.
This module is provided as-is without any warranty.
I can't get L<DateTime::Format::Natural> to work on dates before AD100,
so this module rejects dates that are that old.
=head1 SEE ALSO
L<Genealogy::Gedcom::Date> and
L<DateTime>
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc DateTime::Format::Genealogy
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=DateTime-Format-Genealogy>
=back
=head1 LICENSE AND COPYRIGHT
Copyright 2018-2025 Nigel Horne.
This program is released under the following licence: GPL2
( run in 0.638 second using v1.01-cache-2.11-cpan-ceb78f64989 )