Data-Validate-Sanctions
view release on metacpan or search on metacpan
lib/Data/Validate/Sanctions/Fetcher.pm view on Meta::CPAN
}
# Detect new xmlResponse format vs legacy TaggedPDF-doc format
if (exists $data->{'xmlResponse'}) {
$data = xml2hash($raw_data, array => ['entry', 'field', 'section']);
return _moha_xml_new($data);
}
return _moha_xml_legacy($raw_data, $data);
}
=head2 _moha_xml_new

Parses the new C<xmlResponse> format of the MOHA sanctions list.

Takes a single argument: the hash reference holding the parsed XML document.
It reads C<< $data->{xmlResponse}{section} >>, each section's C<entry> list and
each entry's C<field> list, all of which are expected to be array references.
Every field contributes its C<-name> attribute (whitespace runs collapsed to a
single space) as the column label and its C<#text> as the value.

An entry that has a C<(6) Date of Birth> column is treated as an individual;
any other entry is treated as an entity. Entries without a usable C<(3) Name>
are skipped. Empty cells and cells holding only the C<-> placeholder are
dropped from every optional field.

Returns a hash reference with the keys:

=over 4

=item * C<updated> - the epoch time at which the data was parsed

=item * C<content> - the dataset array reference handed to C<_process_sanction_entry> for each entry

=back

Dies with C<Invalid MOHA xmlResponse format> when the C<xmlResponse> element is
missing or false.

=cut

sub _moha_xml_new {
    my $data = shift;

    my $xml = $data->{'xmlResponse'} or die "Invalid MOHA xmlResponse format\n";

    my $dataset = [];

    # The publication timestamp reported to the caller is the parse time.
    my $publish_epoch = time();

    # A cell carries a real value when it is non-empty and is not the '-' placeholder.
    # Length is tested (not truthiness) so that a literal "0" is kept.
    my $has_value = sub {
        my $value = shift;
        return defined($value) && length($value) && $value ne '-';
    };

    # Keeps only the arguments that carry a real value.
    my $present = sub {
        return grep { $has_value->($_) } @_;
    };

    for my $section (@{ $xml->{'section'} // [] }) {
        for my $entry (@{ $section->{'entry'} // [] }) {

            # Column label => trimmed cell text for this entry.
            my %f;
            for my $field (@{ $entry->{'field'} // [] }) {
                my $label = $field->{'-name'} // '';
                my $value = $field->{'#text'} // '';

                # Normalize whitespace in field names (real XML has newlines between column number and label)
                $label =~ s/\s+/ /g;
                $f{$label} = trim($value);
            }

            my $name = $f{'(3) Name'};
            next unless $has_value->($name);

            if (exists $f{'(6) Date of Birth'}) {

                # Individual: a date-of-birth cell may list several dd.mm.yyyy dates.
                my $dob_raw = $f{'(6) Date of Birth'};
                my @dob     = $has_value->($dob_raw) ? ($dob_raw =~ /(\d{1,2}\.\d{1,2}\.\d{4})/g) : ();

                # Several passport numbers may share one cell, separated by '/'.
                my @passports = $present->(map { trim($_) } split m{/}, $f{'(10) Passport Number'} // '');

                _process_sanction_entry(
                    $dataset,
                    names          => [$name, $present->($f{'(8) Other Names'})],
                    date_of_birth  => \@dob,
                    place_of_birth => [$present->($f{'(7) Place of Birth'})],
                    nationality    => [$present->($f{'(9) Nationality'})],
                    national_id    => [$present->($f{'(11) Identification Card Number'})],
                    passport_no    => \@passports,
                );
            } else {

                # Entity: only names are available.
                _process_sanction_entry(
                    $dataset,
                    names          => [$name, $present->($f{'(4) Alias'}, $f{'(5) Other Name'})],
                    date_of_birth  => [],
                    place_of_birth => [],
                    nationality    => [],
                    national_id    => [],
                    passport_no    => [],
                );
            }
        }
    }

    return {
        updated => $publish_epoch,
        content => $dataset,
    };
}
=head2 _moha_xml_legacy
Parses the legacy TaggedPDF-doc XML format of the MOHA sanctions list.
=cut
sub _moha_xml_legacy {
my ($raw_data, $data) = @_;
# Try to find the creation date
my $publish_date;
# Check if it's a standalone tag at the beginning
if (exists $data->{'xmp:CreateDate'}) {
$publish_date = $data->{'xmp:CreateDate'};
}
# Check if it's nested within the rdf:Description
elsif (exists $data->{'TaggedPDF-doc'}
&& exists $data->{'TaggedPDF-doc'}{'x:xmpmeta'}
&& exists $data->{'TaggedPDF-doc'}{'x:xmpmeta'}{'rdf:RDF'}
( run in 1.471 second using v1.01-cache-2.11-cpan-71847e10f99 )