Data-Validate-Sanctions

 view release on metacpan or  search on metacpan

lib/Data/Validate/Sanctions/Fetcher.pm  view on Meta::CPAN

    }

    # Detect new xmlResponse format vs legacy TaggedPDF-doc format
    if (exists $data->{'xmlResponse'}) {
        $data = xml2hash($raw_data, array => ['entry', 'field', 'section']);
        return _moha_xml_new($data);
    }

    return _moha_xml_legacy($raw_data, $data);
}

=head2 _moha_xml_new

Parses the new C<xmlResponse> format of the MOHA sanctions list.

Takes the hashref produced by C<xml2hash>; the caller forces C<section>,
C<entry> and C<field> to be arrays. Every C<field> of an entry is collected
into a lookup keyed by the field's (whitespace-normalized) C<name> attribute.
An entry that has a C<(6) Date of Birth> field is handled as an individual,
any other entry as an entity that only carries names. Entries without a usable
C<(3) Name> are skipped. Each remaining entry is handed to
C<_process_sanction_entry> together with the dataset being built.

The value C<-> is used by the list as a placeholder for "no value"; it is
dropped from every attribute.

Returns a hashref with the keys:

=over 4

=item * C<updated> - epoch taken with C<time()> when parsing starts

=item * C<content> - the dataset arrayref passed to C<_process_sanction_entry>

=back

Dies with C<Invalid MOHA xmlResponse format> when the top-level
C<xmlResponse> element is missing.

=cut

sub _moha_xml_new {
    my $data    = shift;
    my $dataset = [];

    my $xml = $data->{'xmlResponse'} or die "Invalid MOHA xmlResponse format\n";

    my $publish_epoch = time();
    my $sections      = $xml->{'section'} // [];

    # Returns the value as a one-element list when it carries real data, or an
    # empty list when it is missing, empty or the '-' placeholder. Being
    # list-valued, the result can be interpolated straight into array
    # constructors without leaving placeholder elements behind.
    my $present = sub {
        my $value = shift // '';
        return ($value && $value ne '-') ? ($value) : ();
    };

    for my $section (@$sections) {
        my $entries = $section->{'entry'} // [];

        for my $entry (@$entries) {
            my $fields = $entry->{'field'} // [];

            # Field label => trimmed text content for the current entry
            my %f;
            for my $field (@$fields) {
                my $label = $field->{'-name'} // '';
                my $value = $field->{'#text'} // '';
                # Normalize whitespace in field names (real XML has newlines between column number and label)
                $label =~ s/\s+/ /g;
                $f{$label} = trim($value);
            }

            # Must be decided on key existence: an individual may have an empty date of birth
            my $is_individual = exists $f{'(6) Date of Birth'};

            my ($name) = $present->($f{'(3) Name'});
            next unless defined $name;

            if ($is_individual) {
                # A single cell may list several dates; an empty or placeholder
                # cell simply yields no match
                my @dob = ($f{'(6) Date of Birth'} // '') =~ /(\d{1,2}\.\d{1,2}\.\d{4})/g;

                # Multiple passport numbers are separated by '/'; drop pieces
                # that are empty after trimming
                my @passports =
                    grep { defined $_ && $_ ne '' }
                    map  { trim($_) }
                    map  { split m{/}, $_ } $present->($f{'(10) Passport Number'});

                _process_sanction_entry(
                    $dataset,
                    names          => [$name, $present->($f{'(8) Other Names'})],
                    date_of_birth  => \@dob,
                    place_of_birth => [$present->($f{'(7) Place of Birth'})],
                    nationality    => [$present->($f{'(9) Nationality'})],
                    national_id    => [$present->($f{'(11) Identification Card Number'})],
                    passport_no    => \@passports,
                );
            } else {
                # Entities only carry names: the main name plus optional alias / other name
                my @names_list = ($name, $present->($f{'(4) Alias'}), $present->($f{'(5) Other Name'}));

                _process_sanction_entry(
                    $dataset,
                    names          => \@names_list,
                    date_of_birth  => [],
                    place_of_birth => [],
                    nationality    => [],
                    national_id    => [],
                    passport_no    => [],
                );
            }
        }
    }

    return {
        updated => $publish_epoch,
        content => $dataset,
    };
}

=head2 _moha_xml_legacy

Parses the legacy TaggedPDF-doc XML format from MOHA sanctions list.

=cut

sub _moha_xml_legacy {
    my ($raw_data, $data) = @_;

    # Try to find the creation date
    my $publish_date;

    # Check if it's a standalone tag at the beginning
    if (exists $data->{'xmp:CreateDate'}) {
        $publish_date = $data->{'xmp:CreateDate'};
    }
    # Check if it's nested within the rdf:Description
    elsif (exists $data->{'TaggedPDF-doc'}
        && exists $data->{'TaggedPDF-doc'}{'x:xmpmeta'}
        && exists $data->{'TaggedPDF-doc'}{'x:xmpmeta'}{'rdf:RDF'}



( run in 1.471 second using v1.01-cache-2.11-cpan-71847e10f99 )