Data-URIID

 view release on metacpan or  search on metacpan

lib/Data/URIID/Service.pm  view on Meta::CPAN

# Copyright (c) 2023-2025 Philipp Schafft

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: Extractor for identifiers from URIs

package Data::URIID::Service;

use v5.10;
use strict;
use warnings;

use Carp;
use JSON;
use URI;
use URI::Escape;
use Encode;
use Scalar::Util qw(weaken);
use List::Util qw(all);
use Data::Identifier::Generate v0.25;

use Data::URIID::Result;
use Data::URIID::Colour;

our $VERSION = v0.20;

use parent 'Data::URIID::Base';

my @musicbrainz_wikidata_relations = qw(P434 P435 P436 P966 P982 P1004 P1330 P1407 P4404 P5813 P6423 P8052);

my $config_wikidata = {
    type => 'wikidata-identifier',
    idmap => {
        P213   => 'isni',
        P214   => 'viaf-identifier',
        P227   => 'gnd-identifier',
        P356   => 'doi',
        P402   => 'osm-relation',
        P409   => 'libraries-australia-identifier',
        P535   => 'find-a-grave-identifier',
        P648   => 'open-library-identifier',
        P1256  => 'iconclass-identifier',
        P1315  => 'nla-trove-people-identifier',
        P1566  => 'geonames-identifier',
        P1651  => 'youtube-video-identifier',
        P2041  => 'ngv-artist-identifier',
        P2949  => 'wikitree-person-identifier',
        P3916  => 'unesco-thesaurus-identifier',
        P4684  => 'ngv-artwork-identifier',
        P6735  => 'tww-artist-identifier',
        P6804  => 'agsa-creator-identifier',
        P7033  => 'aev-identifier',
        P7704  => 'europeana-entity-identifier',
        P8168  => 'factgrid-identifier',
        P8406  => 'grove-art-online-identifier',
        P9575  => 'amc-artist-identifier',
        P10086 => 'a-p-and-p-artist-identifier',
        P10689 => 'osm-way',
        P10787 => 'factgrid-identifier',
        P11693 => 'osm-node',
        (map {$_ => 'musicbrainz-identifier'} @musicbrainz_wikidata_relations),
    },
    endpoint => {
        sparql      => 'https://query.wikidata.org/sparql',
        entitydata  => 'https://www.wikidata.org/wiki/Special:EntityData/%s.json?flavor=dump',
    },
    prefix => 'http://www.wikidata.org/entity/',
    uuid_relations => \@musicbrainz_wikidata_relations,
    special_ids => [
        {
            property => 'P1711',
            type => 'british-museum-term',
            to_service => sub {($_[0] =~ /^BIOG([1-9][0-9]+)$/)[0]},
            from_service => sub {sprintf('BIOG%u', $_[0])},
        },
    ],
    attributes => [
        (map {my $c = $_; {

lib/Data/URIID/Service.pm  view on Meta::CPAN

                $e->{displayname} //= {};
                $e->{displayname}{'*'} //= $name;
            }

            $e->{displaycolour} //= {};
            $e->{displaycolour}{'*'} //= $colour_object;
            $e->{roles} = $colour_roles;

            if ($list != \@displaycolours) {
                push(@displaycolours, [undef, $colour_object->ise, $displaycolour]);
            }
        }
    }

    # Add an entry for each colour used.
    foreach my $type (keys %own_well_known) {
        foreach my $entry (values %{$own_well_known{$type}}) {
            my $dpca = $entry->{attributes}{displaycolour} // next;
            my $displaycolour = $dpca->{'*'} // next;
            my $e = ($own_well_known{uuid}{$displaycolour->ise} //= {})->{attributes} //= {};
            $e->{displaycolour} //= {};
            $e->{displaycolour}{'*'} //= $displaycolour;
        }
    }

    foreach my $language (keys %{$own_well_known{'language-tag-identifier'}}) {
        my $uuid = Data::Identifier::Generate->generic(namespace => '47dd950c-9089-4956-87c1-54c122533219', style => 'id-based', request => $language)->uuid;
        $own_well_known{uuid}{$uuid} = $own_well_known{'language-tag-identifier'}{$language};
    }
    # Mix and match entries by identifiers to speed up lookups.
    # This step must always be the last one.
    foreach my $id_type_outer (keys %own_well_known) {
        foreach my $src_id (keys %{$own_well_known{$id_type_outer}}) {
            my $src     = $own_well_known{$id_type_outer}{$src_id};
            my $s_ids   = $src->{ids} //= {};
            my $s_attrs = $src->{attributes} //= {};

            $s_ids->{$id_type_outer} = $src_id;

            foreach my $id_type_inner (keys %{$s_ids}) {
                my $dst = ($own_well_known{$id_type_inner} //= {})->{$s_ids->{$id_type_inner}} //= {};
                if ($src != $dst) {
                    my $d_ids   = $dst->{ids} //= {};
                    my $d_attrs = $dst->{attributes} //= {};

                    $s_ids->{$_}   //= $d_ids->{$_}   foreach keys %{$d_ids};
                    $s_attrs->{$_} //= $d_attrs->{$_} foreach keys %{$d_attrs};
                    $own_well_known{$id_type_inner}{$s_ids->{$id_type_inner}} = $src;
                }
            }
        }
    }

    return $res = \%own_well_known;
}


# Private method:
sub new {
    my ($pkg, %opts) = @_;
    weaken($opts{extractor});
    return bless \%opts, $pkg;
}

# Private helper:
sub _is_online {
    my ($self) = @_;
    return $self->online && $self->extractor->online;
}

# Private method:
sub _online_lookup {
    my ($self, $result, %opts) = @_;
    my $func;

    return undef unless $self->_is_online;
    $func = $self->can(sprintf('_online_lookup__%s', $self->name =~ tr/\.:\-/_/r));
    return undef unless $func;

    return $self->$func($result, %opts);
}

# Private method:
sub _offline_lookup {
    my ($self, $result, %opts) = @_;
    my $func;

    $func = $self->can(sprintf('_offline_lookup__%s', $self->name =~ tr/\.:\-/_/r));
    return undef unless $func;

    return $self->$func($result, %opts);
}


sub name {
    my ($self) = @_;
    return $self->{name} //= $self->extractor->ise_to_name(service => $self->ise);
}


sub online {
    my ($self, $new_value) = @_;

    if (scalar(@_) == 2) {
        $self->{online} = !!$new_value;
    }

    return $self->{online};
}


sub setting {
    my ($self, $setting, $new_value) = @_;

    $self->{setting} //= {};

    if (scalar(@_) == 3) {
        $self->{setting}{$setting} = $new_value;
    }

    return $self->{setting}{$setting};



( run in 0.484 second using v1.01-cache-2.11-cpan-39bf76dae61 )