Data-URIID

 view release on metacpan or  search on metacpan

lib/Data/URIID/Service.pm  view on Meta::CPAN

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: Extractor for identifiers from URIs

package Data::URIID::Service;

use v5.10;
use strict;
use warnings;

use Carp;
use JSON;
use URI;
use URI::Escape;
use Encode;
use Scalar::Util qw(weaken);
use List::Util qw(all);
use Data::Identifier::Generate v0.25;

use Data::URIID::Result;
use Data::URIID::Colour;

our $VERSION = v0.20;

use parent 'Data::URIID::Base';

my @musicbrainz_wikidata_relations = qw(P434 P435 P436 P966 P982 P1004 P1330 P1407 P4404 P5813 P6423 P8052);

my $config_wikidata = {
    type => 'wikidata-identifier',
    idmap => {
        P213   => 'isni',
        P214   => 'viaf-identifier',
        P227   => 'gnd-identifier',
        P356   => 'doi',
        P402   => 'osm-relation',
        P409   => 'libraries-australia-identifier',
        P535   => 'find-a-grave-identifier',
        P648   => 'open-library-identifier',
        P1256  => 'iconclass-identifier',
        P1315  => 'nla-trove-people-identifier',
        P1566  => 'geonames-identifier',
        P1651  => 'youtube-video-identifier',
        P2041  => 'ngv-artist-identifier',
        P2949  => 'wikitree-person-identifier',
        P3916  => 'unesco-thesaurus-identifier',
        P4684  => 'ngv-artwork-identifier',
        P6735  => 'tww-artist-identifier',
        P6804  => 'agsa-creator-identifier',
        P7033  => 'aev-identifier',
        P7704  => 'europeana-entity-identifier',
        P8168  => 'factgrid-identifier',
        P8406  => 'grove-art-online-identifier',
        P9575  => 'amc-artist-identifier',
        P10086 => 'a-p-and-p-artist-identifier',
        P10689 => 'osm-way',
        P10787 => 'factgrid-identifier',
        P11693 => 'osm-node',
        (map {$_ => 'musicbrainz-identifier'} @musicbrainz_wikidata_relations),
    },
    endpoint => {
        sparql      => 'https://query.wikidata.org/sparql',
        entitydata  => 'https://www.wikidata.org/wiki/Special:EntityData/%s.json?flavor=dump',
    },
    prefix => 'http://www.wikidata.org/entity/',
    uuid_relations => \@musicbrainz_wikidata_relations,
    special_ids => [
        {
            property => 'P1711',
            type => 'british-museum-term',
            to_service => sub {($_[0] =~ /^BIOG([1-9][0-9]+)$/)[0]},
            from_service => sub {sprintf('BIOG%u', $_[0])},
        },
    ],
    attributes => [
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub { return ($c->[1] => {'*' => $_[0]})},
                }} (
                [P487  => 'icon_text'],     # 'Unicode character'
                [P1163 => 'media_subtype'], # 'MIME type'
            )),
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
            }} (
                [P569 => 'date_of_birth'],
                [P570 => 'date_of_death'],
            )),
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {
                    my ($value, $config) = @_;
                    return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
                    return ();
                },
            }} (
                [P856 => 'website'], # 'official website'
            )),
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {
                    my ($value, $config) = @_;
                    return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
                    return ();
                },
            }} (
                [P21  => 'sex_or_gender'], # 'sex or gender'
                [P376 => 'space_object'],  # 'located on astronomical body'
            )),
        (map {my $c = $_; {
                property => $c->[0],
                list_value => sub {
                    my ($value, $config) = @_;
                    return ($c->[1] => [[URI->new($config->{prefix} . $value->{id})]]) if defined $value->{id};
                    return ();
                },
            }} (
                [P31  => 'roles'], # 'instance of'
            )),
        {   # 'sRGB colour hex triplet'
            property => 'P465',
            from_service => sub {
                my ($value) = @_;
                return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if $value =~ /^[0-9a-f-AF]{6}$/;
                return ();
            },
        },
        {   # 'coordinate location'
            property => 'P625',
            from_service => \&_online_lookup__wikibase__from_service__coordinate,
        },
    ],
};

my $config_factgrid = {
    type => 'factgrid-identifier',
    idmap => {
        P76  => 'gnd-identifier',
        P378 => 'viaf-identifier',
        P980 => 'iconclass-identifier',
    },
    endpoint => {
        sparql      => 'https://database.factgrid.de/sparql',
        entitydata  => 'https://database.factgrid.de/wiki/Special:EntityData/%s.json?flavor=dump',
    },
    prefix => 'https://database.factgrid.de/entity/',
    attributes => [
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
            }} (
                [P38 => 'date_of_death'],
                [P77 => 'date_of_birth'],
            )),
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {
                    my ($value, $config) = @_;
                    return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
                    return ();
                },
            }} (
                [P156 => 'website'], # 'Online presence'
            )),
        (map {my $c = $_; {
                property => $c->[0],
                from_service => sub {
                    my ($value, $config) = @_;
                    return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
                    return ();
                },
            }} (
                [P154 => 'sex_or_gender'], # 'Gender'
                [P625 => 'sex_or_gender'], # 'Predominant gender usage'
            )),
        {   # 'Coordinate location'
            property => 'P48',
            from_service => \&_online_lookup__wikibase__from_service__coordinate,
        },
        {   # 'Hex color'
            property => 'P696',
            from_service => sub {
                my ($value) = @_;
                return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if $value =~ /^[0-9a-f-AF]{6}$/;
                return ();
            },
        },
    ],
};

my @fellig_types = qw(fellig-identifier fellig-box-number uuid oid uri wikidata-identifier e621-post-identifier e621-pool-identifier wikimedia-commons-identifier british-museum-term musicbrainz-identifier gnd-identifier e621tagtype);

my %attrmap_osm = (
    name        => 'displayname',
    description => 'description',
);

my %attrmap_open_graph = (
    title       => 'displayname',
    description => 'description',
    image       => 'thumbnail',
);

lib/Data/URIID/Service.pm  view on Meta::CPAN


sub _online_lookup__wikidata {
    my ($self, $result) = @_;
    return _online_lookup__wikibase($self, $result, $config_wikidata);
}

sub _online_lookup__factgrid {
    my ($self, $result) = @_;
    return _online_lookup__wikibase($self, $result, $config_factgrid);
}

sub _online_lookup__wikibase {
    my ($self, $result, $config) = @_;
    my $id = eval {$result->id($config->{type})};

    unless (defined $id) {
        $id = $self->_online_lookup__wikibase__stage_0($result, $config);
    }

    if (defined $id) {
        return $self->_online_lookup__wikibase__stage_1($result, $id, $config);
    }

    return undef;
}

sub _online_lookup__wikibase__stage_0 {
    my ($self, $result, $config) = @_;
    my @ids;

    foreach my $property (keys %{$config->{idmap}}) {
        my $id = eval {$result->id($config->{idmap}{$property})};
        if (defined $id) {
            if ($id !~ /['"]/) {
                push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
            }
        }
    }

    foreach my $special (@{$config->{special_ids}}) {
        my $id = eval {$result->id($special->{type})};
        if (defined $id) {
            push(@ids, sprintf('?item wdt:%s "%s"', $special->{property}, $special->{to_service}->($id)));
        }
    }

    # UUID is special:
    {
        my $id = eval {$result->id('uuid')};
        if (defined $id) {
            foreach my $property (@{$config->{uuid_relations}}) {
                push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
            }
        }
    }

    return undef unless scalar @ids;

    {
        my $q = sprintf('SELECT * WHERE { { %s } } LIMIT 1', join('} UNION {', @ids));
        my $res = $self->_get_json($config->{endpoint}{sparql}, query => {format => 'json', query => $q});
        my $item = eval {$res->{results}{bindings}[0]{item}};
        return undef unless $item;
        return undef unless ($item->{type} // '') eq 'uri';
        if (($item->{value} // '') =~ m#^\Q$config->{prefix}\E([QP][1-9][0-9]*)$#) {
            return $1;
        }
    }

    return undef;
}

sub _online_lookup__wikibase__stage_1 {
    my ($self, $result, $id, $config) = @_;
    my %ids = ($config->{type} => $id);
    my %attr;
    my %res = (id => \%ids, attributes => \%attr);
    my $data = $self->_get_json(sprintf($config->{endpoint}{entitydata}, $id), local_override => ['%s.json', $id]);

    $data = $data->{entities}{$id};

    $attr{displayname} = {map {$_ => $data->{labels}{$_}{value}}       keys %{$data->{labels}}};
    $attr{description} = {map {$_ => $data->{descriptions}{$_}{value}} keys %{$data->{descriptions}}};

    $res{wikidata_sitelinks} = $data->{sitelinks};
    foreach my $property (keys %{$config->{idmap}}) {
        foreach my $entry (@{$data->{claims}{$property} // []}) {
            $ids{$config->{idmap}{$property}} = $entry->{mainsnak}{datavalue}{value};
        }
    }

    foreach my $special (@{$config->{special_ids}}) {
        foreach my $entry (@{$data->{claims}{$special->{property}} // []}) {
            $ids{$special->{type}} //= $special->{from_service}->($entry->{mainsnak}{datavalue}{value});
        }
    }

    foreach my $attribute (@{$config->{attributes}}) {
        foreach my $entry (@{$data->{claims}{$attribute->{property}} // []}) {
            if (defined $attribute->{from_service}) {
                my %res = $attribute->{from_service}->($entry->{mainsnak}{datavalue}{value}, $config);
                $attr{$_} //= $res{$_} foreach keys %res;
            } elsif (defined $attribute->{list_value}) {
                my %res = $attribute->{list_value}->($entry->{mainsnak}{datavalue}{value}, $config);
                foreach my $key (keys %res) {
                    $attr{$key} //= [];
                    push(@{$attr{$key}}, @{$res{$key}});
                }
            }
        }
    }

    return \%res;
}

sub _online_lookup__wikibase__from_service__datetime {
    my ($key, $value) = @_;
    my $precision = $value->{precision};

    #use Data::Dumper;
    #die Dumper $value;

    if ($precision >= 9) {
        require DateTime::Format::ISO8601;

        my $dt = DateTime::Format::ISO8601->parse_datetime($value->{time} =~ s/^\+//r =~ s/-00-00T/-01-01T/r =~ s/-00T/-01T/r);
        my $val;

        if ($precision == 9) {
            $val = $dt->year;
        } elsif ($precision == 10) {
            $val = sprintf('%.4u-%.2u', $dt->year, $dt->month);
        } else {
            $val = $dt->ymd;
        }

        return ($key => $val);
    }



( run in 1.365 second using v1.01-cache-2.11-cpan-39bf76dae61 )