Data-URIID
view release on metacpan or search on metacpan
lib/Data/URIID/Service.pm view on Meta::CPAN
# licensed under Artistic License 2.0 (see LICENSE file)
# ABSTRACT: Extractor for identifiers from URIs
package Data::URIID::Service;
use v5.10;
use strict;
use warnings;
use Carp;
use JSON;
use URI;
use URI::Escape;
use Encode;
use Scalar::Util qw(weaken);
use List::Util qw(all);
use Data::Identifier::Generate v0.25;
use Data::URIID::Result;
use Data::URIID::Colour;
our $VERSION = v0.20;
use parent 'Data::URIID::Base';
my @musicbrainz_wikidata_relations = qw(P434 P435 P436 P966 P982 P1004 P1330 P1407 P4404 P5813 P6423 P8052);
my $config_wikidata = {
type => 'wikidata-identifier',
idmap => {
P213 => 'isni',
P214 => 'viaf-identifier',
P227 => 'gnd-identifier',
P356 => 'doi',
P402 => 'osm-relation',
P409 => 'libraries-australia-identifier',
P535 => 'find-a-grave-identifier',
P648 => 'open-library-identifier',
P1256 => 'iconclass-identifier',
P1315 => 'nla-trove-people-identifier',
P1566 => 'geonames-identifier',
P1651 => 'youtube-video-identifier',
P2041 => 'ngv-artist-identifier',
P2949 => 'wikitree-person-identifier',
P3916 => 'unesco-thesaurus-identifier',
P4684 => 'ngv-artwork-identifier',
P6735 => 'tww-artist-identifier',
P6804 => 'agsa-creator-identifier',
P7033 => 'aev-identifier',
P7704 => 'europeana-entity-identifier',
P8168 => 'factgrid-identifier',
P8406 => 'grove-art-online-identifier',
P9575 => 'amc-artist-identifier',
P10086 => 'a-p-and-p-artist-identifier',
P10689 => 'osm-way',
P10787 => 'factgrid-identifier',
P11693 => 'osm-node',
(map {$_ => 'musicbrainz-identifier'} @musicbrainz_wikidata_relations),
},
endpoint => {
sparql => 'https://query.wikidata.org/sparql',
entitydata => 'https://www.wikidata.org/wiki/Special:EntityData/%s.json?flavor=dump',
},
prefix => 'http://www.wikidata.org/entity/',
uuid_relations => \@musicbrainz_wikidata_relations,
special_ids => [
{
property => 'P1711',
type => 'british-museum-term',
to_service => sub {($_[0] =~ /^BIOG([1-9][0-9]+)$/)[0]},
from_service => sub {sprintf('BIOG%u', $_[0])},
},
],
attributes => [
(map {my $c = $_; {
property => $c->[0],
from_service => sub { return ($c->[1] => {'*' => $_[0]})},
}} (
[P487 => 'icon_text'], # 'Unicode character'
[P1163 => 'media_subtype'], # 'MIME type'
)),
(map {my $c = $_; {
property => $c->[0],
from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
}} (
[P569 => 'date_of_birth'],
[P570 => 'date_of_death'],
)),
(map {my $c = $_; {
property => $c->[0],
from_service => sub {
my ($value, $config) = @_;
return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
return ();
},
}} (
[P856 => 'website'], # 'official website'
)),
(map {my $c = $_; {
property => $c->[0],
from_service => sub {
my ($value, $config) = @_;
return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
return ();
},
}} (
[P21 => 'sex_or_gender'], # 'sex or gender'
[P376 => 'space_object'], # 'located on astronomical body'
)),
(map {my $c = $_; {
property => $c->[0],
list_value => sub {
my ($value, $config) = @_;
return ($c->[1] => [[URI->new($config->{prefix} . $value->{id})]]) if defined $value->{id};
return ();
},
}} (
[P31 => 'roles'], # 'instance of'
)),
{ # 'sRGB colour hex triplet'
property => 'P465',
from_service => sub {
my ($value) = @_;
return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if $value =~ /^[0-9a-f-AF]{6}$/;
return ();
},
},
{ # 'coordinate location'
property => 'P625',
from_service => \&_online_lookup__wikibase__from_service__coordinate,
},
],
};
my $config_factgrid = {
type => 'factgrid-identifier',
idmap => {
P76 => 'gnd-identifier',
P378 => 'viaf-identifier',
P980 => 'iconclass-identifier',
},
endpoint => {
sparql => 'https://database.factgrid.de/sparql',
entitydata => 'https://database.factgrid.de/wiki/Special:EntityData/%s.json?flavor=dump',
},
prefix => 'https://database.factgrid.de/entity/',
attributes => [
(map {my $c = $_; {
property => $c->[0],
from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
}} (
[P38 => 'date_of_death'],
[P77 => 'date_of_birth'],
)),
(map {my $c = $_; {
property => $c->[0],
from_service => sub {
my ($value, $config) = @_;
return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
return ();
},
}} (
[P156 => 'website'], # 'Online presence'
)),
(map {my $c = $_; {
property => $c->[0],
from_service => sub {
my ($value, $config) = @_;
return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
return ();
},
}} (
[P154 => 'sex_or_gender'], # 'Gender'
[P625 => 'sex_or_gender'], # 'Predominant gender usage'
)),
{ # 'Coordinate location'
property => 'P48',
from_service => \&_online_lookup__wikibase__from_service__coordinate,
},
{ # 'Hex color'
property => 'P696',
from_service => sub {
my ($value) = @_;
return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if $value =~ /^[0-9a-f-AF]{6}$/;
return ();
},
},
],
};
my @fellig_types = qw(fellig-identifier fellig-box-number uuid oid uri wikidata-identifier e621-post-identifier e621-pool-identifier wikimedia-commons-identifier british-museum-term musicbrainz-identifier gnd-identifier e621tagtype);
my %attrmap_osm = (
name => 'displayname',
description => 'description',
);
my %attrmap_open_graph = (
title => 'displayname',
description => 'description',
image => 'thumbnail',
);
lib/Data/URIID/Service.pm view on Meta::CPAN
sub _online_lookup__wikidata {
my ($self, $result) = @_;
return _online_lookup__wikibase($self, $result, $config_wikidata);
}
sub _online_lookup__factgrid {
my ($self, $result) = @_;
return _online_lookup__wikibase($self, $result, $config_factgrid);
}
sub _online_lookup__wikibase {
my ($self, $result, $config) = @_;
my $id = eval {$result->id($config->{type})};
unless (defined $id) {
$id = $self->_online_lookup__wikibase__stage_0($result, $config);
}
if (defined $id) {
return $self->_online_lookup__wikibase__stage_1($result, $id, $config);
}
return undef;
}
sub _online_lookup__wikibase__stage_0 {
my ($self, $result, $config) = @_;
my @ids;
foreach my $property (keys %{$config->{idmap}}) {
my $id = eval {$result->id($config->{idmap}{$property})};
if (defined $id) {
if ($id !~ /['"]/) {
push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
}
}
}
foreach my $special (@{$config->{special_ids}}) {
my $id = eval {$result->id($special->{type})};
if (defined $id) {
push(@ids, sprintf('?item wdt:%s "%s"', $special->{property}, $special->{to_service}->($id)));
}
}
# UUID is special:
{
my $id = eval {$result->id('uuid')};
if (defined $id) {
foreach my $property (@{$config->{uuid_relations}}) {
push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
}
}
}
return undef unless scalar @ids;
{
my $q = sprintf('SELECT * WHERE { { %s } } LIMIT 1', join('} UNION {', @ids));
my $res = $self->_get_json($config->{endpoint}{sparql}, query => {format => 'json', query => $q});
my $item = eval {$res->{results}{bindings}[0]{item}};
return undef unless $item;
return undef unless ($item->{type} // '') eq 'uri';
if (($item->{value} // '') =~ m#^\Q$config->{prefix}\E([QP][1-9][0-9]*)$#) {
return $1;
}
}
return undef;
}
sub _online_lookup__wikibase__stage_1 {
my ($self, $result, $id, $config) = @_;
my %ids = ($config->{type} => $id);
my %attr;
my %res = (id => \%ids, attributes => \%attr);
my $data = $self->_get_json(sprintf($config->{endpoint}{entitydata}, $id), local_override => ['%s.json', $id]);
$data = $data->{entities}{$id};
$attr{displayname} = {map {$_ => $data->{labels}{$_}{value}} keys %{$data->{labels}}};
$attr{description} = {map {$_ => $data->{descriptions}{$_}{value}} keys %{$data->{descriptions}}};
$res{wikidata_sitelinks} = $data->{sitelinks};
foreach my $property (keys %{$config->{idmap}}) {
foreach my $entry (@{$data->{claims}{$property} // []}) {
$ids{$config->{idmap}{$property}} = $entry->{mainsnak}{datavalue}{value};
}
}
foreach my $special (@{$config->{special_ids}}) {
foreach my $entry (@{$data->{claims}{$special->{property}} // []}) {
$ids{$special->{type}} //= $special->{from_service}->($entry->{mainsnak}{datavalue}{value});
}
}
foreach my $attribute (@{$config->{attributes}}) {
foreach my $entry (@{$data->{claims}{$attribute->{property}} // []}) {
if (defined $attribute->{from_service}) {
my %res = $attribute->{from_service}->($entry->{mainsnak}{datavalue}{value}, $config);
$attr{$_} //= $res{$_} foreach keys %res;
} elsif (defined $attribute->{list_value}) {
my %res = $attribute->{list_value}->($entry->{mainsnak}{datavalue}{value}, $config);
foreach my $key (keys %res) {
$attr{$key} //= [];
push(@{$attr{$key}}, @{$res{$key}});
}
}
}
}
return \%res;
}
sub _online_lookup__wikibase__from_service__datetime {
my ($key, $value) = @_;
my $precision = $value->{precision};
#use Data::Dumper;
#die Dumper $value;
if ($precision >= 9) {
require DateTime::Format::ISO8601;
my $dt = DateTime::Format::ISO8601->parse_datetime($value->{time} =~ s/^\+//r =~ s/-00-00T/-01-01T/r =~ s/-00T/-01T/r);
my $val;
if ($precision == 9) {
$val = $dt->year;
} elsif ($precision == 10) {
$val = sprintf('%.4u-%.2u', $dt->year, $dt->month);
} else {
$val = $dt->ymd;
}
return ($key => $val);
}
( run in 1.365 second using v1.01-cache-2.11-cpan-39bf76dae61 )