Data-URIID
view release on metacpan or search on metacpan
lib/Data/URIID/Service.pm view on Meta::CPAN
if (defined(my $displayname = $id->displayname(default => undef, no_defaults => 1))) {
$own_metadata{services}{$name}{displayname} = {'*' => $displayname};
}
}
# Private helper:
# Returns a freshly built hashref that maps a service name to an arrayref of
# identifier type names (presumably the types that service can additionally be
# asked about -- confirm against the callers).
# Note: the 'fellig' entry is a reference to the shared @fellig_types array,
# not a copy.
sub _extra_lookup_services {
    my %types_for;

    $types_for{'wikidata'}          = [values(%{$config_wikidata->{idmap}}), qw(wikidata-identifier british-museum-term uuid)];
    $types_for{'wikimedia-commons'} = [qw(wikimedia-commons-identifier)];
    $types_for{'fellig'}            = \@fellig_types;
    $types_for{'noembed.com'}       = [qw(youtube-video-identifier)];
    $types_for{'osm'}               = [qw(osm-node osm-way osm-relation)];
    $types_for{'overpass'}          = [qw(wikidata-identifier)];

    $types_for{'Data::URIID'} = [
        qw(uuid oid uri), # ISE,
        keys %{_own_well_known()},
    ];

    $types_for{'Data::Identifier'} = [
        qw(uuid oid uri), # ISE,
        qw(e621-post-identifier e621-pool-identifier e621tagtype e621tag), # e621
        qw(danbooru2chanjp-post-identifier danbooru2chanjp-tag), # danbooru2chanjp
        keys %{_own_well_known()},
    ];

    $types_for{'factgrid'}        = [values(%{$config_factgrid->{idmap}}), qw(factgrid-identifier)];
    $types_for{'doi'}             = [qw(doi)];
    $types_for{'iconclass'}       = ['iconclass-identifier'];
    $types_for{'xkcd'}            = ['xkcd-num'];
    $types_for{'e621'}            = ['e621-post-identifier', 'e621-pool-identifier'];
    $types_for{'furaffinity'}     = ['furaffinity-post-identifier'];
    $types_for{'imgur'}           = ['imgur-post-identifier'];
    $types_for{'notalwaysright'}  = ['notalwaysright-post-identifier'];
    $types_for{'ruthede'}         = ['ruthede-comic-post-identifier'];
    $types_for{'danbooru2chanjp'} = ['danbooru2chanjp-post-identifier'];

    return \%types_for;
}
# Private helper:
# Returns a freshly built hashref that maps a service name to an arrayref of
# digest names listed for that service.
sub _extra_lookup_services_digests {
    my %digests_for = (
        'e621' => ['md-5-128'],
    );

    return \%digests_for;
}
# Private helper:
# Fetches $url via the extractor's user agent (sending "Accept: text/html")
# and parses the response body with HTML::TreeBuilder::XPath.
#
# Options:
#   query - hashref of query parameters appended to the ones already in $url.
#           The URL is cloned (or built from a string) first, so a URI object
#           passed in by the caller is not modified.
#
# Returns the parsed tree. Returns undef if the 'network_deny' setting is
# true, if HTML::TreeBuilder::XPath cannot be loaded, or if the request is
# not successful. An exception while fetching or parsing is swallowed by the
# eval (undef in scalar context).
sub _get_html {
    my ($self, $url, %opts) = @_;

    return undef if $self->setting('network_deny');

    # The parser is an optional dependency: without it there is nothing to return.
    my $parser_available = eval {require HTML::TreeBuilder::XPath; 1;};
    return undef unless $parser_available;

    my Data::URIID $extractor = $self->extractor;
    my $extra_query = $opts{query};

    if (defined $extra_query) {
        $url = ref($url) ? $url->clone : URI->new($url);
        $url->query_form($url->query_form, %{$extra_query});
    }

    # We cannot use decoded_content()'s charset decoding here as it's buggy for JSON (and others?) response (at least in v6.18).
    # So only the content encoding is undone by decoded_content(); the charset is decoded by hand.
    return eval {
        my $response = $extractor->_ua->get($url, 'Accept' => 'text/html');
        $response->is_success or return undef;

        my $body_ref = $response->decoded_content(ref => 1, charset => 'none');
        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse(decode($response->content_charset, $$body_ref));
        $tree->eof;
        $tree;
    };
}
# Private helper:
# Fetches $url via the extractor's user agent (sending
# "Accept: application/json") and returns the decoded JSON data.
#
# Options:
#   local_override - arrayref of [path_template, @values]. If the
#                    'local_override_dir' setting is defined and all @values
#                    are defined, each "%s" in the template is replaced by the
#                    next URI-escaped value and the resulting file below that
#                    directory is tried first (via _get_json_file).
#   query          - hashref of query parameters appended to the ones already
#                    in $url (the URL is cloned / built from a string first).
#
# Returns undef if the 'network_deny' setting is true (this check comes before
# the local override lookup) or if the request is not successful. An exception
# while fetching or decoding is swallowed by the eval (undef in scalar context).
sub _get_json {
    my ($self, $url, %opts) = @_;
    my Data::URIID $extractor = $self->extractor;

    return undef if $self->setting('network_deny');

    my $override = $opts{local_override};
    my $override_dir = defined($override) ? $self->setting('local_override_dir') : undef;

    if (defined $override_dir) {
        my ($template, @values) = @{$override};

        if (all { defined } @values) {
            (my $relative = $template) =~ s/%s/uri_escape_utf8(shift(@values))/ge;
            my $local = $self->_get_json_file($override_dir.'/'.$relative);
            return $local if defined $local;
        }
    }

    my $extra_query = $opts{query};
    if (defined $extra_query) {
        $url = ref($url) ? $url->clone : URI->new($url);
        $url->query_form($url->query_form, %{$extra_query});
    }

    # We cannot use decoded_content()'s charset decoding here as it's buggy for JSON response (at least in v6.18).
    # So only the content encoding is undone by decoded_content(); the charset is decoded by hand.
    return eval {
        my $response = $extractor->_ua->get($url, 'Accept' => 'application/json');
        $response->is_success or return undef;

        my $body_ref = $response->decoded_content(ref => 1, charset => 'none');
        from_json(decode($response->content_charset, $$body_ref));
    };
}
# Private helper:
# Reads a JSON document from $filename, or from "$filename.gz" (through the
# :gzip PerlIO layer) if the plain file cannot be opened, and returns the
# decoded data.
#
# Returns undef if neither file can be opened. A read or parse error is
# swallowed by the eval (undef in scalar context).
sub _get_json_file {
    my ($self, $filename) = @_;
    my $fh;

    # Try the plain file first, then the gzip-compressed variant.
    foreach my $candidate (['<', $filename], ['<:gzip', $filename.'.gz']) {
        my ($mode, $path) = @{$candidate};
        $fh = eval { open(my $handle, $mode, $path) or die $!; $handle; };
        last if defined $fh;
    }

    return undef unless $fh;

    return eval {
        local $/ = undef; # slurp the whole file
        my $octets = scalar <$fh>;
        # The handle yields raw bytes, but from_json() is used on character
        # strings in this file (see _get_json), so decode the UTF-8 first.
        # Without this, non-ASCII text in the file comes out as mojibake.
        from_json(decode('UTF-8', $octets));
    };
}
# Private helper:
# Copies selected Open Graph values from a parsed HTML document into
# $res->{attributes}.
#
# Parameters:
#   $res     - result hashref; its 'attributes' hash is created if missing.
#   $html    - parsed document supporting findnodes() (XPath).
#   $keys    - arrayref of Open Graph key names without the "og:" prefix.
#              Each must be known to %attrmap_open_graph, else this croaks.
#   $filters - optional hashref mapping a key name to a pattern the value
#              must match to be accepted.
#
# Values are read from /html/head/meta[@property] elements. A value is stored
# under the '*' slot of the mapped attribute only if it is defined, non-empty,
# passes the filter (if any), and that slot is not already set.
sub _load_open_graph {
    my ($self, $res, $html, $keys, $filters) = @_;
    my $attributes = ($res->{attributes} //= {});
    my %content_of = map {; $_->attr('property') => $_->attr('content') } $html->findnodes('/html/head/meta[@property]');

    $filters //= {};

    KEY: foreach my $key (@{$keys}) {
        my $attrname = $attrmap_open_graph{$key} // croak 'BUG: Unknown key name: '.$key;
        my $filter = $filters->{$key};
        my $value = $content_of{'og:'.$key};

        next KEY unless defined($value) && length($value);
        next KEY if defined($filter) && $value !~ $filter;

        # Never overwrite a value that is already present.
        ($attributes->{$attrname} //= {})->{'*'} //= $value;
    }
}
# Private helper:
# Returns a hashref with two views of a built-in type alias table:
#   forward  - {types => {type_name => {alias_for => uuid}}}
#   backward - {uuid => type_name}
# The structure is built once and the same hashref is returned on every call.
sub _get_uriid_decompiled_types_json {
    my ($self) = @_;

    state $decompiled = do {
        my @aliases = (
            ['oid'                 => 'd08dc905-bbf6-4183-b219-67723c3c8374'],
            ['uri'                 => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439'],
            ['uuid'                => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31'],
            ['wikidata-identifier' => 'ce7aae1e-a210-4214-926a-0ebca56d77e3'],
            ['gtin'                => '82d529be-0f00-4b4f-a43f-4a22de5f5312'],
            ['sid'                 => 'f87a38cb-fd13-4e15-866c-e49901adbec5'],
        );
        my (%types, %name_of);

        foreach my $entry (@aliases) {
            my ($type_name, $uuid) = @{$entry};
            $types{$type_name} = {alias_for => $uuid};
            $name_of{$uuid} = $type_name if defined $uuid;
        }

        +{
            forward  => {types => \%types},
            backward => \%name_of,
        };
    };

    return $decompiled;
}
# Private lookup drivers:
( run in 1.148 second using v1.01-cache-2.11-cpan-39bf76dae61 )