Data-Identifier
view release on metacpan or search on metacpan
lib/Data/Identifier.pm view on Meta::CPAN
#!/usr/bin/perl -w
# Copyright (c) 2023-2026 Philipp Schafft
# licensed under Artistic License 2.0 (see LICENSE file)
# ABSTRACT: format independent identifier object
package Data::Identifier;
use v5.20;
use strict;
use warnings;
use parent qw(Data::Identifier::Interface::Known Data::Identifier::Interface::Userdata);
use Carp;
use Math::BigInt lib => 'GMP';
use URI;
our $VERSION = v0.30;
# Precompiled validation patterns, one per identifier syntax.
# All of them anchor at the start of the string; all except RE_URI and
# RE_DOI also anchor at the very end (\z).

# UUID in lower case hex, grouped 8-4-4-4-12.
use constant RE_UUID       => qr/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}\z/;
# OID: first arc 0..2, then at least one ".arc"; arcs carry no leading zeros.
use constant RE_OID        => qr/^[0-2](?:\.(?:0|[1-9][0-9]*))+\z/;
# URI: only the scheme prefix is checked (a letter, one or more scheme characters, then ":").
use constant RE_URI        => qr/^[a-zA-Z][a-zA-Z0-9\+\.\-]+:/;
# Unsigned decimal integer without leading zeros.
use constant RE_UINT       => qr/^(?:0|[1-9][0-9]*)\z/;
# Signed decimal integer without leading zeros; "-0" is not accepted.
use constant RE_SINT       => qr/^(?:0|-?[1-9][0-9]*)\z/;
# One of the letters Q, P or L followed by a positive integer.
use constant RE_QID        => qr/^[QPL][1-9][0-9]*\z/;
# DOI: "10.", a prefix of two or more digits, optional dotted numeric parts, "/" and at least one more character.
use constant RE_DOI        => qr/^10\.[1-9][0-9]+(?:\.[0-9]+)*\/./;
# GTIN: 8, 12, 13 or 14 digits.
use constant RE_GTIN       => qr/^[0-9]{8}(?:[0-9]{4,6})?\z/;
# "U+" followed by 4 to 7 upper case hex digits; the digits are captured.
use constant RE_UNICODE    => qr/^U\+([0-9A-F]{4,7})\z/;
# Non-empty string containing neither upper case characters nor whitespace.
use constant RE_SIMPLE_TAG => qr/^[^\p{upper case}\s]+\z/;
# UUIDs of the well known identifier types (WK_*).
# The trailing comment on each line is the name used for that UUID in this file.
use constant {
    WK_NULL       => '00000000-0000-0000-0000-000000000000', # NULL, undef, ...
    WK_UUID       => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', # uuid
    WK_OID        => 'd08dc905-bbf6-4183-b219-67723c3c8374', # oid
    WK_URI        => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', # uri
    WK_SID        => 'f87a38cb-fd13-4e15-866c-e49901adbec5', # small-identifier
    WK_WD         => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', # wikidata-identifier
    WK_GTIN       => '82d529be-0f00-4b4f-a43f-4a22de5f5312', # gtin
    WK_IBAN       => 'b1418262-6bc9-459c-b4b0-a054d77db0ea', # iban
    WK_BIC        => 'c8a3a132-f160-473c-b5f3-26a748f37e62', # bic
    WK_DOI        => '931f155e-5a24-499b-9fbb-ed4efefe27fe', # doi
    WK_FC         => 'd576b9d1-47d4-43ae-b7ec-bbea1fe009ba', # factgrid-identifier
    WK_UNICODE_CP => '5f167223-cc9c-4b2f-9928-9fe1b253b560', # unicode-code-point
    WK_SNI        => '039e0bb7-5dd3-40ee-a98c-596ff6cce405', # sirtx-numerical-identifier
    WK_HDI        => 'f8eb04ef-3b8a-402c-ad7c-1e6814cb1998', # host-defined-identifier
    WK_UDI        => '05af99f9-4578-4b79-aabe-946d8e6f5888', # user-defined-identifier
    WK_CHAT0W     => '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a', # chat-0-word-identifier
};

# UUIDs of the namespaces (NS_*) referenced further down in this file.
use constant {
    NS_WD         => '9e10aca7-4a99-43ac-9368-6cbfa43636df', # Wikidata-namespace
    NS_FC         => '6491f7a9-0b29-4ef1-992c-3681cea18182', # factgrid-namespace
    NS_INT        => '5dd8ddbb-13a8-4d6c-9264-36e6dd6f9c99', # integer-namespace
    NS_DATE       => 'fc43fbba-b959-4882-b4c8-90a288b7d416', # gregorian-date-namespace
    NS_GTIN       => 'd95d8b1f-5091-4642-a6b0-a585313915f1', # gtin-namespace
    NS_UNICODE_CP => '132aa723-a373-48bf-a88d-69f1e00f00cf', # unicode-character-namespace
};
# Features:
# Switch for OID support. It is only set here; nothing in this part of the file reads it.
my $enabled_oid = 1;

# Type UUID => short type name.
# NOTE(review): judging by the variable name these are the type names used by
# uriid.org -- confirm against the code that reads this table.
my %uuid_to_uriid_org = (
    WK_UUID() => 'uuid',
    WK_OID()  => 'oid',
    WK_URI()  => 'uri',
    WK_SID()  => 'sid',
    WK_GTIN() => 'gtin',
    WK_WD()   => 'wikidata-identifier',
);

# Inverse table: short type name => type UUID.
# The names above are all distinct, so swapping keys and values loses nothing.
my %uuid_org_to_uuid = reverse %uuid_to_uriid_org;
# The "uuid" type comes first: every other well known type below is itself
# named by a UUID, so this object is passed as the type of each of them.
my $well_known_uuid = __PACKAGE__->new(ise => WK_UUID, validate => RE_UUID);

# Short name => identifier object for the well known types.
# Where given, "validate" is the pattern for values of that type and
# "generate" is passed through to new() unchanged.
my %well_known = (
    uuid => $well_known_uuid,
    oid  => __PACKAGE__->new($well_known_uuid => WK_OID,  validate => RE_OID),
    uri  => __PACKAGE__->new($well_known_uuid => WK_URI,  validate => RE_URI),
    sid  => __PACKAGE__->new($well_known_uuid => WK_SID,  validate => RE_UINT),
    sni  => __PACKAGE__->new($well_known_uuid => WK_SNI,  validate => RE_UINT),
    wd   => __PACKAGE__->new($well_known_uuid => WK_WD,   validate => RE_QID,  generate => 'id-based'),
    fc   => __PACKAGE__->new($well_known_uuid => WK_FC,   validate => RE_QID,  generate => 'id-based'),
    gtin => __PACKAGE__->new($well_known_uuid => WK_GTIN, validate => RE_GTIN, generate => 'id-based'),
    iban => __PACKAGE__->new($well_known_uuid => WK_IBAN),
    bic  => __PACKAGE__->new($well_known_uuid => WK_BIC),
    doi  => __PACKAGE__->new($well_known_uuid => WK_DOI,  validate => RE_DOI),

    # Unofficial, not part of public API:
    # Also used by Data::Identifier::Util!
    unicodecp => __PACKAGE__->new($well_known_uuid => WK_UNICODE_CP, validate => RE_UNICODE, generate => 'id-based'),
    hdi       => __PACKAGE__->new($well_known_uuid => WK_HDI, validate => RE_UINT),
    udi       => __PACKAGE__->new($well_known_uuid => WK_UDI, validate => RE_UINT),
    null      => __PACKAGE__->new($well_known_uuid => WK_NULL),
);

# Declared before the first register() call below.
# NOTE(review): presumably this is the registry that register() fills -- confirm in register().
my %registered;

# Register every well known type.
for my $identifier (values %well_known) {
    $identifier->register;
}
# Refill with namespaces:
# Attach a namespace identifier to those well known types that have one.
{
    # Short name in %well_known => namespace UUID
    my %namespace_of = (
        wd        => NS_WD,
        fc        => NS_FC,
        gtin      => NS_GTIN,
        unicodecp => NS_UNICODE_CP,
    );

    while (my ($name, $ns_uuid) = each %namespace_of) {
        # //= keeps a namespace that is already set; only otherwise is the
        # namespace identifier created and registered. The stored value is the
        # return value of register() (presumably the identifier itself -- confirm).
        $well_known{$name}->{namespace} //= __PACKAGE__->new(ise => $ns_uuid)->register;
    }
}
# Refill with sids:
# Record the small-identifier (sid) number of each listed identifier in its id cache.
{
    # ISE => sid
    my %sid_of = (
        WK_NULL()                              => 0,   # NULL
        'ddd60c5c-2934-404f-8f2d-fcb4da88b633' => 1,   # also-shares-identifier
        WK_UUID()                              => 2,
        'bfae7574-3dae-425d-89b1-9c087c140c23' => 3,   # tagname
        '7f265548-81dc-4280-9550-1bd0aa4bf748' => 4,   # has-type
        WK_URI()                               => 5,
        WK_OID()                               => 6,
        # Unassigned: 7
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1' => 8,   # language-tag-identifier
        WK_WD()                                => 9,
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4' => 10,  # specialises
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1' => 11,  # unicode-string
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1' => 12,  # integer
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d' => 13,  # unsigned-integer
        # Unassigned: 14, 15
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4' => 16,  # default-context
        '52a516d0-25d8-47c7-a6ba-80983e576c54' => 17,  # proto-file
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d' => 18,  # final-file-size
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0' => 19,  # text-fragment
        '4c9656eb-c130-42b7-9348-a1fee3f42050' => 20,  # also-list-contains-also
        '298ef373-9731-491d-824d-b2836250e865' => 21,  # proto-message
        '7be4d8c7-6a75-44cc-94f7-c87433307b26' => 22,  # proto-entity
        '65bb36f2-b558-48af-8512-bca9150cca85' => 23,  # proxy-type
        'a1c478b5-0a85-4b5b-96da-d250db14a67c' => 24,  # flagged-as
        '59cfe520-ba32-48cc-b654-74f7a05779db' => 25,  # marked-as
        '2bffc55d-7380-454e-bd53-c5acd525d692' => 26,  # roaraudio-error-number
        WK_SID()                               => 27,
        'd2750351-aed7-4ade-aa80-c32436cc6030' => 28,  # also-has-role
        '11d8962c-0a71-4d00-95ed-fa69182788a8' => 29,  # also-has-comment
        '30710bdb-6418-42fb-96db-2278f3bfa17f' => 30,  # also-has-description
        # Unassigned: 31
        '448c50a8-c847-4bc7-856e-0db5fea8f23b' => 32,  # final-file-encoding
        '79385945-0963-44aa-880a-bca4a42e9002' => 33,  # final-file-hash
        '3fde5688-6e34-45e9-8f33-68f079b152c8' => 34,  # SEEK_SET
        'bc598c52-642e-465b-b079-e9253cd6f190' => 35,  # SEEK_CUR
        '06aff30f-70e8-48b4-8b20-9194d22fc460' => 36,  # SEEK_END
        '59a5691a-6a19-4051-bc26-8db82c019df3' => 37,  # inode
        WK_CHAT0W()                            => 112, # chat-0-word-identifier
        WK_SNI()                               => 113, # sirtx-numerical-identifier
        WK_GTIN()                              => 160,
    );

    while (my ($ise, $sid) = each %sid_of) {
        my $identifier = __PACKAGE__->new(ise => $ise);

        # Create the id cache on first use; a sid that is already cached wins.
        my $cache = $identifier->{id_cache} //= {};
        $cache->{WK_SID()} //= $sid;

        $identifier->register; # re-register
    }
}
# Refill with snis:
# Record the sirtx-numerical-identifier (sni) number of each listed identifier in its id cache.
{
    # ISE => sni
    my %sni_of = (
        WK_NULL()                              => 0,   # NULL
        WK_SNI()                               => 10,  # sirtx-numerical-identifier
        WK_SID()                               => 115, # small-identifier
        '2bffc55d-7380-454e-bd53-c5acd525d692' => 116, # roaraudio-error-number
        WK_CHAT0W()                            => 118, # chat-0-word-identifier
        WK_UUID()                              => 119,
        WK_OID()                               => 120,
        WK_URI()                               => 121,
        WK_WD()                                => 123,
    );

    while (my ($ise, $sni) = each %sni_of) {
        my $identifier = __PACKAGE__->new(ise => $ise);

        # Create the id cache on first use; an sni that is already cached wins.
        my $cache = $identifier->{id_cache} //= {};
        $cache->{WK_SNI()} //= $sni;

        $identifier->register; # re-register
    }
}
# Update NULL:
# The NULL identifier also gets the value 0 cached for the
# host-defined-identifier and chat-0-word-identifier types.
{
    my $null  = __PACKAGE__->new(uuid => WK_NULL);
    my $cache = $null->{id_cache} //= {};

    # Values that are already cached are left alone.
    $cache->{$_} //= 0 for (WK_HDI, WK_CHAT0W);

    $null->register;
}
# Some extra tags such as namespaces:
# Nothing is attached to them at this point; they are only created and registered.
for my $ns_uuid (NS_WD, NS_INT, NS_DATE) {
    __PACKAGE__->new(ise => $ns_uuid)->register; # re-register
}
# Refill with tagnames
# Give each listed identifier its tagname, unless it already has one.
{
    # ISE => tagname
    my %tagname_of = (
        # Well known identifier types:
        WK_NULL()       => 'null',
        WK_UUID()       => 'uuid',
        WK_OID()        => 'oid',
        WK_URI()        => 'uri',
        WK_SID()        => 'small-identifier',
        WK_WD()         => 'wikidata-identifier',
        WK_GTIN()       => 'gtin',
        WK_IBAN()       => 'iban',
        WK_BIC()        => 'bic',
        WK_DOI()        => 'doi',
        WK_FC()         => 'factgrid-identifier',
        WK_UNICODE_CP() => 'unicode-code-point',
        WK_SNI()        => 'sirtx-numerical-identifier',
        WK_HDI()        => 'host-defined-identifier',
        WK_UDI()        => 'user-defined-identifier',
        WK_CHAT0W()     => 'chat-0-word-identifier',

        # Namespaces:
        # NOTE(review): NS_GTIN is the only NS_* constant without an entry here --
        # confirm whether that is intended.
        NS_WD()         => 'Wikidata-namespace',
        NS_FC()         => 'factgrid-namespace',
        NS_INT()        => 'integer-namespace',
        NS_DATE()       => 'gregorian-date-namespace',
        NS_UNICODE_CP() => 'unicode-character-namespace',

        # Identifiers that have no constant in this file:
        'ddd60c5c-2934-404f-8f2d-fcb4da88b633' => 'also-shares-identifier',
        'bfae7574-3dae-425d-89b1-9c087c140c23' => 'tagname',
        '7f265548-81dc-4280-9550-1bd0aa4bf748' => 'has-type',
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1' => 'language-tag-identifier',
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4' => 'specialises',
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1' => 'unicode-string',
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1' => 'integer',
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d' => 'unsigned-integer',
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4' => 'default-context',
        '52a516d0-25d8-47c7-a6ba-80983e576c54' => 'proto-file',
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d' => 'final-file-size',
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0' => 'text-fragment',
        '4c9656eb-c130-42b7-9348-a1fee3f42050' => 'also-list-contains-also',
        '298ef373-9731-491d-824d-b2836250e865' => 'proto-message',
        '7be4d8c7-6a75-44cc-94f7-c87433307b26' => 'proto-entity',
        '65bb36f2-b558-48af-8512-bca9150cca85' => 'proxy-type',
        'a1c478b5-0a85-4b5b-96da-d250db14a67c' => 'flagged-as',
        '59cfe520-ba32-48cc-b654-74f7a05779db' => 'marked-as',
        '2bffc55d-7380-454e-bd53-c5acd525d692' => 'roaraudio-error-number',
        'd2750351-aed7-4ade-aa80-c32436cc6030' => 'also-has-role',
        '11d8962c-0a71-4d00-95ed-fa69182788a8' => 'also-has-comment',
        '30710bdb-6418-42fb-96db-2278f3bfa17f' => 'also-has-description',
        '448c50a8-c847-4bc7-856e-0db5fea8f23b' => 'final-file-encoding',
        '79385945-0963-44aa-880a-bca4a42e9002' => 'final-file-hash',
        '3fde5688-6e34-45e9-8f33-68f079b152c8' => 'SEEK_SET',
        'bc598c52-642e-465b-b079-e9253cd6f190' => 'SEEK_CUR',
        '06aff30f-70e8-48b4-8b20-9194d22fc460' => 'SEEK_END',
        '59a5691a-6a19-4051-bc26-8db82c019df3' => 'inode',

        # Generators:
        '53863a15-68d4-448d-bd69-a9b19289a191' => 'unsigned-integer-generator',
        'e8aa9e01-8d37-4b4b-8899-42ca0a2a906f' => 'signed-integer-generator',
        'd74f8c35-bcb8-465c-9a77-01010e8ed25c' => 'unicode-character-generator',
        '55febcc4-6655-4397-ae3d-2353b5856b34' => 'rgb-colour-generator',
        '97b7f241-e1c5-4f02-ae3c-8e31e501e1dc' => 'date-generator',
        '19659233-0a22-412c-bdf1-8ee9f8fc4086' => 'multiplicity-generator',
        '5ec197c3-1406-467c-96c7-4b1a6ec2c5c9' => 'minimum-multiplicity-generator',
    );

    while (my ($ise, $tagname) = each %tagname_of) {
        my $identifier = __PACKAGE__->new(ise => $ise);

        # The tagname is stored as a one-element array reference.
        $identifier->{tagname} //= [$tagname];

        $identifier->register; # re-register
    }
}
# Identifier types whose values live in a namespace of their own.
# Each table maps ISE -> namespace; the tables differ only in the pattern
# used to validate values of the type.
{
    # Values are unsigned integers:
    my %namespaces_uint = (
        '4a7fc2e2-854b-42ec-b24f-c7fece371865' => 'ac59062c-6ba2-44de-9f54-09e28f2c0b5c', # e621-post-identifier: e621-post-namespace
        'a0a4fae2-be6f-4a51-8326-6110ba845a16' => '69b7ff38-ca78-43a8-b9ea-66cb02312eef', # e621-pool-identifier: e621-pool-namespace
        '6e3590b6-2a0c-4850-a71f-8ba196a52280' => 'b96e5d94-0767-40fa-9864-5977eb507ae0', # danbooru2chanjp-post-identifier: danbooru2chanjp-post-namespace
    );

    # Values are signed integers:
    my %namespaces_sint = (
        '2bffc55d-7380-454e-bd53-c5acd525d692' => '744eaf4e-ae93-44d8-9ab5-744105222da6', # roaraudio-error-number: roaraudio-error-namespace
    );

    # Values are simple tags:
    my %namespaces_simple_tag = (
        '6fe0dbf0-624b-48b3-b558-0394c14bad6a' => '3623de4d-0dd4-4236-946a-2613467d50f1', # e621tag: e621tag-namespace
        'c5632c60-5da2-41af-8b60-75810b622756' => '93f2c36b-8cb6-4f2c-924b-98188f224235', # danbooru2chanjp-tag: danbooru2chanjp-tag-namespace
    );

    # The groups are processed in this order: uint, sint, simple tag.
    my @groups = (
        [\%namespaces_uint,       RE_UINT],
        [\%namespaces_sint,       RE_SINT],
        [\%namespaces_simple_tag, RE_SIMPLE_TAG],
    );

    foreach my $group (@groups) {
        my ($namespace_of, $validate) = @{$group};

        foreach my $ise (keys %{$namespace_of}) {
            my $identifier = __PACKAGE__->new(ise => $ise);

            # Only fill in what is still unset. The namespace identifier is
            # created here but not registered by this block.
            $identifier->{namespace} //= __PACKAGE__->new(ise => $namespace_of->{$ise});
            $identifier->{validate}  //= $validate;
            $identifier->{generate}  //= 'id-based';

            $identifier->register; # re-register
        }
    }

    # validate => RE_QID, namespace => NS_FC, generate => 'id-based'
}
# Call this after we loaded all our stuff and before anyone else will register stuff:
# NOTE(review): _known_provider is not defined in the visible part of this file;
# presumably it is defined further down or inherited from a parent class -- confirm.
__PACKAGE__->_known_provider('wellknown');
sub new {
( run in 0.883 second using v1.01-cache-2.11-cpan-13bb782fe5a )