Data-Identifier

 view release on metacpan or  search on metacpan

lib/Data/Identifier.pm  view on Meta::CPAN

#!/usr/bin/perl -w

# Copyright (c) 2023-2026 Philipp Schafft

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: format independent identifier object


package Data::Identifier;

use v5.20;
use strict;
use warnings;

use parent qw(Data::Identifier::Interface::Known Data::Identifier::Interface::Userdata);

use Carp;
use Math::BigInt lib => 'GMP';
use URI;

our $VERSION = v0.30;

# Validation patterns for the identifier formats this module knows about.
# All patterns are anchored at the start of the string. RE_URI and RE_DOI
# only check a prefix; every other pattern must match the whole string.

# UUID in 8-4-4-4-12 form, lower-case hex digits only.
use constant RE_UUID       => qr/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}\z/;
# Dotted OID: first arc 0-2, then one or more arcs without leading zeros.
use constant RE_OID        => qr/^[0-2](?:\.(?:0|[1-9][0-9]*))+\z/;
# URI: only the scheme (a letter plus at least one more scheme character) and the colon are checked.
use constant RE_URI        => qr/^[a-zA-Z][a-zA-Z0-9\+\.\-]+:/;
# Non-negative decimal integer without leading zeros.
use constant RE_UINT       => qr/^(?:0|[1-9][0-9]*)\z/;
# Decimal integer without leading zeros; an optional minus sign is allowed, "-0" is not.
use constant RE_SINT       => qr/^(?:0|-?[1-9][0-9]*)\z/;
# One of the letters Q, P or L followed by a positive integer without leading zeros.
use constant RE_QID        => qr/^[QPL][1-9][0-9]*\z/;
# DOI: "10." prefix, numeric registrant part(s), a slash and at least one more character.
use constant RE_DOI        => qr/^10\.[1-9][0-9]+(?:\.[0-9]+)*\/./;
# GTIN: exactly 8, 12, 13 or 14 decimal digits.
use constant RE_GTIN       => qr/^[0-9]{8}(?:[0-9]{4,6})?\z/;
# Unicode code point in "U+XXXX" notation; the 4 to 7 upper-case hex digits are captured.
use constant RE_UNICODE    => qr/^U\+([0-9A-F]{4,7})\z/;
# Non-empty string containing neither upper-case characters nor whitespace.
use constant RE_SIMPLE_TAG => qr/^[^\p{upper case}\s]+\z/;

# UUIDs of well-known identifiers (mostly identifier types).
# The trailing comment on each line gives the name of the identified tag.
use constant {
    WK_NULL       => '00000000-0000-0000-0000-000000000000', # NULL, undef, ...
    WK_UUID       => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', # uuid
    WK_OID        => 'd08dc905-bbf6-4183-b219-67723c3c8374', # oid
    WK_URI        => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', # uri
    WK_SID        => 'f87a38cb-fd13-4e15-866c-e49901adbec5', # small-identifier
    WK_WD         => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', # wikidata-identifier
    WK_GTIN       => '82d529be-0f00-4b4f-a43f-4a22de5f5312', # gtin
    WK_IBAN       => 'b1418262-6bc9-459c-b4b0-a054d77db0ea', # iban
    WK_BIC        => 'c8a3a132-f160-473c-b5f3-26a748f37e62', # bic
    WK_DOI        => '931f155e-5a24-499b-9fbb-ed4efefe27fe', # doi
    WK_FC         => 'd576b9d1-47d4-43ae-b7ec-bbea1fe009ba', # factgrid-identifier
    WK_UNICODE_CP => '5f167223-cc9c-4b2f-9928-9fe1b253b560', # unicode-code-point
    WK_SNI        => '039e0bb7-5dd3-40ee-a98c-596ff6cce405', # sirtx-numerical-identifier
    WK_HDI        => 'f8eb04ef-3b8a-402c-ad7c-1e6814cb1998', # host-defined-identifier
    WK_UDI        => '05af99f9-4578-4b79-aabe-946d8e6f5888', # user-defined-identifier
    WK_CHAT0W     => '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a', # chat-0-word-identifier
};

# UUIDs of the namespaces used together with the identifier types above.
use constant {
    NS_WD         => '9e10aca7-4a99-43ac-9368-6cbfa43636df', # Wikidata-namespace
    NS_FC         => '6491f7a9-0b29-4ef1-992c-3681cea18182', # factgrid-namespace
    NS_INT        => '5dd8ddbb-13a8-4d6c-9264-36e6dd6f9c99', # integer-namespace
    NS_DATE       => 'fc43fbba-b959-4882-b4c8-90a288b7d416', # gregorian-date-namespace
    NS_GTIN       => 'd95d8b1f-5091-4642-a6b0-a585313915f1', # gtin-namespace
    NS_UNICODE_CP => '132aa723-a373-48bf-a88d-69f1e00f00cf', # unicode-character-namespace
};

# Features:
my $enabled_oid = 1;

# Type UUID -> short name.
# NOTE(review): judging by the variable name these are the names used by
# uriid.org; the consumer is outside the visible part of the file -- confirm.
my %uuid_to_uriid_org = (
    WK_UUID() => 'uuid',
    WK_OID()  => 'oid',
    WK_URI()  => 'uri',
    WK_SID()  => 'sid',
    WK_GTIN() => 'gtin',
    WK_WD()   => 'wikidata-identifier',
);

# Inverse lookup: short name -> type UUID.
# The names above are unique, so swapping keys and values loses nothing.
my %uuid_org_to_uuid = reverse %uuid_to_uriid_org;

# Identifier object for the 'uuid' type itself (WK_UUID), created via 'ise'.
# It has to exist first: every other entry below passes it as the first
# argument to new().
# NOTE(review): new() is not visible here; presumably the first argument is the
# type of the identifier and the second its value -- confirm.
my $well_known_uuid = __PACKAGE__->new(ise => WK_UUID, validate => RE_UUID);

# Short name => identifier object for each well-known identifier type.
# Where given, 'validate' is the pattern for values of that type and
# 'generate' is set to 'id-based'.
# NOTE(review): the exact meaning of 'validate'/'generate' is defined by code
# outside this view -- confirm.
my %well_known = (
    uuid => $well_known_uuid,
    oid  => __PACKAGE__->new($well_known_uuid => WK_OID,    validate => RE_OID),
    uri  => __PACKAGE__->new($well_known_uuid => WK_URI,    validate => RE_URI),
    sid  => __PACKAGE__->new($well_known_uuid => WK_SID,    validate => RE_UINT),
    sni  => __PACKAGE__->new($well_known_uuid => WK_SNI,    validate => RE_UINT),
    wd   => __PACKAGE__->new($well_known_uuid => WK_WD,     validate => RE_QID,  generate => 'id-based'),
    fc   => __PACKAGE__->new($well_known_uuid => WK_FC,     validate => RE_QID,  generate => 'id-based'),
    gtin => __PACKAGE__->new($well_known_uuid => WK_GTIN,   validate => RE_GTIN, generate => 'id-based'),
    iban => __PACKAGE__->new($well_known_uuid => WK_IBAN),
    bic  => __PACKAGE__->new($well_known_uuid => WK_BIC),
    doi  => __PACKAGE__->new($well_known_uuid => WK_DOI,    validate => RE_DOI),

    # Unofficial, not part of public API:
    # Also used by Data::Identifier::Util!
    unicodecp => __PACKAGE__->new($well_known_uuid => WK_UNICODE_CP, validate => RE_UNICODE, generate => 'id-based'),

    hdi  => __PACKAGE__->new($well_known_uuid => WK_HDI, validate => RE_UINT),
    udi  => __PACKAGE__->new($well_known_uuid => WK_UDI, validate => RE_UINT),
    null => __PACKAGE__->new($well_known_uuid => WK_NULL),
);

# NOTE(review): not used in the visible part of the file; presumably the
# registry that register() fills -- confirm. Do not move this declaration
# without checking how new()/register() use it.
my %registered;

# Register every well-known identifier type created above.
$_->register foreach values %well_known;

# Refill with namespaces:
# Attach a namespace identifier to those well-known types that have one.
# A namespace that is already set is left untouched.
{
    # [short name in %well_known, ISE of its namespace]
    my @namespace_of = (
        [wd        => NS_WD],
        [fc        => NS_FC],
        [gtin      => NS_GTIN],
        [unicodecp => NS_UNICODE_CP],
    );

    foreach my $entry (@namespace_of) {
        my ($name, $namespace_ise) = @{$entry};
        my $type = $well_known{$name};

        # The namespace identifier is only created (and registered) when needed.
        $type->{namespace} //= __PACKAGE__->new(ise => $namespace_ise)->register;
    }
}

# Refill with sids:
# Store the small-identifier (sid) of each listed ISE in that identifier's
# id_cache, keyed by WK_SID. Values that are already cached are kept.
{
    # ISE => small-identifier
    my %sid_of = (
        WK_NULL()                               =>   0, # NULL
        'ddd60c5c-2934-404f-8f2d-fcb4da88b633'  =>   1, # also-shares-identifier
        WK_UUID()                               =>   2,
        'bfae7574-3dae-425d-89b1-9c087c140c23'  =>   3, # tagname
        '7f265548-81dc-4280-9550-1bd0aa4bf748'  =>   4, # has-type
        WK_URI()                                =>   5,
        WK_OID()                                =>   6,
        # Unassigned: 7
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1'  =>   8, # language-tag-identifier
        WK_WD()                                 =>   9,
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4'  =>  10, # specialises
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1'  =>  11, # unicode-string
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1'  =>  12, # integer
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d'  =>  13, # unsigned-integer
        # Unassigned: 14, 15
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4'  =>  16, # default-context
        '52a516d0-25d8-47c7-a6ba-80983e576c54'  =>  17, # proto-file
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d'  =>  18, # final-file-size
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0'  =>  19, # text-fragment
        '4c9656eb-c130-42b7-9348-a1fee3f42050'  =>  20, # also-list-contains-also
        '298ef373-9731-491d-824d-b2836250e865'  =>  21, # proto-message
        '7be4d8c7-6a75-44cc-94f7-c87433307b26'  =>  22, # proto-entity
        '65bb36f2-b558-48af-8512-bca9150cca85'  =>  23, # proxy-type
        'a1c478b5-0a85-4b5b-96da-d250db14a67c'  =>  24, # flagged-as
        '59cfe520-ba32-48cc-b654-74f7a05779db'  =>  25, # marked-as
        '2bffc55d-7380-454e-bd53-c5acd525d692'  =>  26, # roaraudio-error-number
        WK_SID()                                =>  27,
        'd2750351-aed7-4ade-aa80-c32436cc6030'  =>  28, # also-has-role
        '11d8962c-0a71-4d00-95ed-fa69182788a8'  =>  29, # also-has-comment
        '30710bdb-6418-42fb-96db-2278f3bfa17f'  =>  30, # also-has-description
        # Unassigned: 31
        '448c50a8-c847-4bc7-856e-0db5fea8f23b'  =>  32, # final-file-encoding
        '79385945-0963-44aa-880a-bca4a42e9002'  =>  33, # final-file-hash
        '3fde5688-6e34-45e9-8f33-68f079b152c8'  =>  34, # SEEK_SET
        'bc598c52-642e-465b-b079-e9253cd6f190'  =>  35, # SEEK_CUR
        '06aff30f-70e8-48b4-8b20-9194d22fc460'  =>  36, # SEEK_END
        '59a5691a-6a19-4051-bc26-8db82c019df3'  =>  37, # inode
        WK_CHAT0W()                             => 112, # chat-0-word-identifier
        WK_SNI()                                => 113, # sirtx-numerical-identifier
        WK_GTIN()                               => 160,
    );

    while (my ($ise, $sid) = each %sid_of) {
        my $tag   = __PACKAGE__->new(ise => $ise);
        my $cache = $tag->{id_cache} //= {}; # create the cache on first use

        $cache->{WK_SID()} //= $sid;
        $tag->register; # re-register
    }
}

# Refill with snis:
# Store the sirtx-numerical-identifier (sni) of each listed ISE in that
# identifier's id_cache, keyed by WK_SNI. Values that are already cached are kept.
{
    # ISE => sirtx-numerical-identifier
    my %sni_of = (
        WK_NULL()                               =>   0, # NULL
        WK_SNI()                                =>  10, # sirtx-numerical-identifier
        WK_SID()                                => 115, # small-identifier
        '2bffc55d-7380-454e-bd53-c5acd525d692'  => 116, # roaraudio-error-number
        WK_CHAT0W()                             => 118, # chat-0-word-identifier
        WK_UUID()                               => 119,
        WK_OID()                                => 120,
        WK_URI()                                => 121,
        WK_WD()                                 => 123,
    );

    while (my ($ise, $sni) = each %sni_of) {
        my $tag   = __PACKAGE__->new(ise => $ise);
        my $cache = $tag->{id_cache} //= {}; # create the cache on first use

        $cache->{WK_SNI()} //= $sni;
        $tag->register; # re-register
    }
}

# Update NULL:
# Seed the id_cache of the NULL identifier with the value 0 for the
# host-defined-identifier and chat-0-word-identifier types, unless a value
# is already cached.
{
    my $null  = __PACKAGE__->new(uuid => WK_NULL);
    my $cache = $null->{id_cache} //= {}; # create the cache on first use

    $cache->{$_} //= 0 for (WK_HDI, WK_CHAT0W);

    $null->register;
}

# Some extra tags such as namespaces:
# Create an identifier for each of these ISEs and register it right away.
for my $namespace_ise (NS_WD, NS_INT, NS_DATE) {
    __PACKAGE__->new(ise => $namespace_ise)->register; # re-register
}

# Refill with tagnames
# Give each listed ISE its tagname (stored as a one-element array reference)
# unless the identifier already carries one.
{
    # ISE => tagname
    # NOTE(review): NS_GTIN is the only NS_* constant without an entry here --
    # confirm whether that is intentional.
    my %tagname_of = (
        WK_NULL()                               => 'null',
        WK_UUID()                               => 'uuid',
        WK_OID()                                => 'oid',
        WK_URI()                                => 'uri',
        WK_SID()                                => 'small-identifier',
        WK_WD()                                 => 'wikidata-identifier',
        WK_GTIN()                               => 'gtin',
        WK_IBAN()                               => 'iban',
        WK_BIC()                                => 'bic',
        WK_DOI()                                => 'doi',
        WK_FC()                                 => 'factgrid-identifier',
        WK_UNICODE_CP()                         => 'unicode-code-point',
        WK_SNI()                                => 'sirtx-numerical-identifier',
        WK_HDI()                                => 'host-defined-identifier',
        WK_UDI()                                => 'user-defined-identifier',
        WK_CHAT0W()                             => 'chat-0-word-identifier',
        NS_WD()                                 => 'Wikidata-namespace',
        NS_FC()                                 => 'factgrid-namespace',
        NS_INT()                                => 'integer-namespace',
        NS_DATE()                               => 'gregorian-date-namespace',
        NS_UNICODE_CP()                         => 'unicode-character-namespace',

        'ddd60c5c-2934-404f-8f2d-fcb4da88b633'  => 'also-shares-identifier',
        'bfae7574-3dae-425d-89b1-9c087c140c23'  => 'tagname',
        '7f265548-81dc-4280-9550-1bd0aa4bf748'  => 'has-type',
        'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1'  => 'language-tag-identifier',
        '923b43ae-a50e-4db3-8655-ed931d0dd6d4'  => 'specialises',
        'eacbf914-52cf-4192-a42c-8ecd27c85ee1'  => 'unicode-string',
        '928d02b0-7143-4ec9-b5ac-9554f02d3fb1'  => 'integer',
        'dea3782c-6bcb-4ce9-8a39-f8dab399d75d'  => 'unsigned-integer',
        '6ba648c2-3657-47c2-8541-9b73c3a9b2b4'  => 'default-context',
        '52a516d0-25d8-47c7-a6ba-80983e576c54'  => 'proto-file',
        '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d'  => 'final-file-size',
        '6085f87e-4797-4bb2-b23d-85ff7edc1da0'  => 'text-fragment',
        '4c9656eb-c130-42b7-9348-a1fee3f42050'  => 'also-list-contains-also',
        '298ef373-9731-491d-824d-b2836250e865'  => 'proto-message',
        '7be4d8c7-6a75-44cc-94f7-c87433307b26'  => 'proto-entity',
        '65bb36f2-b558-48af-8512-bca9150cca85'  => 'proxy-type',
        'a1c478b5-0a85-4b5b-96da-d250db14a67c'  => 'flagged-as',
        '59cfe520-ba32-48cc-b654-74f7a05779db'  => 'marked-as',
        '2bffc55d-7380-454e-bd53-c5acd525d692'  => 'roaraudio-error-number',
        'd2750351-aed7-4ade-aa80-c32436cc6030'  => 'also-has-role',
        '11d8962c-0a71-4d00-95ed-fa69182788a8'  => 'also-has-comment',
        '30710bdb-6418-42fb-96db-2278f3bfa17f'  => 'also-has-description',
        '448c50a8-c847-4bc7-856e-0db5fea8f23b'  => 'final-file-encoding',
        '79385945-0963-44aa-880a-bca4a42e9002'  => 'final-file-hash',
        '3fde5688-6e34-45e9-8f33-68f079b152c8'  => 'SEEK_SET',
        'bc598c52-642e-465b-b079-e9253cd6f190'  => 'SEEK_CUR',
        '06aff30f-70e8-48b4-8b20-9194d22fc460'  => 'SEEK_END',
        '59a5691a-6a19-4051-bc26-8db82c019df3'  => 'inode',
        '53863a15-68d4-448d-bd69-a9b19289a191'  => 'unsigned-integer-generator',
        'e8aa9e01-8d37-4b4b-8899-42ca0a2a906f'  => 'signed-integer-generator',
        'd74f8c35-bcb8-465c-9a77-01010e8ed25c'  => 'unicode-character-generator',
        '55febcc4-6655-4397-ae3d-2353b5856b34'  => 'rgb-colour-generator',
        '97b7f241-e1c5-4f02-ae3c-8e31e501e1dc'  => 'date-generator',
        '19659233-0a22-412c-bdf1-8ee9f8fc4086'  => 'multiplicity-generator',
        '5ec197c3-1406-467c-96c7-4b1a6ec2c5c9'  => 'minimum-multiplicity-generator',
    );

    while (my ($ise, $tagname) = each %tagname_of) {
        my $tag = __PACKAGE__->new(ise => $ise);

        $tag->{tagname} //= [$tagname];
        $tag->register; # re-register
    }
}

# Refill identifier types with their namespace, validation pattern and
# generator. Fields that are already set on an identifier are left untouched;
# 'generate' is 'id-based' for every type listed here.
{
    # Each group: [validation pattern, { type ISE => namespace ISE }]
    my @groups = (
        [RE_UINT, {
            '4a7fc2e2-854b-42ec-b24f-c7fece371865' => 'ac59062c-6ba2-44de-9f54-09e28f2c0b5c', # e621-post-identifier: e621-post-namespace
            'a0a4fae2-be6f-4a51-8326-6110ba845a16' => '69b7ff38-ca78-43a8-b9ea-66cb02312eef', # e621-pool-identifier: e621-pool-namespace
            '6e3590b6-2a0c-4850-a71f-8ba196a52280' => 'b96e5d94-0767-40fa-9864-5977eb507ae0', # danbooru2chanjp-post-identifier: danbooru2chanjp-post-namespace
        }],
        [RE_SINT, {
            '2bffc55d-7380-454e-bd53-c5acd525d692' => '744eaf4e-ae93-44d8-9ab5-744105222da6', # roaraudio-error-number: roaraudio-error-namespace
        }],
        [RE_SIMPLE_TAG, {
            '6fe0dbf0-624b-48b3-b558-0394c14bad6a' => '3623de4d-0dd4-4236-946a-2613467d50f1', # e621tag: e621tag-namespace
            'c5632c60-5da2-41af-8b60-75810b622756' => '93f2c36b-8cb6-4f2c-924b-98188f224235', # danbooru2chanjp-tag: danbooru2chanjp-tag-namespace
        }],
    );

    foreach my $group (@groups) {
        my ($pattern, $namespace_of) = @{$group};

        foreach my $ise (keys %{$namespace_of}) {
            my $type = __PACKAGE__->new(ise => $ise);

            # The namespace identifier is only created when none is set yet.
            $type->{namespace} //= __PACKAGE__->new(ise => $namespace_of->{$ise});
            $type->{validate}  //= $pattern;
            $type->{generate}  //= 'id-based';
            $type->register; # re-register
        }
    }

    # Note kept from the original code:
    # validate => RE_QID, namespace => NS_FC, generate => 'id-based'
}

# Call this after we have loaded all our stuff and before anyone else registers anything:
# NOTE(review): _known_provider is not defined in the visible part of this
# file; presumably it is inherited from Data::Identifier::Interface::Known -- confirm.
__PACKAGE__->_known_provider('wellknown');


sub new {



( run in 0.883 second using v1.01-cache-2.11-cpan-13bb782fe5a )