At

 view release on metacpan or  search on metacpan

lib/At/Protocol/URI.pm  view on Meta::CPAN

    sub _parse($uri) {
        my @res = $uri =~ ATP_URI_REGEX();
        @res or return;
        { hash => $res[4] // '', host => $res[1] // '', pathname => $res[2] // '', searchParams => At::Protocol::URI::_query->new( $res[3] // '' ) };
    }

    sub _parseRelative($uri) {
        my @res = $uri =~ RELATIVE_REGEX();
        @res or return;
        { hash => $res[2] // '', pathname => $res[0] // '', searchParams => At::Protocol::URI::_query->new( $res[1] // '' ) };
    }

    sub as_string($s) {
        my $path = $s->pathname // '';
        $path = '/' . $path if $path !~ m[^/];
        my $qs = $s->search;
        $qs = '?' . $qs if length $qs && $qs !~ m[^\?];
        my $hash = $s->hash;
        $hash = '#' . $hash if length $hash && $hash !~ m[^#];
        join '', grep {defined} $s->origin, $path, $qs, $hash;
    }

    sub create ( $handle_r_did, $collection //= (), $rkey //= () ) {
        At::Protocol::URI->new( join '/', grep {defined} $handle_r_did, $collection, $rkey );
    }
    sub protocol ($s)             {'at:'}
    sub origin($s)                { $s->protocol . '//' . $s->host }
    sub host ( $s, $v //= () )    { $v // return $s->{host}; $s->{host} = $v }
    sub pathname( $s, $v //= () ) { $v // return $s->{pathname}; $s->{pathname} = $v }

    sub search ( $s, $v //= () ) {
        $v // return $s->{searchParams};
        $s->{searchParams}->parse_params($v);
    }
    sub hash ( $s, $v //= () ) { $v // return $s->{hash}; $s->{hash} = $v; }

    sub collection ( $s, $v //= () ) {
        return [ grep {length} split '/', $s->pathname ]->[0] || '' unless defined $v;
        my @parts = split '/', $s->pathname;
        $parts[0] = $v;
        $s->pathname( join '/', @parts );
    }

    sub rkey ( $s, $v //= () ) {
        return [ grep {length} split '/', $s->pathname ]->[1] || '' unless defined $v;
        my @parts = split '/', $s->pathname;
        $parts[0] //= 'undefined';
        $parts[1] = $v;
        $s->pathname( join '/', @parts );
    }

    #~ Validation utils from https://github.com/bluesky-social/atproto/blob/main/packages/syntax/src/aturi_validation.ts
    #~  Human-readable constraints on ATURI:
    #~    - following regular URLs, a 8KByte hard total length limit
    #~    - follows ATURI docs on website
    #~       - all ASCII characters, no whitespace. non-ASCII could be URL-encoded
    #~       - starts "at://"
    #~       - "authority" is a valid DID or a valid handle
    #~       - optionally, follow "authority" with "/" and valid NSID as start of path
    #~       - optionally, if NSID given, follow that with "/" and rkey
    #~       - rkey path component can include URL-encoded ("percent encoded"), or:
    #~           ALPHA / DIGIT / "-" / "." / "_" / "~" / ":" / "@" / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
    #~           [a-zA-Z0-9._~:@!$&'\(\)*+,;=-]
    #~       - rkey must have at least one char
    #~       - regardless of path component, a fragment can follow  as "#" and then a JSON pointer (RFC-6901)
    sub ensureValidAtUri($uri) {
        my $fragmentPart;
        my @uriParts = split '#', $uri, -1;    # negative limit, ftw
        throw InvalidAtUriError('ATURI can have at most one "#", separating fragment out') if scalar @uriParts > 2;
        $fragmentPart = $uriParts[1];
        $uri          = $uriParts[0];

        # Check that all chars are boring ASCII
        throw InvalidAtUriError('Disallowed characters in ATURI (ASCII)') unless $uri =~ /^[a-zA-Z0-9._~:@!\$&')(*+,;=%\/-]*$/;
        #
        my @parts = split /\//, $uri, -1;      # negative limit, ftw
        throw InvalidAtUriError('ATURI must start with "at://"') if scalar @parts >= 3 && ( $parts[0] ne 'at:' || length $parts[1] );
        throw InvalidAtUriError('ATURI requires at least method and authority sections') if scalar @parts < 3;
        try {
            if   ( $parts[2] =~ m/^did:/ ) { ensureValidDid( $parts[2] ); }
            else                           { ensureValidHandle( $parts[2] ) }
        }
        catch ($err) {
            throw InvalidAtUriError('ATURI authority must be a valid handle or DID');
        };
        if ( scalar @parts >= 4 ) {
            if ( !length $parts[3] ) {
                throw InvalidAtUriError('ATURI can not have a slash after authority without a path segment');
            }
            try {
                ensureValidNsid( $parts[3] );
            }
            catch ($err) {
                throw InvalidAtUriError('ATURI requires first path segment (if supplied) to be valid NSID')
            }
        }
        if ( scalar @parts >= 5 ) {
            throw InvalidAtUriError('ATURI can not have a slash after collection, unless record key is provided') if !length $parts[4]

            # would validate rkey here, but there are basically no constraints!
        }
        throw InvalidAtUriError('ATURI path can have at most two parts, and no trailing slash') if scalar @parts >= 6;
        throw InvalidAtUriError('ATURI fragment must be non-empty and start with slash')        if scalar @uriParts >= 2 && !defined $fragmentPart;
        if ( defined $fragmentPart ) {
            throw InvalidAtUriError('ATURI fragment must be non-empty and start with slash')
                if length $fragmentPart == 0 || substr( $fragmentPart, 0, 1 ) ne '/';

            # NOTE: enforcing *some* checks here for sanity. Eg, at least no whitespace
            throw InvalidAtUriError( 'Disallowed characters in ATURI fragment (ASCII)' . $fragmentPart )
                if $fragmentPart !~ /^\/[a-zA-Z0-9._~:@!\$&')(*+,;=%[\]\/-]*$/;
        }
        throw InvalidAtUriError('ATURI is far too long') if length $uri > 8 * 1024;
        1;
    }

    sub ensureValidAtUriRegex($uri) {

        #~ simple regex to enforce most constraints via just regex and length.
        my $aturiRegex
            = qr/^at:\/\/(?<authority>[a-zA-Z0-9._:%-]+)(\/(?<collection>[a-zA-Z0-9-.]+)(\/(?<rkey>[a-zA-Z0-9._~:@!\$&%')(*+,;=-]+))?)?(#(?<fragment>\/[a-zA-Z0-9._~:@!\$&%')(*+,;=\-[\]\/\\]*))?$/;
        my ($rm) = $uri =~ $aturiRegex;



( run in 1.152 second using v1.01-cache-2.11-cpan-39bf76dae61 )