Attean
view release on metacpan or search on metacpan
lib/AtteanX/Parser/SPARQLLex.pm view on Meta::CPAN
use v5.14;
use warnings;
=head1 NAME
AtteanX::Parser::SPARQLLex - SPARQL Lexer
=head1 VERSION
This document describes AtteanX::Parser::SPARQLLex version 0.038
=head1 SYNOPSIS
use Attean;
=head1 DESCRIPTION
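This module implements a lexer for SPARQL queries and updates. Rather than producing a parsed query, it returns an iterator of L<AtteanX::SPARQL::Token> objects read from a byte string or a filehandle.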
=head1 ATTRIBUTES
=over 4
=item C<< canonical_media_type >>
=item C<< media_types >>
=item C<< file_extensions >>
=item C<< handled_type >>
=item C<< extend >>
=back
=head1 METHODS
=over 4
=cut
package AtteanX::Parser::SPARQLLex 0.038 {
    use utf8;
    use Moo;
    use Attean;
    use Encode;
    use Encode qw(decode);
    use Types::Standard qw(ArrayRef);
    use namespace::clean;

    # Returns the canonical media type string for this parser.
    sub canonical_media_type { return "application/x-sparql-query-tokens" }

    # these pass through to the lexer iterator
    has extend => ( is => 'ro', isa => ArrayRef, default => sub { [] } );

    # Returns an array reference of every media type this parser accepts.
    sub media_types {
        return [qw(application/x-sparql-query-tokens)];
    }

    # Returns the type constraint describing the items this parser emits:
    # objects consuming the AtteanX::SPARQL::Token role. The constraint is
    # held in a `state` variable, so it is constructed on the first call only.
    sub handled_type {
        state $ITEM_TYPE = Type::Tiny::Role->new(role => 'AtteanX::SPARQL::Token');
        return $ITEM_TYPE;
    }

=item C<< file_extensions >>

Returns a list of file extensions that may be parsed with the parser.

=cut

    sub file_extensions { return [qw(rq)] }

    # The roles are composed only after the subs above have been defined.
    # NOTE(review): presumably the roles require some of those methods, so
    # keep this statement below them -- confirm against the role definitions.
    with 'Attean::API::PullParser', 'Attean::API::Parser';

=item C<< parse_iter_from_bytes( $data ) >>

Returns an iterator of SPARQL tokens that result from parsing
the SPARQL query/update read from the UTF-8 encoded byte string C<< $data >>.

Dies if an in-memory filehandle cannot be opened on C<< $data >>.

=cut

    sub parse_iter_from_bytes {
        my $self = shift;
        my $data = shift;

        # Read the bytes through an in-memory handle that decodes UTF-8.
        # The open can fail (for example when $data holds code points above
        # 0xFF instead of bytes); report that here rather than handing an
        # undefined handle to the iterator.
        open(my $fh, '<:encoding(UTF-8)', \$data)
            or die "Cannot open in-memory filehandle for SPARQL data: $!";
        return $self->parse_iter_from_io($fh);
    }

=item C<< parse_iter_from_io( $fh ) >>

Returns an iterator of SPARQL tokens that result from parsing
the SPARQL query/update read from the L<IO::Handle> object C<< $fh >>.

=cut

    sub parse_iter_from_io {
        my $self = shift;
        my $fh   = shift;

        # The iterator does the lexing; this parser only forwards its
        # configuration and the input handle.
        return AtteanX::Parser::SPARQLLex::Iterator->new(
            extend => $self->extend,
            file   => $fh,
        );
    }
}
package AtteanX::Parser::SPARQLLex::Iterator 0.038 {
use utf8;
use Moo;
use Attean;
use Encode;
use Encode qw(decode);
use AtteanX::SPARQL::Token;
use AtteanX::SPARQL::Constants;
use Types::Standard qw(FileHandle Ref Str Int ArrayRef HashRef ConsumerOf InstanceOf);
use namespace::clean;
# Maps a single lookahead character to the name of the method that lexes
# the token beginning with that character; BUILD fills in the defaults.
has lookahead_methods => (
    is      => 'ro',
    isa     => HashRef,
    default => sub { return +{}; },
);

# Hash reference, empty by default.
# NOTE(review): presumably maps lookahead characters straight to token
# types; it is populated outside the code visible here -- confirm.
has lookahead_tokens => (
    is      => 'ro',
    isa     => HashRef,
    default => sub { return +{}; },
);

# Array reference of lexer extensions; the parser passes its own `extend`
# attribute through to this one when it constructs the iterator.
has extend => (
    is      => 'ro',
    isa     => ArrayRef,
    default => sub { return []; },
);

# Array reference, empty by default.
# NOTE(review): looks like a queue of already-lexed tokens -- confirm
# against the methods that read and write it.
has token_buffer => (
    is      => 'ro',
    isa     => ArrayRef,
    default => sub { return []; },
);

# Composed after the attributes above have been declared.
with 'AtteanX::API::Lexer';
# Lexical patterns for string literals, language tags and IRI references.
# The variable names follow the terminal names of the SPARQL grammar
# (NOTE(review): confirm each pattern against the grammar before editing).
#
# Escape sequence: a backslash followed by one of t b n r f \ " '
# (the escaped character is captured).
my $r_ECHAR = qr/\\([tbnrf\\"'])/o;
# Single-quoted string: any characters except ', backslash, LF and CR,
# or an escape sequence.
my $r_STRING_LITERAL1 = qr/'(([^\x{27}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*'/o;
# Double-quoted string: as above, with " as the excluded quote character.
my $r_STRING_LITERAL2 = qr/"(([^\x{22}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*"/o;
# Triple-single-quoted string: each body element is an optional run of one
# or two quotes followed by a non-quote, non-backslash character or an
# escape sequence; newlines are not excluded here.
my $r_STRING_LITERAL_LONG1 = qr/'''(('|'')?([^'\\]|${r_ECHAR}))*'''/o;
# Triple-double-quoted string: same shape with " as the quote character.
my $r_STRING_LITERAL_LONG2 = qr/"""(("|"")?([^"\\]|${r_ECHAR}))*"""/o;
# Language tag: '@', one or more ASCII letters, then any number of
# hyphen-separated alphanumeric subtags.
my $r_LANGTAG = qr/@[a-zA-Z]+(-[a-zA-Z0-9]+)*/o;
# IRI reference: '<' ... '>' whose content excludes < > " { } | ^ ` \
# and every character from U+0000 through U+0020.
my $r_IRI_REF = qr/<([^<>"{}|^`\\\x{00}-\x{20}])*>/o;
my $r_PN_CHARS_BASE = qr/([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}...
# Lexical patterns for variables, prefixed names, numbers, blank nodes and
# the empty-bracket forms. Each pattern is built from the ones above it, so
# the order of these declarations matters.
#
# Name-start character: underscore or any PN_CHARS_BASE character.
my $r_PN_CHARS_U = qr/([_]|${r_PN_CHARS_BASE})/o;
# Variable name: starts with a name-start character or digit, followed by
# any mix of those plus U+00B7, U+0300-U+036F and U+203F-U+2040.
my $r_VARNAME = qr/((${r_PN_CHARS_U}|[0-9])(${r_PN_CHARS_U}|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])*)/o;
# The two variable spellings: ?name and $name.
my $r_VAR1 = qr/[?]${r_VARNAME}/o;
my $r_VAR2 = qr/[\$]${r_VARNAME}/o;
# Name character: PN_CHARS_U plus '-', digits and the extra ranges above.
# This is a bare top-level alternation; it is safe to interpolate because a
# qr// object is embedded as its own self-contained group.
my $r_PN_CHARS = qr/${r_PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]/o;
# Prefix: starts with a PN_CHARS_BASE character; dots may appear inside
# but the final character must be a name character (never a dot).
my $r_PN_PREFIX = qr/(${r_PN_CHARS_BASE}((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o;
# Escapes allowed in a local name: a backslash before one of the listed
# punctuation characters, or a percent sign followed by two hex digits.
my $r_PN_LOCAL_ESCAPED = qr{(\\([-~.!&'()*+,;=/?#@%_\$]))|%[0-9A-Fa-f]{2}}o;
# Local name: first character is a name-start character, ':', a digit or
# an escape; later characters may also include '.', but the last one may
# not be a dot.
my $r_PN_LOCAL = qr/((${r_PN_CHARS_U}|[:0-9]|${r_PN_LOCAL_ESCAPED})((${r_PN_CHARS}|${r_PN_LOCAL_ESCAPED}|[:.])*(${r_PN_CHARS}|[:]|${r_PN_LOCAL_ESCAPED}))?)/o;
# Blank node label body: like a local name, but without ':' or escapes.
my $r_PN_LOCAL_BNODE = qr/((${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o;
# Namespace part of a prefixed name: an optional prefix followed by ':'.
my $r_PNAME_NS = qr/((${r_PN_PREFIX})?:)/o;
# Full prefixed name: namespace part immediately followed by a local name.
my $r_PNAME_LN = qr/(${r_PNAME_NS}${r_PN_LOCAL})/o;
# Numeric patterns. None of them accepts a leading sign.
# NOTE(review): signs are presumably handled by the number-lexing method
# that '-' and '+' dispatch to -- confirm.
my $r_EXPONENT = qr/[eE][-+]?\d+/o;
# Double: digits with an optional fraction, or a bare fraction, always
# followed by an exponent.
my $r_DOUBLE = qr/\d+[.]\d*${r_EXPONENT}|[.]\d+${r_EXPONENT}|\d+${r_EXPONENT}/o;
# Decimal: digits then a dot and optional digits, or a dot then digits.
my $r_DECIMAL = qr/(\d+[.]\d*)|([.]\d+)/o;
my $r_INTEGER = qr/\d+/o;
# Labelled blank node: '_:' followed by a label body.
my $r_BLANK_NODE_LABEL = qr/_:${r_PN_LOCAL_BNODE}/o;
# '[' and ']' with only tab, CR, LF or space characters between them.
my $r_ANON = qr/\[[\t\r\n ]*\]/o;
# '(' and ')' with only tab, CR, LF or space characters between them.
my $r_NIL = qr/\([\n\r\t ]*\)/o;
my $r_KEYWORDS = qr/(ABS|ADD|ALL|ASC|ASK|AS|AVG|BASE|BIND|BNODE|BOUND|BY|CEIL|CLEAR|COALESCE|CONCAT|CONSTRUCT|CONTAINS|COPY|COUNT|CREATE|DATATYPE|DAY|DEFAULT|DELETE|DELETE WHERE|DESCRIBE|DESC|DISTINCT|DISTINCT|DROP|ENCODE_FOR_URI|EXISTS|FILTER|FL...
sub BUILD {
my $self = shift;
my %METHOD_TOKEN = (
# q[#] => '_get_comment',
q[@] => '_get_lang',
q[<] => '_get_iriref_or_relational',
q[{] => '_get_brace_or_annotation_or_or',
q[}] => '_get_brace_or_annotation_or_or',
q[|] => '_get_brace_or_annotation_or_or',
q[_] => '_get_bnode',
q['] => '_get_single_literal',
q["] => '_get_double_literal',
q[:] => '_get_pname',
q[?] => '_get_variable',
q[$] => '_get_variable',
q[!] => '_get_bang',
q[>] => '_get_iriref_or_relational',
q([) => '_get_lbracket_or_anon',
q[(] => '_get_lparen_or_nil',
(map {$_ => '_get_number'} (0 .. 9, '-', '+'))
);
while (my ($k,$v) = each(%METHOD_TOKEN)) {
if (length($k) != 1) {
die "Cannot set a lookahead token handler method with lookahead > 1 char";
}
$self->lookahead_methods->{$k} //= $v;
}
my %CHAR_TOKEN = (
( run in 0.624 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )