Attean

 view release on metacpan or  search on metacpan

lib/AtteanX/Parser/SPARQLLex.pm  view on Meta::CPAN

use v5.14;
use warnings;

=head1 NAME

AtteanX::Parser::SPARQLLex - SPARQL Lexer

=head1 VERSION

This document describes AtteanX::Parser::SPARQLLex version 0.038

=head1 SYNOPSIS

 use Attean;

=head1 DESCRIPTION

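This module implements a lexer for SPARQL queries and updates. Given a
UTF-8 encoded byte string or a filehandle, it returns an iterator of SPARQL
tokens (see L<AtteanX::SPARQL::Token>).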

=head1 ATTRIBUTES

=over 4

=item C<< canonical_media_type >>

=item C<< media_types >>

=item C<< file_extensions >>

=item C<< handled_type >>

=item C<< extend >>

=back

=head1 METHODS

=over 4

=cut

package AtteanX::Parser::SPARQLLex 0.038 {
	use utf8;
	use Moo;
	use Attean;
	use Encode;
	use Encode qw(decode);
	use Types::Standard qw(ArrayRef);
	use namespace::clean;

	# The single media type string reported for this parser.
	sub canonical_media_type { return "application/x-sparql-query-tokens" }

	# these pass through to the lexer iterator
	# (handed unchanged to the Iterator constructor in parse_iter_from_io)
	has extend		=> ( is => 'ro', isa => ArrayRef, default => sub { [] } );

	# Returns an ARRAY reference of the media types handled by this parser.
	sub media_types {
		return [qw(application/x-sparql-query-tokens)];
	}

	# Returns a Type::Tiny::Role constraint for the AtteanX::SPARQL::Token
	# role. The constraint object is constructed on the first call and then
	# reused (it is held in a state variable).
	sub handled_type {
		state $ITEM_TYPE = Type::Tiny::Role->new(role => 'AtteanX::SPARQL::Token');
		return $ITEM_TYPE;
	}

=item C<< file_extensions >>

Returns an ARRAY reference of file extensions that may be parsed with the
parser.

=cut

	sub file_extensions { return [qw(rq)] }

	with 'Attean::API::PullParser', 'Attean::API::Parser';

=item C<< parse_iter_from_bytes( $data ) >>

Returns an iterator of SPARQL tokens that result from parsing
the SPARQL query/update read from the UTF-8 encoded byte string C<< $data >>.

Dies if an in-memory filehandle cannot be opened on C<< $data >>.

=cut

	sub parse_iter_from_bytes {
		my $self	= shift;
		my $data	= shift;
		# Read the bytes back through an in-memory filehandle carrying a
		# UTF-8 decoding layer. Check the open so that a failure is reported
		# here instead of an undefined handle reaching the iterator.
		open(my $fh, '<:encoding(UTF-8)', \$data)
			or die "Cannot open in-memory filehandle for SPARQL data: $!";
		return $self->parse_iter_from_io($fh);
	}

=item C<< parse_iter_from_io( $fh ) >>

Returns an iterator of SPARQL tokens that result from parsing
the SPARQL query/update read from the L<IO::Handle> object C<< $fh >>.

=cut

	sub parse_iter_from_io {
		my $self	= shift;
		my $fh		= shift;
		# The iterator receives this parser's extend list and the handle.
		return AtteanX::Parser::SPARQLLex::Iterator->new(
			extend => $self->extend,
			file => $fh,
		);
	}
}

package AtteanX::Parser::SPARQLLex::Iterator 0.038 {
	use utf8;
	use Moo;
	use Attean;
	use Encode;
	use Encode qw(decode);
	use AtteanX::SPARQL::Token;
	use AtteanX::SPARQL::Constants;
	use Types::Standard qw(FileHandle Ref Str Int ArrayRef HashRef ConsumerOf InstanceOf);
	use namespace::clean;
	
	# Hash-valued, read-only attributes; every instance gets its own empty
	# hash by default.
	has [qw(lookahead_methods lookahead_tokens)] => (
		is		=> 'ro',
		isa		=> HashRef,
		default	=> sub { return {} },
	);

	# Array-valued, read-only attributes; every instance gets its own empty
	# array by default.
	has [qw(extend token_buffer)] => (
		is		=> 'ro',
		isa		=> ArrayRef,
		default	=> sub { return [] },
	);

	with 'AtteanX::API::Lexer';
	
	# Precompiled patterns for string, language-tag and IRI tokens.
	# NOTE(review): the names appear to follow the terminal names of the
	# SPARQL grammar -- confirm against the specification before changing.

	# Escape sequence: a backslash followed by one of t b n r f \ " '
	# (the escaped character is captured).
	my $r_ECHAR					= qr/\\([tbnrf\\"'])/o;
	# Single-quoted string: each body character is either an escape sequence
	# or anything other than ', backslash, LF or CR.
	my $r_STRING_LITERAL1		= qr/'(([^\x{27}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*'/o;
	# Double-quoted string: as above, with " as the delimiter.
	my $r_STRING_LITERAL2		= qr/"(([^\x{22}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*"/o;
	# Triple-single-quoted string: the body may contain runs of one or two
	# quotes, provided they are followed by a non-quote, non-backslash
	# character or an escape sequence.
	my $r_STRING_LITERAL_LONG1	= qr/'''(('|'')?([^'\\]|${r_ECHAR}))*'''/o;
	# Triple-double-quoted string: as above, with " as the delimiter.
	my $r_STRING_LITERAL_LONG2	= qr/"""(("|"")?([^"\\]|${r_ECHAR}))*"""/o;
	# Language tag: '@', one or more ASCII letters, then zero or more
	# '-'-separated alphanumeric subtags.
	my $r_LANGTAG				= qr/@[a-zA-Z]+(-[a-zA-Z0-9]+)*/o;
	# IRI reference in angle brackets; the body excludes < > " { } | ^ `
	# backslash and the characters U+0000 through U+0020.
	my $r_IRI_REF				= qr/<([^<>"{}|^`\\\x{00}-\x{20}])*>/o;
	my $r_PN_CHARS_BASE			= qr/([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}...
	# Precompiled patterns for variables, prefixed names, numbers, blank
	# nodes and empty bracket/paren forms. Several are composed from
	# $r_PN_CHARS_BASE, which is defined earlier in this file.

	# Underscore or a base name character.
	my $r_PN_CHARS_U			= qr/([_]|${r_PN_CHARS_BASE})/o;
	# Variable name: starts with a PN_CHARS_U character or a digit, followed
	# by any number of those plus U+00B7, U+0300-U+036F and U+203F-U+2040.
	my $r_VARNAME				= qr/((${r_PN_CHARS_U}|[0-9])(${r_PN_CHARS_U}|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])*)/o;
	# Variable written with a leading '?'.
	my $r_VAR1					= qr/[?]${r_VARNAME}/o;
	# Variable written with a leading '$'.
	my $r_VAR2					= qr/[\$]${r_VARNAME}/o;
	# Name character: PN_CHARS_U, '-', a digit, U+00B7, U+0300-U+036F or
	# U+203F-U+2040.
	my $r_PN_CHARS				= qr/${r_PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]/o;
	# Prefix name: starts with a base character; interior characters may
	# include '.', but the final character must be a PN_CHARS character.
	my $r_PN_PREFIX				= qr/(${r_PN_CHARS_BASE}((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o;
	# Escape inside a local name: a backslash followed by one of the listed
	# punctuation characters, or '%' followed by two hex digits.
	my $r_PN_LOCAL_ESCAPED		= qr{(\\([-~.!&'()*+,;=/?#@%_\$]))|%[0-9A-Fa-f]{2}}o;
	# Local name: starts with PN_CHARS_U, ':', a digit or an escape; interior
	# characters may include '.' and ':', but the final character may not
	# be '.'.
	my $r_PN_LOCAL				= qr/((${r_PN_CHARS_U}|[:0-9]|${r_PN_LOCAL_ESCAPED})((${r_PN_CHARS}|${r_PN_LOCAL_ESCAPED}|[:.])*(${r_PN_CHARS}|[:]|${r_PN_LOCAL_ESCAPED}))?)/o;
	# Blank node local name: like the local name above but without ':' or
	# escapes.
	my $r_PN_LOCAL_BNODE		= qr/((${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o;
	# Namespace part of a prefixed name: an optional prefix followed by ':'.
	my $r_PNAME_NS				= qr/((${r_PN_PREFIX})?:)/o;
	# Full prefixed name: namespace part followed by a local name.
	my $r_PNAME_LN				= qr/(${r_PNAME_NS}${r_PN_LOCAL})/o;
	# Exponent: 'e' or 'E', an optional sign, then one or more digits.
	my $r_EXPONENT				= qr/[eE][-+]?\d+/o;
	# Double: digits with an optional fraction, a bare fraction, or plain
	# digits -- in every case followed by a mandatory exponent.
	my $r_DOUBLE				= qr/\d+[.]\d*${r_EXPONENT}|[.]\d+${r_EXPONENT}|\d+${r_EXPONENT}/o;
	# Decimal: digits, '.', optional digits; or '.' followed by digits.
	my $r_DECIMAL				= qr/(\d+[.]\d*)|([.]\d+)/o;
	# Integer: one or more digits (no sign).
	my $r_INTEGER				= qr/\d+/o;
	# Labelled blank node: '_:' followed by a blank node local name.
	my $r_BLANK_NODE_LABEL		= qr/_:${r_PN_LOCAL_BNODE}/o;
	# Square brackets enclosing only optional whitespace.
	my $r_ANON					= qr/\[[\t\r\n ]*\]/o;
	# Parentheses enclosing only optional whitespace.
	my $r_NIL					= qr/\([\n\r\t ]*\)/o;
	my $r_KEYWORDS				= qr/(ABS|ADD|ALL|ASC|ASK|AS|AVG|BASE|BIND|BNODE|BOUND|BY|CEIL|CLEAR|COALESCE|CONCAT|CONSTRUCT|CONTAINS|COPY|COUNT|CREATE|DATATYPE|DAY|DEFAULT|DELETE|DELETE WHERE|DESCRIBE|DESC|DISTINCT|DISTINCT|DROP|ENCODE_FOR_URI|EXISTS|FILTER|FL...

	sub BUILD {
		my $self	= shift;
		my %METHOD_TOKEN	= (
		# 	q[#]	=> '_get_comment',
			q[@]	=> '_get_lang',
			q[<]	=> '_get_iriref_or_relational',
			q[{]	=> '_get_brace_or_annotation_or_or',
			q[}]	=> '_get_brace_or_annotation_or_or',
			q[|]	=> '_get_brace_or_annotation_or_or',
			q[_]	=> '_get_bnode',
			q[']	=> '_get_single_literal',
			q["]	=> '_get_double_literal',
			q[:]	=> '_get_pname',
			q[?]	=> '_get_variable',
			q[$]	=> '_get_variable',
			q[!]	=> '_get_bang',
			q[>]	=> '_get_iriref_or_relational',
			q([)	=> '_get_lbracket_or_anon',
			q[(]	=> '_get_lparen_or_nil',
			(map {$_ => '_get_number'} (0 .. 9, '-', '+'))
		);
		while (my ($k,$v) = each(%METHOD_TOKEN)) {
			if (length($k) != 1) {
				die "Cannot set a lookahead token handler method with lookahead > 1 char";
			}
			$self->lookahead_methods->{$k}	//= $v;
		}

		my %CHAR_TOKEN	= (



( run in 0.624 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )