Attean

 view release on metacpan or  search on metacpan

lib/AtteanX/Parser/SPARQLLex.pm  view on Meta::CPAN

	
		if ($self->buffer =~ /^$r_PNAME_LN/o) {
			my $ln	= $self->read_length($+[0]);
			my ($ns, $local)	= ($ln =~ /^([^:]*:)(.*)$/);
			no warnings 'uninitialized';
			$local	=~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g;
			return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns, $local);
		} elsif ($self->buffer =~ $r_PNAME_NS) {
			my $ns	= $self->read_length($+[0]);
			return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns);
		} else {
			$self->_throw_error("Expected PNAME");
		}
	}

	sub _get_variable {
		my $self	= shift;
		if (substr($self->buffer, 0, 1) eq '$') {
			$self->get_char_safe('$');
			if ($self->buffer =~ /^$r_VARNAME/) {
				my $name	= $self->read_length($+[0]);
				return $self->new_token(VAR, $self->start_line, $self->start_column, $name);
			} else {
				$self->_throw_error("Invalid variable name");
			}
		} else {
			$self->get_char_safe('?');
			if ($self->buffer =~ /^$r_VARNAME/) {
				my $name	= $self->read_length($+[0]);
				return $self->new_token(VAR, $self->start_line, $self->start_column, $name);
			} else {
				return $self->new_token(QUESTION, $self->start_line, $self->start_column, '?');
			}
		}
	}
	
	sub _get_iriref_or_relational {
		my $self	= shift;
		my $buffer	= $self->buffer;
		if ($buffer =~ m/^<([^<>"{}|^`\x00-\x20])*>/) {
			$self->get_char_safe(q[<]);
			if ($self->buffer =~ m/^[\x23-\x3d\x3f-\x5a\x5d-\x7e]*>/o) {
				my $iri	.= $self->read_length($+[0]);
				chop($iri);
				return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
			}
	
			my $iri	= '';
			while (1) {
				if (length($self->buffer) == 0) {
					my $c	= $self->peek_char;
					last unless defined($c);
				}
				if (substr($self->buffer, 0, 1) eq '\\') {
					$self->get_char_safe('\\');
					my $esc	= $self->get_char;
					if ($esc eq '\\') {
						$iri	.= "\\";
					} elsif ($esc eq 'U') {
						my $codepoint	= $self->read_length(8);
						$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
						$iri .= chr(hex($codepoint));
					} elsif ($esc eq 'u') {
						my $codepoint	= $self->read_length(4);
						$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
						my $char	= chr(hex($codepoint));
						if ($char =~ /[<>" {}|\\^`]/o) {
							$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char)));
						}
						$iri .= $char;
					} else {
						$self->_throw_error("Unrecognized iri escape '$esc'");
					}
				} elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) {
					$iri	.= $self->read_length($+[0]);
				} elsif (substr($self->buffer, 0, 1) eq '>') {
					last;
				} else {
					my $c	= $self->peek_char;
					$self->_throw_error("Got '$c' while expecting IRI character");
				}
			}
			$self->get_char_safe(q[>]);
			return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
		} elsif (substr($buffer, 0, 2) eq '<=') {
			$self->read_length(2);
			return $self->new_token(LE, $self->start_line, $self->start_column, '<=');
		} elsif (substr($buffer, 0, 2) eq '>=') {
			$self->read_length(2);
			return $self->new_token(GE, $self->start_line, $self->start_column, '>=');
		} elsif (substr($buffer, 0, 2) eq '<<') {
			$self->read_length(2);
			return $self->new_token(LTLT, $self->start_line, $self->start_column, '<<');
		} elsif (substr($buffer, 0, 2) eq '>>') {
			$self->read_length(2);
			return $self->new_token(GTGT, $self->start_line, $self->start_column, '>>');
		} elsif (substr($buffer, 0, 1) eq '>') {
			$self->get_char;
			return $self->new_token(GT, $self->start_line, $self->start_column, '>');
		} elsif (substr($buffer, 0, 1) eq '<') {
			$self->get_char;
			return $self->new_token(LT, $self->start_line, $self->start_column, '<');
		} else {
			die "Unrecognized relational op near '$buffer'";
		}
	}

	sub _get_bang {
		my $self	= shift;
		if ($self->buffer =~ /^!=/) {
			$self->read_length(2);
			return $self->new_token(NOTEQUALS, $self->start_line, $self->start_column, '!=');
		} else {
			$self->get_char;
			return $self->new_token(BANG, $self->start_line, $self->start_column, '!');
		}
	}
	
	sub _get_bnode {
		my $self	= shift;
		unless ($self->buffer =~ /^$r_BLANK_NODE_LABEL/o) {
			$self->_throw_error("Expected: name");
		}
		my $ln	= $self->read_length($+[0]);
		my $name	= substr($ln, 2);

lib/AtteanX/Parser/SPARQLLex.pm  view on Meta::CPAN

		
			my $quote_count	= 0;
			my $string	= '';
			while (1) {
				if (length($self->buffer) == 0) {
					$self->fill_buffer;
					$self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0);
				}
				if (substr($self->buffer, 0, 1) eq "'") {
					my $c	= $self->get_char;
					$quote_count++;
					last if ($quote_count == 3);
				} else {
					if ($quote_count) {
						$string	.= "'" foreach (1..$quote_count);
						$quote_count	= 0;
					}
					if (substr($self->buffer, 0, 1) eq '\\') {
						$string	.= $self->_get_escaped_char();
					} else {
						$self->buffer	=~ /^[^'\\]+/;
						$string	.= $self->read_length($+[0]);
					}
				}
			}
			return $self->new_token(STRING3S, $self->start_line, $self->start_column, $string);
		} else {
			### #x22 scharacter* #x22
			my $string	= '';
			while (1) {
				if (substr($self->buffer, 0, 1) eq '\\') {
					$string	.= $self->_get_escaped_char();
				} elsif ($self->buffer =~ /^[^'\\]+/o) {
					$string	.= $self->read_length($+[0]);
				} elsif (substr($self->buffer, 0, 1) eq "'") {
					last;
				} else {
					my $c		= $self->peek_char();
					$self->_throw_error("Got '$c' while expecting string character");
				}
			}
			$self->get_char_safe(q[']);
			return $self->new_token(STRING1S, $self->start_line, $self->start_column, $string);
		}
	}

	sub _get_escaped_char {
		my $self	= shift;
		my $c	= $self->peek_char;
		$self->get_char_safe('\\');
		my $esc	= $self->get_char;
		if ($esc eq '\\') { return "\\" }
		elsif ($esc =~ /^['">]$/) { return $esc }
		elsif ($esc eq 'r') { return "\r" }
		elsif ($esc eq 't') { return "\t" }
		elsif ($esc eq 'n') { return "\n" }
		elsif ($esc eq 'b') { return "\b" }
		elsif ($esc eq 'f') { return "\f" }
		elsif ($esc eq 'U') {
			my $codepoint	= $self->read_length(8);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		} elsif ($esc eq 'u'){
			my $codepoint	= $self->read_length(4);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		}
		$self->_throw_error("Unrecognized string escape '$esc'");
	}
	
	sub _throw_error {
		my $self	= shift;
		my $error	= shift;
		my $line	= $self->line;
		my $col		= $self->column;
		use Data::Dumper;
		Carp::confess "$error at $line:$col with buffer: " . Dumper($self->buffer);
	}
}

1;

__END__

=back

=head1 BUGS

Please report any bugs or feature requests to through the GitHub web interface
at L<https://github.com/kasei/perlrdf/issues>.

=head1 AUTHOR

Gregory Todd Williams  C<< <gwilliams@cpan.org> >>

=head1 COPYRIGHT

Copyright (c) 2014--2022 Gregory Todd Williams. This
program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=cut



( run in 1.189 second using v1.01-cache-2.11-cpan-d7f47b0818f )