Attean
view release on metacpan or search on metacpan
lib/AtteanX/Parser/SPARQLLex.pm view on Meta::CPAN
if ($self->buffer =~ /^$r_PNAME_LN/o) {
my $ln = $self->read_length($+[0]);
my ($ns, $local) = ($ln =~ /^([^:]*:)(.*)$/);
no warnings 'uninitialized';
$local =~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g;
return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns, $local);
} elsif ($self->buffer =~ $r_PNAME_NS) {
my $ns = $self->read_length($+[0]);
return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns);
} else {
$self->_throw_error("Expected PNAME");
}
}
sub _get_variable {
my $self = shift;
if (substr($self->buffer, 0, 1) eq '$') {
$self->get_char_safe('$');
if ($self->buffer =~ /^$r_VARNAME/) {
my $name = $self->read_length($+[0]);
return $self->new_token(VAR, $self->start_line, $self->start_column, $name);
} else {
$self->_throw_error("Invalid variable name");
}
} else {
$self->get_char_safe('?');
if ($self->buffer =~ /^$r_VARNAME/) {
my $name = $self->read_length($+[0]);
return $self->new_token(VAR, $self->start_line, $self->start_column, $name);
} else {
return $self->new_token(QUESTION, $self->start_line, $self->start_column, '?');
}
}
}
sub _get_iriref_or_relational {
my $self = shift;
my $buffer = $self->buffer;
if ($buffer =~ m/^<([^<>"{}|^`\x00-\x20])*>/) {
$self->get_char_safe(q[<]);
if ($self->buffer =~ m/^[\x23-\x3d\x3f-\x5a\x5d-\x7e]*>/o) {
my $iri .= $self->read_length($+[0]);
chop($iri);
return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
}
my $iri = '';
while (1) {
if (length($self->buffer) == 0) {
my $c = $self->peek_char;
last unless defined($c);
}
if (substr($self->buffer, 0, 1) eq '\\') {
$self->get_char_safe('\\');
my $esc = $self->get_char;
if ($esc eq '\\') {
$iri .= "\\";
} elsif ($esc eq 'U') {
my $codepoint = $self->read_length(8);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
$iri .= chr(hex($codepoint));
} elsif ($esc eq 'u') {
my $codepoint = $self->read_length(4);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
my $char = chr(hex($codepoint));
if ($char =~ /[<>" {}|\\^`]/o) {
$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char)));
}
$iri .= $char;
} else {
$self->_throw_error("Unrecognized iri escape '$esc'");
}
} elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) {
$iri .= $self->read_length($+[0]);
} elsif (substr($self->buffer, 0, 1) eq '>') {
last;
} else {
my $c = $self->peek_char;
$self->_throw_error("Got '$c' while expecting IRI character");
}
}
$self->get_char_safe(q[>]);
return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
} elsif (substr($buffer, 0, 2) eq '<=') {
$self->read_length(2);
return $self->new_token(LE, $self->start_line, $self->start_column, '<=');
} elsif (substr($buffer, 0, 2) eq '>=') {
$self->read_length(2);
return $self->new_token(GE, $self->start_line, $self->start_column, '>=');
} elsif (substr($buffer, 0, 2) eq '<<') {
$self->read_length(2);
return $self->new_token(LTLT, $self->start_line, $self->start_column, '<<');
} elsif (substr($buffer, 0, 2) eq '>>') {
$self->read_length(2);
return $self->new_token(GTGT, $self->start_line, $self->start_column, '>>');
} elsif (substr($buffer, 0, 1) eq '>') {
$self->get_char;
return $self->new_token(GT, $self->start_line, $self->start_column, '>');
} elsif (substr($buffer, 0, 1) eq '<') {
$self->get_char;
return $self->new_token(LT, $self->start_line, $self->start_column, '<');
} else {
die "Unrecognized relational op near '$buffer'";
}
}
sub _get_bang {
my $self = shift;
if ($self->buffer =~ /^!=/) {
$self->read_length(2);
return $self->new_token(NOTEQUALS, $self->start_line, $self->start_column, '!=');
} else {
$self->get_char;
return $self->new_token(BANG, $self->start_line, $self->start_column, '!');
}
}
sub _get_bnode {
my $self = shift;
unless ($self->buffer =~ /^$r_BLANK_NODE_LABEL/o) {
$self->_throw_error("Expected: name");
}
my $ln = $self->read_length($+[0]);
my $name = substr($ln, 2);
lib/AtteanX/Parser/SPARQLLex.pm view on Meta::CPAN
my $quote_count = 0;
my $string = '';
while (1) {
if (length($self->buffer) == 0) {
$self->fill_buffer;
$self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0);
}
if (substr($self->buffer, 0, 1) eq "'") {
my $c = $self->get_char;
$quote_count++;
last if ($quote_count == 3);
} else {
if ($quote_count) {
$string .= "'" foreach (1..$quote_count);
$quote_count = 0;
}
if (substr($self->buffer, 0, 1) eq '\\') {
$string .= $self->_get_escaped_char();
} else {
$self->buffer =~ /^[^'\\]+/;
$string .= $self->read_length($+[0]);
}
}
}
return $self->new_token(STRING3S, $self->start_line, $self->start_column, $string);
} else {
### #x22 scharacter* #x22
my $string = '';
while (1) {
if (substr($self->buffer, 0, 1) eq '\\') {
$string .= $self->_get_escaped_char();
} elsif ($self->buffer =~ /^[^'\\]+/o) {
$string .= $self->read_length($+[0]);
} elsif (substr($self->buffer, 0, 1) eq "'") {
last;
} else {
my $c = $self->peek_char();
$self->_throw_error("Got '$c' while expecting string character");
}
}
$self->get_char_safe(q[']);
return $self->new_token(STRING1S, $self->start_line, $self->start_column, $string);
}
}
sub _get_escaped_char {
my $self = shift;
my $c = $self->peek_char;
$self->get_char_safe('\\');
my $esc = $self->get_char;
if ($esc eq '\\') { return "\\" }
elsif ($esc =~ /^['">]$/) { return $esc }
elsif ($esc eq 'r') { return "\r" }
elsif ($esc eq 't') { return "\t" }
elsif ($esc eq 'n') { return "\n" }
elsif ($esc eq 'b') { return "\b" }
elsif ($esc eq 'f') { return "\f" }
elsif ($esc eq 'U') {
my $codepoint = $self->read_length(8);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
return chr(hex($codepoint));
} elsif ($esc eq 'u'){
my $codepoint = $self->read_length(4);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
return chr(hex($codepoint));
}
$self->_throw_error("Unrecognized string escape '$esc'");
}
sub _throw_error {
my $self = shift;
my $error = shift;
my $line = $self->line;
my $col = $self->column;
use Data::Dumper;
Carp::confess "$error at $line:$col with buffer: " . Dumper($self->buffer);
}
}
1;
__END__
=back
=head1 BUGS
Please report any bugs or feature requests to through the GitHub web interface
at L<https://github.com/kasei/perlrdf/issues>.
=head1 AUTHOR
Gregory Todd Williams C<< <gwilliams@cpan.org> >>
=head1 COPYRIGHT
Copyright (c) 2014--2022 Gregory Todd Williams. This
program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
( run in 1.189 second using v1.01-cache-2.11-cpan-d7f47b0818f )