Attean
view release on metacpan or search on metacpan
lib/AtteanX/Parser/Turtle/Lexer.pm view on Meta::CPAN
my ($ns, $local) = ($ln =~ /^([^:]*:)(.*)$/);
no warnings 'uninitialized';
$local =~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g;
return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns, $local);
} else {
$self->buffer =~ $r_PNAME_NS;
my $ns = $self->read_length($+[0]);
return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns);
}
}
sub _get_gtgt {
my $self = shift;
$self->read_word('>>');
return $self->new_token(GTGT, $self->start_line, $self->start_column, '>>');
}
sub _get_lbrace_or_lannot {
my $self = shift;
$self->get_char_safe(q[{]);
if ($self->buffer =~ /^\|/o) {
$self->get_char_safe(q[|]);
return $self->new_token(LANNOT, $self->start_line, $self->start_column, '{|');
}
return $self->new_token(LBRACE, $self->start_line, $self->start_column, '{');
}
sub _get_rannot {
my $self = shift;
$self->read_word('|}');
return $self->new_token(RANNOT, $self->start_line, $self->start_column, '|}');
}
sub _get_iriref_or_ltlt {
my $self = shift;
$self->get_char_safe(q[<]);
if ($self->buffer =~ /^</o) {
$self->get_char_safe(q[<]);
return $self->new_token(LTLT, $self->start_line, $self->start_column, '<<');
}
if ($self->buffer =~ m/^[\x23-\x3d\x3f-\x5a\x5d-\x7e]*>/o) {
my $iri .= $self->read_length($+[0]);
chop($iri);
return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
}
my $iri = '';
while (1) {
if (length($self->buffer) == 0) {
my $c = $self->peek_char;
last unless defined($c);
}
if (substr($self->buffer, 0, 1) eq '\\') {
$self->get_char_safe('\\');
my $esc = $self->get_char;
if ($esc eq '\\') {
$iri .= "\\";
} elsif ($esc eq 'U') {
my $codepoint = $self->read_length(8);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
$iri .= chr(hex($codepoint));
} elsif ($esc eq 'u') {
my $codepoint = $self->read_length(4);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
my $char = chr(hex($codepoint));
if ($char =~ /[<>" {}|\\^`]/o) {
$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char)));
}
$iri .= $char;
} else {
$self->_throw_error("Unrecognized iri escape '$esc'");
}
} elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) {
$iri .= $self->read_length($+[0]);
} elsif (substr($self->buffer, 0, 1) eq '>') {
last;
} else {
my $c = $self->peek_char;
$self->_throw_error("Got '$c' while expecting IRI character");
}
}
$self->get_char_safe(q[>]);
return $self->new_token(IRI, $self->start_line, $self->start_column, $iri);
}
sub _get_bnode {
my $self = shift;
$self->read_word('_:');
$self->_throw_error("Expected: name") unless ($self->buffer =~ /^${r_bnode_id}/o);
my $name = substr($self->buffer, 0, $+[0]);
$self->read_word($name);
return $self->new_token(BNODE, $self->start_line, $self->start_column, $name);
}
sub _get_number {
my $self = shift;
if ($self->buffer =~ /^${r_double}/o) {
return $self->new_token(DOUBLE, $self->start_line, $self->start_column, $self->read_length($+[0]));
} elsif ($self->buffer =~ /^${r_decimal}/o) {
return $self->new_token(DECIMAL, $self->start_line, $self->start_column, $self->read_length($+[0]));
} elsif ($self->buffer =~ /^${r_integer}/o) {
return $self->new_token(INTEGER, $self->start_line, $self->start_column, $self->read_length($+[0]));
}
$self->_throw_error("Expected number");
}
sub _get_comment {
my $self = shift;
$self->get_char_safe('#');
my $comment = '';
my $c = $self->peek_char;
while (length($c) and $c !~ /[\r\n]/o) {
$comment .= $self->get_char;
$c = $self->peek_char;
}
if (length($c) and $c =~ /[\r\n]/o) {
$self->get_char;
}
return $self->new_token(COMMENT, $self->start_line, $self->start_column, $comment);
}
sub _get_double_literal {
my $self = shift;
# my $c = $self->peek_char();
lib/AtteanX/Parser/Turtle/Lexer.pm view on Meta::CPAN
my $quote_count = 0;
my $string = '';
while (1) {
if (length($self->buffer) == 0) {
$self->fill_buffer;
$self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0);
}
if (substr($self->buffer, 0, 1) eq "'") {
my $c = $self->get_char;
$quote_count++;
last if ($quote_count == 3);
} else {
if ($quote_count) {
$string .= "'" foreach (1..$quote_count);
$quote_count = 0;
}
if (substr($self->buffer, 0, 1) eq '\\') {
$string .= $self->_get_escaped_char();
} else {
$self->buffer =~ /^[^'\\]+/;
$string .= $self->read_length($+[0]);
}
}
}
return $self->new_token(STRING3S, $self->start_line, $self->start_column, $string);
} else {
### #x22 scharacter* #x22
my $string = '';
while (1) {
$self->fill_buffer unless (length($self->buffer));
if (substr($self->buffer, 0, 1) eq '\\') {
$string .= $self->_get_escaped_char();
} elsif ($self->buffer =~ /^[^'\\]+/o) {
$string .= $self->read_length($+[0]);
} elsif (substr($self->buffer, 0, 1) eq "'") {
last;
} else {
my $c = $self->peek_char();
$self->_throw_error("Got '$c' while expecting string character");
}
}
$self->get_char_safe(q[']);
return $self->new_token(STRING1S, $self->start_line, $self->start_column, $string);
}
}
sub _get_escaped_char {
my $self = shift;
my $c = $self->peek_char;
$self->get_char_safe('\\');
my $esc = $self->get_char;
if ($esc eq '\\') { return "\\" }
elsif ($esc =~ /^['">]$/) { return $esc }
elsif ($esc eq 'r') { return "\r" }
elsif ($esc eq 't') { return "\t" }
elsif ($esc eq 'n') { return "\n" }
elsif ($esc eq 'b') { return "\b" }
elsif ($esc eq 'f') { return "\f" }
elsif ($esc eq 'U') {
my $codepoint = $self->read_length(8);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
return chr(hex($codepoint));
} elsif ($esc eq 'u'){
my $codepoint = $self->read_length(4);
$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
return chr(hex($codepoint));
}
$self->_throw_error("Unrecognized string escape '$esc'");
}
sub _get_keyword {
my $self = shift;
$self->get_char_safe('@');
if ($self->buffer =~ /^base/o) {
$self->read_word('base');
return $self->new_token(TURTLEBASE, $self->start_line, $self->start_column);
} elsif ($self->buffer =~ /^prefix/o) {
$self->read_word('prefix');
return $self->new_token(TURTLEPREFIX, $self->start_line, $self->start_column);
} else {
if ($self->buffer =~ /^[a-zA-Z]+(-[a-zA-Z0-9]+)*\b/o) {
my $lang = $self->read_length($+[0]);
return $self->new_token(LANG, $self->start_line, $self->start_column, $lang);
}
$self->_throw_error("Expected keyword or language tag");
}
}
sub _throw_error {
my $self = shift;
my $error = shift;
my $line = $self->line;
my $col = $self->column;
Carp::confess "$error at $line:$col with buffer: " . Dumper($self->buffer);
}
}
1;
__END__
=end private
=back
=head1 BUGS
Please report any bugs or feature requests to through the GitHub web interface
at L<https://github.com/kasei/perlrdf/issues>.
=head1 AUTHOR
Gregory Todd Williams C<< <gwilliams@cpan.org> >>
=head1 COPYRIGHT
Copyright (c) 2014--2022 Gregory Todd Williams. This
program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
( run in 0.661 second using v1.01-cache-2.11-cpan-39bf76dae61 )