RDF-Trine
view release on metacpan or search on metacpan
lib/RDF/Trine/Parser/Turtle/Lexer.pm view on Meta::CPAN
next;
}
elsif ($c =~ /[ \r\n\t]/) {
while (defined($c) and length($c) and $c =~ /[\t\r\n ]/) {
$self->_get_char;
$c = $self->_peek_char;
}
# we're ignoring whitespace tokens, but we could return them here instead of falling through to the 'next':
# return $self->new_token(WS);
next;
}
elsif ($c =~ /[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/) {
if ($self->{buffer} =~ /^a(?!:)\s/) {
$self->_get_char;
return $self->new_token(A);
} elsif ($self->{buffer} =~ /^(?:true|false)(?!:)\b/) {
my $bool = $self->_read_length($+[0]);
return $self->new_token(BOOLEAN, $bool);
} elsif ($self->{buffer} =~ /^BASE(?!:)\b/i) {
$self->_read_length(4);
return $self->new_token(SPARQLBASE);
} elsif ($self->{buffer} =~ /^PREFIX(?!:)\b/i) {
$self->_read_length(6);
return $self->new_token(SPARQLPREFIX);
} else {
return $self->_get_pname;
}
}
elsif ($c eq '^') { $self->_read_word('^^'); return $self->new_token(HATHAT); }
else {
# Carp::cluck sprintf("Unexpected byte '$c' (0x%02x)", ord($c));
return $self->_throw_error(sprintf("Unexpected byte '%s' (0x%02x)", $c, ord($c)));
}
warn 'byte: ' . Dumper($c);
}
}
=begin private
=cut
=item C<< fill_buffer >>
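If the internal parse buffer is empty, appends the next line read from the input source to it. Does nothing when the buffer already holds data or when no further line is available.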
=cut
sub fill_buffer {
	my ($self) = @_;
	# Only pull more input once everything previously buffered has been
	# consumed; a non-empty buffer is left untouched.
	unless (length($self->buffer)) {
		my $next_line = $self->file->getline;
		# getline yields undef when no further line is available; in that
		# case the buffer is left as it is.
		$self->{buffer} .= $next_line if defined($next_line);
	}
}
=item C<< check_for_bom >>
Checks the input buffer for a Unicode BOM, and consumes it if it is present.
=cut
sub check_for_bom {
	my ($self) = @_;
	# Inspect the character reported by _peek_char; a character is consumed
	# via _get_char only when that character is U+FEFF.
	my $first = $self->_peek_char();
	$self->_get_char if (defined($first) and $first eq "\x{FEFF}");
}
# Consumes one character via _get_char and checks it against the expected
# character $char. On a mismatch the error is reported through _throw_error.
# Returns the character that was consumed.
sub _get_char_safe {
	my ($self, $char) = @_;
	my $c = $self->_get_char;
	$c eq $char
		or $self->_throw_error("Expected '$char' but got '$c'");
	return $c;
}
# Removes and returns the first character of the parse buffer, calling
# fill_buffer first when the buffer is empty. Returns nothing (bare return)
# if the buffer is still empty after the refill attempt. The line/column
# counters are updated for the character that was removed.
sub _get_char_fill_buffer {
	my ($self) = @_;
	unless (length($self->{buffer})) {
		$self->fill_buffer;
		# Still nothing buffered after the refill attempt: nothing to hand out.
		return unless (length($self->{buffer}));
	}
	my $char = substr($self->{buffer}, 0, 1, '');
	if ($char ne "\n") {
		$self->{column} = $self->{column} + 1;
	} else {
		# A newline moves to the next line and resets the column to 1.
		$self->{line}   = $self->{line} + 1;
		$self->{column} = 1;
	}
	return $char;
}
# Removes and returns the first character of the parse buffer and updates the
# line/column counters for it. Unlike _get_char_fill_buffer, this does not
# try to refill an empty buffer.
#
# Returns the removed character, or the empty string when the buffer held no
# characters. In that case the line/column counters are left unchanged, since
# nothing was consumed.
sub _get_char {
	my $self = shift;
	my $c = substr($self->{buffer}, 0, 1, '');
	# An empty buffer yields ''. Do not advance the column for a character
	# that was never read, otherwise the recorded position drifts past the
	# real end of the input.
	return $c unless (length($c));
	if ($c eq "\n") {
		# A newline moves to the next line and resets the column to 1.
		$self->{line} = 1+$self->{line};
		$self->{column} = 1;
	} else {
		$self->{column} = 1+$self->{column};
	}
	return $c;
}
sub _peek_char {
my $self = shift;
if (length($self->{buffer}) == 0) {
$self->fill_buffer;
if (length($self->{buffer}) == 0) {
( run in 1.827 second using v1.01-cache-2.11-cpan-d8267643d1d )