RDF-Trine

 view release on metacpan or  search on metacpan

lib/RDF/Trine/Parser/Turtle/Lexer.pm  view on Meta::CPAN

			next;
		}
		elsif ($c =~ /[ \r\n\t]/) {
			while (defined($c) and length($c) and $c =~ /[\t\r\n ]/) {
				$self->_get_char;
				$c		= $self->_peek_char;
			}
			
			# we're ignoring whitespace tokens, but we could return them here instead of falling through to the 'next':
# 			return $self->new_token(WS);
			next;
		}
		elsif ($c =~ /[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/) {
			if ($self->{buffer} =~ /^a(?!:)\s/) {
				$self->_get_char;
				return $self->new_token(A);
			} elsif ($self->{buffer} =~ /^(?:true|false)(?!:)\b/) {
				my $bool	= $self->_read_length($+[0]);
				return $self->new_token(BOOLEAN, $bool);
			} elsif ($self->{buffer} =~ /^BASE(?!:)\b/i) {
				$self->_read_length(4);
				return $self->new_token(SPARQLBASE);
			} elsif ($self->{buffer} =~ /^PREFIX(?!:)\b/i) {
				$self->_read_length(6);
				return $self->new_token(SPARQLPREFIX);
			} else {
				return $self->_get_pname;
			}
		}
		elsif ($c eq '^') { $self->_read_word('^^'); return $self->new_token(HATHAT); }
		else {
# 			Carp::cluck sprintf("Unexpected byte '$c' (0x%02x)", ord($c));
			return $self->_throw_error(sprintf("Unexpected byte '%s' (0x%02x)", $c, ord($c)));
		}
		warn 'byte: ' . Dumper($c);
	}
}

=begin private

=cut

=item C<< fill_buffer >>

Fills the internal parse buffer with a new line from the input source.

=cut

sub fill_buffer {
	my $self	= shift;
	unless (length($self->buffer)) {
		my $line	= $self->file->getline;
		if (defined($line)) {
			$self->{buffer}	.= $line;
		}
	}
}

=item C<< check_for_bom >>

Checks the input buffer for a Unicode BOM, and consumes it if it is present.

=cut

sub check_for_bom {
	my $self	= shift;
	my $c	= $self->_peek_char();
	if (defined($c) and $c eq "\x{FEFF}") {
		$self->_get_char;
	}
}

sub _get_char_safe {
	my $self	= shift;
	my $char	= shift;
	my $c		= $self->_get_char;
	if ($c ne $char) {
		$self->_throw_error("Expected '$char' but got '$c'");
	}
	return $c;
}

sub _get_char_fill_buffer {
	my $self	= shift;
	if (length($self->{buffer}) == 0) {
		$self->fill_buffer;
		if (length($self->{buffer}) == 0) {
			return;
		}
	}
	my $c		= substr($self->{buffer}, 0, 1, '');
	if ($c eq "\n") {
# 		$self->{linebuffer}	= '';
		$self->{line}	= 1+$self->{line};
		$self->{column}	= 1;
	} else {
# 		$self->{linebuffer}	.= $c;
		$self->{column}	= 1+$self->{column};
	}
	return $c;
}

sub _get_char {
	my $self	= shift;
	my $c		= substr($self->{buffer}, 0, 1, '');
	if ($c eq "\n") {
# 		$self->{linebuffer}	= '';
		$self->{line}	= 1+$self->{line};
		$self->{column}	= 1;
	} else {
# 		$self->{linebuffer}	.= $c;
		$self->{column}	= 1+$self->{column};
	}
	return $c;
}

sub _peek_char {
	my $self	= shift;
	if (length($self->{buffer}) == 0) {
		$self->fill_buffer;
		if (length($self->{buffer}) == 0) {



( run in 1.827 second using v1.01-cache-2.11-cpan-d8267643d1d )