unicode results from the CPAN

unicode

Attean

view release on metacpan or search on metacpan

 - (Update) Improved test suite (includes #92 from KjetilK, #53).
 - (Update) Removed AtteanX::RDFQueryTranslator (split into a new package)
   and all other references to RDF::Query.
 - (Update) Removed default implementation of Attean::API::Term->ebv (now
   required of consumers).
 - (Update) Serialize SPARQL and Turtle namespace declarations in a stable
   order.
 - (Update) Updated Attean::API::AbbreviatingParser->base definition to be
   a consumer of Attean::API::IRI.
 - (Update) Updated Attean::API::SPARQLSerializable->as_sparql to return a
   unicode string, not bytes.

0.015  2016-04-09

 - (Update) Fixed metadata used to generate README files.

0.014  2016-04-09

 - (Addition) Add a size estimate attribute to Attean::Plan::Iterator (#90
   from KjetilK).
 - (Addition) Added Attean::Plan::Iterator for cases where there is too

Changes view on Meta::CPAN

 - (Update) Renamed Attean::API::Planner to Attean::API::QueryPlanner and
   re-organized planning code.
 - (Update) Update Changes metadata handling to use
   Module::Install::DOAPChangeSets (GitHub issue #25).
 - (Update) Updated Attean::Algebra::Join to be n-ary, not binary.
 - (Update) Updated attean_query to use the native SPARQL parser.

0.009  2015-11-04

 - (Addition) Added Attean::API::Result->shared_domain method.
 - (Update) Improve handling of unicode data in SPARQL TSV parser.
 - (Update) Improve query planner and plan implementations to support
   SPARQL 1.1 test suite.
 - (Update) Removed HeapSort plan implementation and use of Array::Heap due
   to packaging concerns (issue #32).

0.008  2015-08-18

 - (Addition) Added Attean::API::Plan::Join role.
 - (Addition) Added apply_triple and apply_quad methods to triple and quad
   pattern classes to produce Result objects.

lib/AtteanX/Parser/SPARQLLex.pm view on Meta::CPAN

					my $c	= $self->peek_char;
					last unless defined($c);
				}
				if (substr($self->buffer, 0, 1) eq '\\') {
					$self->get_char_safe('\\');
					my $esc	= $self->get_char;
					if ($esc eq '\\') {
						$iri	.= "\\";
					} elsif ($esc eq 'U') {
						my $codepoint	= $self->read_length(8);
						$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
						$iri .= chr(hex($codepoint));
					} elsif ($esc eq 'u') {
						my $codepoint	= $self->read_length(4);
						$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
						my $char	= chr(hex($codepoint));
						if ($char =~ /[<>" {}|\\^`]/o) {
							$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char)));
						}
						$iri .= $char;
					} else {
						$self->_throw_error("Unrecognized iri escape '$esc'");
					}
				} elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) {
					$iri	.= $self->read_length($+[0]);

lib/AtteanX/Parser/SPARQLLex.pm view on Meta::CPAN

		my $esc	= $self->get_char;
		if ($esc eq '\\') { return "\\" }
		elsif ($esc =~ /^['">]$/) { return $esc }
		elsif ($esc eq 'r') { return "\r" }
		elsif ($esc eq 't') { return "\t" }
		elsif ($esc eq 'n') { return "\n" }
		elsif ($esc eq 'b') { return "\b" }
		elsif ($esc eq 'f') { return "\f" }
		elsif ($esc eq 'U') {
			my $codepoint	= $self->read_length(8);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		} elsif ($esc eq 'u'){
			my $codepoint	= $self->read_length(4);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		}
		$self->_throw_error("Unrecognized string escape '$esc'");
	}
	
	sub _throw_error {
		my $self	= shift;
		my $error	= shift;
		my $line	= $self->line;
		my $col		= $self->column;

lib/AtteanX/Parser/Turtle/Lexer.pm view on Meta::CPAN

				my $c	= $self->peek_char;
				last unless defined($c);
			}
			if (substr($self->buffer, 0, 1) eq '\\') {
				$self->get_char_safe('\\');
				my $esc	= $self->get_char;
				if ($esc eq '\\') {
					$iri	.= "\\";
				} elsif ($esc eq 'U') {
					my $codepoint	= $self->read_length(8);
					$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
					$iri .= chr(hex($codepoint));
				} elsif ($esc eq 'u') {
					my $codepoint	= $self->read_length(4);
					$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
					my $char	= chr(hex($codepoint));
					if ($char =~ /[<>" {}|\\^`]/o) {
						$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char)));
					}
					$iri .= $char;
				} else {
					$self->_throw_error("Unrecognized iri escape '$esc'");
				}
			} elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) {
				$iri	.= $self->read_length($+[0]);

lib/AtteanX/Parser/Turtle/Lexer.pm view on Meta::CPAN

		my $esc	= $self->get_char;
		if ($esc eq '\\') { return "\\" }
		elsif ($esc =~ /^['">]$/) { return $esc }
		elsif ($esc eq 'r') { return "\r" }
		elsif ($esc eq 't') { return "\t" }
		elsif ($esc eq 'n') { return "\n" }
		elsif ($esc eq 'b') { return "\b" }
		elsif ($esc eq 'f') { return "\f" }
		elsif ($esc eq 'U') {
			my $codepoint	= $self->read_length(8);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		} elsif ($esc eq 'u'){
			my $codepoint	= $self->read_length(4);
			$self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o);
			return chr(hex($codepoint));
		}
		$self->_throw_error("Unrecognized string escape '$esc'");
	}
	
	sub _get_keyword {
		my $self	= shift;
		$self->get_char_safe('@');
		if ($self->buffer =~ /^base/o) {
			$self->read_word('base');

meta/changes.ttl view on Meta::CPAN

			[ a dcs:Update ; rdfs:label "Changed Attean::TriplePattern->as_quadpattern to delegate to Attean::API::TriplePattern->as_quad_pattern." ],
			[ a dcs:Update ; rdfs:label "Removed default implementation of Attean::API::Term->ebv (now required of consumers)." ],
			[ a dcs:Update ; rdfs:label "Improve Attean::SimpleQueryEvaluator to handle updated algebra classes and iterator API." ],
			[ a dcs:Update ; rdfs:label "Fixed bug in SPARQL parsing of NIL tokens." ],
			[ a dcs:Update ; rdfs:label "Added Test::Attean::StoreCleanup role and added store cleanup to store tests." ],
			[ a dcs:Update ; rdfs:label "Added Test::Attean::QuadStore->cleanup_store method." ],
			[ a dcs:Update ; rdfs:label "Updated Attean::API::AbbreviatingParser->base definition to be a consumer of Attean::API::IRI." ],
			[ a dcs:Update ; rdfs:label "Fix overly aggressive code that attempted to turn IRIs into prefix names during Turtle serialization." ],
			[ a dcs:Update ; rdfs:label "Serialize SPARQL and Turtle namespace declarations in a stable order." ],
			[ a dcs:Update ; rdfs:label "Add serialization of SPARQL PREFIX declarations and prefixnames when namespaces are set (#53)." ],
			[ a dcs:Update ; rdfs:label "Updated Attean::API::SPARQLSerializable->as_sparql to return a unicode string, not bytes." ]
	]
	.


my:v_0-015
	a					:Version ;
	dc:issued			"2016-04-09"^^xsd:date ;
	:revision			"0.015" ;
	dcterms:replaces	my:v_0-014 ;
	dcs:changeset [

meta/changes.ttl view on Meta::CPAN

my:v_0-009
	a					:Version ;
	dc:issued			"2015-11-04"^^xsd:date ;
	:revision			"0.009" ;
	dcterms:replaces	my:v_0-008 ;
	dcs:changeset [
		dcs:item
			[ a dcs:Addition ; rdfs:label "Added Attean::API::Result->shared_domain method." ],
			[ a dcs:Update ; rdfs:label "Improve query planner and plan implementations to support SPARQL 1.1 test suite." ],
			[ a dcs:Update ; rdfs:label "Removed HeapSort plan implementation and use of Array::Heap due to packaging concerns (issue #32)." ],
			[ a dcs:Update ; rdfs:label "Improve handling of unicode data in SPARQL TSV parser." ]
	]
	.


my:v_0-008
	a					:Version ;
	dc:issued			"2015-08-18"^^xsd:date ;
	:revision			"0.008" ;
	dcterms:replaces	my:v_0-007 ;
	dcs:changeset [

t/parser-ntriples.t view on Meta::CPAN

	is( $store->count_quads(blank('a')), 1, 'expected 1 count bff' );
	is( $store->count_quads(iri('a')), 1, 'expected 1 count bff' );
	is( $store->count_quads(iri('b')), 0, 'expected 0 count bff' );
	is( $store->count_quads(undef, iri('b')), 2, 'expected 2 count fbf' );
}

{
	# A \u00E9 escape inside an IRI is expected to decode to the single
	# character U+00E9 (e with acute accent).
	my $ntriples	= qq[_:eve <http://example.com/resum\\u00E9> <http://example.com/resume.html> .\n];
	my @list		= $parser->parse_list_from_bytes($ntriples);
	is( scalar(@list), 1, 'expected model size after ntriples parse' );
	# Compare against the decoded character itself, not its UTF-8 byte
	# sequence rendered as Latin-1.
	# NOTE(review): assumes this test file is compiled under `use utf8` -- confirm.
	is($list[0]->predicate->value, 'http://example.com/resumé', 'expected 1 count fbf with unicode escaping' );
}

{
	# Same escaped predicate IRI as the previous case, but with a plain
	# literal in the object position; the literal value is checked here.
	my @triples	= $parser->parse_list_from_bytes(
		qq[_:eve <http://example.com/resum\\u00E9> "Resume" .\n]
	);
	is( scalar(@triples), 1, 'expected model size after ntriples parse' );

	my $object	= $triples[0]->object;
	is( $object->value, 'Resume', 'expected 1 count fbf with unicode escaping' );
}

{
	my %got;
	my $handler	= sub {
		my $st	= shift;
		my $o	= $st->object;
		$got{ $o->ntriples_string }++
	};
	my $ntriples	= <<"END";

t/parser-sparql.t view on Meta::CPAN

	# Walk the token stream and check each token's type and value in order.
	my $iter	= $l->parse_iter_from_io($fh);
	
	expect($iter->next, KEYWORD, ['ASK']);
	expect($iter->next, LBRACE, ['{'],);
	expect($iter->next, IRI, ['s'], 'subject');
	expect($iter->next, PREFIXNAME, ['ex:', 'p'], 'predicate');
	expect($iter->next, STRING1D, ['"'], 'double quote');
	expect($iter->next, COMMA, [',']);
	expect($iter->next, STRING1S, ["'"], 'single quote');
	expect($iter->next, COMMA, [',']);
	# The expected value is the two decoded characters U+706B U+661F, not
	# their UTF-8 bytes rendered as Latin-1.
	# NOTE(review): presumably the input encodes them as \u706b and
	# \U0000661F, as in the matching Turtle lexer test -- confirm against
	# the query string defined above this excerpt. Also assumes the file
	# is compiled under `use utf8`.
	expect($iter->next, STRING1S, ["火星"], 'unicode \\u and \\U escapes');
	expect($iter->next, RBRACE, ['}'], 'escaped closing brace');
};

subtest 'custom function' => sub {
	my $sparql	= q[PREFIX ex: <http://example.org/> SELECT * WHERE { ?s ?p ?o FILTER(ex:test(?o)) }];
	open(my $fh, '<:encoding(UTF-8)', \$sparql);
	my $parser	= AtteanX::Parser::SPARQL->new();
	my ($a)		= $parser->parse($sparql);
	my ($f)		= $a->subpatterns_of_type('Attean::Algebra::Filter');
	isa_ok($f, 'Attean::Algebra::Filter');

t/parser-turtle.t view on Meta::CPAN

	# Input exercises string escapes: an escaped double quote, an escaped
	# single quote, and a string built from one \u (4 hex digit) and one
	# \U (8 hex digit) escape.
	my $turtle	= q[<s> ex:p "\\"", '\\'', '\\u706b\\U0000661F' .];
	open(my $fh, '<:encoding(UTF-8)', \$turtle);
	my $l	= AtteanX::Parser::Turtle::Lexer->new($fh);
	
	expect($l->get_token, IRI, ['s'], 'subject');
	expect($l->get_token, PREFIXNAME, ['ex:', 'p'], 'predicate');
	expect($l->get_token, STRING1D, ['"'], 'double quote');
	expect($l->get_token, COMMA, [',']);
	expect($l->get_token, STRING1S, ["'"], 'single quote');
	expect($l->get_token, COMMA, [',']);
	# \u706b and \U0000661F decode to the two characters U+706B U+661F;
	# compare against those characters, not their UTF-8 bytes rendered as
	# Latin-1.
	# NOTE(review): assumes this test file is compiled under `use utf8` -- confirm.
	expect($l->get_token, STRING1S, ["火星"], 'unicode \\u and \\U escapes');
};

subtest 'parse_term_from_bytes' => sub {
	# Parse a single long-quoted, language-tagged literal and check the
	# role, lexical value and language tag of the resulting term.
	my $turtle_parser	= Attean->get_parser('Turtle')->new();
	my $literal			= $turtle_parser->parse_term_from_bytes('"""hello"""@en');

	does_ok($literal, 'Attean::API::Literal');
	is($literal->value, 'hello');
	is($literal->language, 'en');
};

t/term.t view on Meta::CPAN

use v5.14;
use utf8;
use Data::Dumper;
use Test::Modern;
use Type::Tiny::Role;
use Attean::RDF;

# Namespace prefix for XML Schema datatypes.
my $XSD	= "http://www.w3.org/2001/XMLSchema#";

# ntriples_string serialization checks. The file is compiled under
# `use utf8`, so the non-ASCII literals below are character strings.
is(iri('http://example.org/')->ntriples_string, '<http://example.org/>', 'IRI ntriples_string');
# U+272A in an IRI is expected to be serialized as a \u272A escape.
is(iri('http://example.org/✪')->ntriples_string, '<http://example.org/\u272A>', 'unicode IRI ntriples_string');
# In a literal, the backslash and newline are expected to be escaped while
# the non-ASCII characters (U+1F436, U+272A) are emitted unchanged.
is(literal("🐶\\\n✪")->ntriples_string, qq["🐶\\\\\\n✪"], 'unicode literal ntriples_string');
is(literal('Eve')->ntriples_string, '"Eve"', 'literal ntriples_string');
is(langliteral('Eve', 'en')->ntriples_string, '"Eve"@en', 'lang-literal ntriples_string');
is(blank('eve')->ntriples_string, '_:eve', 'blank ntriples_string');

# Effective boolean value (EBV) expectations.

# Integer literals: 1 is expected to be true, 0 false.
ok(Attean::Literal->integer(1)->ebv, '1 EBV');
ok(!Attean::Literal->integer(0)->ebv, '0 EBV');

# Plain literals: the empty string is expected to be false, 'foo' true.
ok(!literal('')->ebv, '"" EBV');
ok(literal('foo')->ebv, '"foo" EBV');

# Blank nodes and IRIs are expected to be true.
ok(blank('foo')->ebv, '_:foo EBV');
ok(iri('foo')->ebv, '<foo> EBV');

( run in 1.779 second using v1.01-cache-2.11-cpan-88abd93f124 )