RDFStore
view release on metacpan or search on metacpan
lib/RDFStore/Parser/SiRPAC.pm view on Meta::CPAN
my $file = shift;
if( (defined $file) && ($file ne '') ) {
my $ret;
my @ret=();
my $file_uri;
my $scheme;
$scheme='file:'
if( (-e $file) || (!($file =~ /^\w+:/)) );
$file_uri= URI->new(((defined $scheme) ? $scheme : '' ).$file);
if ( (defined $file_uri) && (defined $file_uri->scheme) &&
($file_uri->scheme ne 'file') ) {
my $content = $class->wget($file_uri);
if(defined $content) {
if (wantarray) {
eval {
@ret = $class->parsestring($content, $file_uri,@_);
};
} else {
eval {
$ret = $class->parsestring($content, $file_uri,@_);
};
};
my $err = $@;
croak $err
if $err;
} else {
croak "Cannot fetch '$file_uri'";
};
} else {
my $filename= $file_uri->file;
# FIXME: it might be wrong in some cases
local(*FILE);
open(FILE, $filename)
or croak "Couldn't open $filename:\n$!";
binmode(FILE);
if (wantarray) {
eval {
@ret = $class->parse(*FILE,$file_uri,@_);
};
} else {
eval {
$ret = $class->parse(*FILE,$file_uri,@_);
};
};
my $err = $@;
close(FILE);
croak $err
if $err;
};
return unless defined wantarray;
return wantarray ? @ret : $ret;
};
};
sub getAttributeValue {
my ($expat,$attlist, $elName) = @_;
#print STDERR "getAttributeValue(@_): ".(caller)[2]."\n";
return
if( (ref($attlist) =~ /ARRAY/) && (!@{$attlist}) );
my $n;
for($n=0; $n<=$#{$attlist}; $n+=2) {
my $attname;
if(ref($attlist->[$n]) =~ /ARRAY/) {
#$attname = $attlist->[$n]->[0].$attlist->[$n]->[1];
$attname = $attlist->[$n]->[0];
$attname .= $attlist->[$n]->[1]
if(defined $attlist->[$n]->[1]);
} else {
$attname = $attlist->[$n];
};
return $attlist->[$n+1]
if ($attname eq $elName);
};
return;
}
sub RDFXML_StartElementHandler {
my $expat = shift;
my $tag = shift;
my @attlist = @_;
my @rdf_attlist;
my $xml_tag = $tag; # save it for later
my $sNamespace = $expat->namespace($tag);
my $parseLiteral = (($expat->{SiRPAC}->{scanMode} ne 'SKIPPING') && (parseLiteral($expat)));
if(not(defined $sNamespace)) {
my ($prefix,$suffix) = split(':',$tag);
if($prefix eq $RDFStore::Parser::SiRPAC::XMLSCHEMA_prefix) {
$sNamespace = $RDFStore::Parser::SiRPAC::XMLSCHEMA;
$tag = $expat->generate_ns_name($suffix,$sNamespace);
} else {
if( (defined $prefix) && (defined $suffix) ) {
die rdfcroak($expat,"Unresolved namespace prefix '$prefix' for '$suffix'");
} else {
unless($parseLiteral) {
my $msg = rdfwarn($expat,"Using node element '$tag' without a namespace is forbidden.");
push @{ $expat->{warnings} },$msg;
warn $msg;
return;
};
};
};
};
my $newElement;
my $setScanModeElement = 0;
if($expat->{SiRPAC}->{scanMode} eq 'SKIPPING') {
if( $sNamespace.$tag eq $RDFStore::Parser::SiRPAC::RDFMS_RDF ) {
$expat->{SiRPAC}->{scanMode} = 'RDF';
$setScanModeElement = 1;
lib/RDFStore/Parser/SiRPAC.pm view on Meta::CPAN
# Place all characters as Data instance to the containment
# hierarchy with the help of the stack.
my $e = $expat->{SiRPAC}->{elementStack}->[$#{$expat->{SiRPAC}->{elementStack}}];
# Determine whether the previous event was for
# characters. If so, update the Data node contents.
# A&B would otherwise result in three
# separate Data nodes in the parse tree
my $bHasData = 0;
my $dN;
my $dataNode;
foreach $dN (@{$e->{children}}) {
if($dN->isa('RDFStore::Parser::SiRPAC::DataElement')) {
$bHasData = 1;
$dataNode=$dN;
last;
};
};
if(!$bHasData) {
push @{$e->{children}},RDFStore::Parser::SiRPAC::DataElement->new($text, 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'},$expat->{SiRPAC}->{'rdfstore:context'});
} else {
$dataNode->{sContent} .= $text;
#not nice to see it here.....I know ;-)
$dataNode->{tag} = "[DATA: " . $dataNode->{sContent} . "]";
};
};
sub processXML {
my ($expat,$ele) = @_;
if($ele->name() eq $RDFStore::Parser::SiRPAC::RDFMS_RDF) {
my $c;
foreach $c (@{$ele->{children}}) {
if($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Description) {
processDescription($expat,$c,0,
$expat->{SiRPAC}->{bCreateBags}, $expat->{SiRPAC}->{bCreateBags});
} elsif( ($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) ) {
processContainer($expat,$c);
#strange checking here....
} elsif( (!($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_resource)) &&
(!($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_nodeID)) &&
(length($c->name())>0) ) {
processTypedNode($expat,$c);
};
};
} elsif($ele->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Description) {
processDescription($expat,$ele,0,
$expat->{SiRPAC}->{bCreateBags}, $expat->{SiRPAC}->{bCreateBags});
} else {
processTypedNode($expat,$ele);
};
};
sub processDescription {
my ($expat,$ele,$inPredicate,$reify,$createBag) = @_;
#print STDERR "processDescription($expat,".$ele->name.",$inPredicate,$reify,$createBag)",((caller)[2]),"\n";
# Return immediately if the description has already been managed
return $ele->{sID}
if($ele->{bDone});
my $iChildCount=1;
my $bOnce=1;
# Determine first all relevant values
my ($sID,$sBagid,$sAbout,$sAboutEach) = (
$ele->{sID},
$ele->{sBagID},
$ele->{sAbout},
$ele->{sAboutEach} );
my $target = (defined $ele->{vTargets}->[0]) ? $ele->{vTargets}->[0] : undef;
my $targetIsContainer=0;
my $sTargetAbout='';
my $sTargetBagid='';
my $sTargetID='';
# Determine what the target of the Description reference is
if (defined $target) {
my $sTargetAbout = $target->{sAbout};
my $sTargetID = $target->{sID};
my $sTargetBagid = $target->{sBagID};
# Target is collection if
# 1. it is identified with bagID attribute
# 2. it is identified with ID attribute and is a collection
if ( ((defined $sTargetBagid) && ($sTargetBagid ne '')) &&
((defined $sAbout) && ($sAbout ne '')) ) {
# skip '#' sign??
$targetIsContainer = ($sAbout =~ /^.$sTargetBagid/);
} else {
if ( ((defined $sTargetID) && ($sTargetID ne '')) &&
((defined $sAbout) && ($sAbout ne '')) &&
($sAbout =~ /^.$sTargetID/) &&
( ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) ) ) {
$targetIsContainer = 1;
};
};
};
# Check if there are properties encoded using the abbreviated syntax
expandAttributes($expat,$ele,$ele,0);
# Manage the aboutEach attribute here
if( ((defined $sAboutEach) && ($sAboutEach ne '')) && (defined $target) ) {
if( ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) ) {
my $ele1;
foreach $ele1 (@{$target->{children}}) {
if( ($ele1->name() =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
( ($ele1->localName() =~ /li$/) || ($ele1->localName() =~ /_/) ) ) {
my $sResource = $ele1->{sResource};
# Manage <li resource="..." /> case
lib/RDFStore/Parser/SiRPAC.pm view on Meta::CPAN
} else {
$ele->{sID} = $sAbout;
};
$sChildID = processPredicate($expat,$n,$ele,$sAbout,0);
};
# Each Description block creates also a Bag node which
# has links to all properties within the block IF
# the bCreateBags variable is true
if( ((defined $sBagid) && ($sBagid ne '')) || ($expat->{SiRPAC}->{bCreateBags} && $createBag) ) {
my $sNamespace = $RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS;
# do only once and only if there is a child
if( ($bOnce) && ((defined $sChildID) && ($sChildID ne '')) ) {
$bOnce = 0;
my $nodeID = getAttributeValue($expat, $ele->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
if ( defined $nodeID ) {
if(not(((defined $ele->{sBagID}) && ($ele->{sBagID} ne '')))) {
$ele->{sBagID} = 'rdf:nodeID:'.$nodeID;
};
} else {
$ele->{sBagID} = newReificationID($expat)
if(not(((defined $ele->{sBagID}) && ($ele->{sBagID} ne ''))));
};
$ele->{sID} = normalizeResourceIdentifier($expat,$ele->{sBagID})
if(not(((defined $ele->{sID}) && ($ele->{sID} ne ''))));
addTriple( $expat,
buildResource( $expat,$sNamespace,'type'),
buildResource( $expat,$ele->{sBagID}),
buildResource( $expat,$sNamespace,'Bag'),
$ele->{'context'}
);
};
if ((defined $sChildID) && ($sChildID ne '')) {
addTriple( $expat,
buildResource( $expat,$sNamespace,"_".$iChildCount),
buildResource( $expat,$ele->{sBagID}),
buildResource( $expat,$sChildID),
$ele->{'context'}
);
$iChildCount++;
};
};
};
};
$ele->{bDone} = 1;
return $ele->{sID};
};
# we could use URI and XPath modules to validate and normalise the subject, predicate, object
# Use XPath/XPointer for literals could be cool to have one unique uri thing
sub addTriple {
my ($expat,$predicate,$subject,$object, $context) = @_;
#print STDERR "addTriple('".
# (($predicate) ? $predicate->toString : '')."','".
# (($subject) ? $subject->toString : '')."','".
# (($object) ? $object->toString : '')."')",((caller)[2]),"\n";
# If there is no subject (rdf:about="") or object (rdf:resource=""), then use the URI/filename where the RDF description came from
carp "Predicate null when subject=".$subject->toString." and object=".$object->toString
unless(defined $predicate);
carp "Subject null when predicate=".$predicate->toString." and object=".$object->toString
unless(defined $subject);
carp "Object null when predicate=".$predicate->toString." and subject=".$subject->toString
unless(defined $object);
$subject = buildResource( $expat,$expat->{'sSource'})
unless( (defined $subject) &&
($subject->toString()) &&
(length($subject->toString())>0) );
if( (defined $object) &&
(ref($object)) &&
($object->isa("RDFStore::Resource")) ) {
$object = buildResource( $expat,$expat->{'sSource'})
unless( (defined $object) &&
($object->toString()) &&
(length($object->toString())>0) );
};
# ignore rdfstore:context triples due they are used simply to set context/provenance info
return
if( ($predicate->toString eq $RDFStore::Parser::SiRPAC::RDFSTORE_context) ||
($predicate->toString eq $RDFStore::Parser::SiRPAC::RDFSTORE_contextnodeID) );
#Trigger 'Assert' event
my $assert = $expat->{SiRPAC}->{parser}->{Handlers}->{Assert}
if(ref($expat->{SiRPAC}->{parser}->{Handlers}) =~ /HASH/);
if (defined($assert)) {
return &$assert($expat,
$expat->{SiRPAC}->{nodeFactory}->createStatement($subject,$predicate,$object, $context) ); #context too nowdays...
} else {
return;
};
};
sub newReificationID {
my ($expat) = @_;
#print STDERR "newReificationID($expat): ",((caller)[2]),"\n";
# try to generate system/run wide unique ID i.e. 'S' + unpack("H*", rand()) + 'P' + $$ + 'T' + time() + 'N' + GenidNumber
return 'rdf:nodeID:genidrdfstore' .
'S'.$expat->{SiRPAC}->{'rand_seed'} .
'P'. $$.
'T'. $expat->{SiRPAC}->{'timestamp'} .
'N'. $expat->{SiRPAC}->{iReificationCounter}++;
};
sub processTypedNode {
my ($expat,$typedNode) = @_;
#print STDERR "processTypedNode(".$typedNode->{tag}."): ",((caller)[2]),"\n";
my $sID = $typedNode->{sID};
my $sBagID = $typedNode->{sBagID};
my $sAbout = $typedNode->{sAbout};
my $target = (defined $typedNode->{vTargets}->[0]) ? $typedNode->{vTargets}->[0] : undef;
my $sAboutEach = $typedNode->{sAboutEach};
if ( (defined $typedNode->{sResource}) && ($typedNode->{sResource} ne '') && ($typedNode->{sResource} !~ /^rdf:nodeID:/) ) {
die rdfcroak($expat,"'resource' attribute not allowed for a typedNode '".$typedNode->name()."' - see <a href=\"http://www.w3.org/TR/REC-rdf-syntax/#typedNode\">[6.13]</a>");
};
# We are going to manage this typedNode using the processDescription
# routine later on. Before that, place all properties encoded as
# attributes to separate child nodes.
my $n;
for($n=0; $n<=$#{$typedNode->{attlist}}; $n+=2) {
my $sAttribute = $typedNode->{attlist}->[$n]->[0].$typedNode->{attlist}->[$n]->[1];
my $sValue = getAttributeValue($expat, $typedNode->{attlist},$sAttribute);
if ( defined $sValue ) {
$sValue =~ s/^([ ])+//g;
$sValue =~ s/([ ])+$//g;
};
if ( (!($sAttribute =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/)) &&
(!($sAttribute =~ m|^$RDFStore::Parser::SiRPAC::XMLSCHEMA|)) ) {
if( (defined $sValue) &&
(length($sValue) > 0) ) {
my $newPredicate = RDFStore::Parser::SiRPAC::Element->new(
$typedNode->{attlist}->[$n]->[0],
$typedNode->{attlist}->[$n]->[1],[
[undef,$RDFStore::Parser::SiRPAC::RDFMS_ID],
( ((defined $sAbout) && ($sAbout ne '')) ? $sAbout :
(defined $sID) ? ( $sID =~ /^#/ ) ? $sID : '#'.$sID :
'' ),
[undef,$RDFStore::Parser::SiRPAC::RDFMS_bagID],
(defined $sBagID) ? ( $sBagID =~ /^#/ ) ? $sBagID : '#'.$sBagID : ''
],
$expat->{SiRPAC}->{'xml:lang'},
$expat->{SiRPAC}->{'rdf:datatype'},
$expat->{SiRPAC}->{'rdfstore:context'});
my $newData = RDFStore::Parser::SiRPAC::DataElement->new($sValue, 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
push @{$newPredicate->{children}},$newData;
push @{$typedNode->{children}},$newPredicate;
# removeAttribute
my @rr;
my $i;
for($i=0; $i<=$#{$typedNode->{attlist}}; $i+=2) {
my $a = $typedNode->{attlist}->[$i]->[0].$typedNode->{attlist}->[$i]->[1];
if($a eq $sAttribute) {
next;
} else {
push @rr,($typedNode->{attlist}->[$i],$typedNode->{attlist}->[$i+1]);
};
};
$typedNode->{attlist} = \@rr;
};
};
};
my $sObject;
my $nodeID;
if(defined $target) {
$sObject = ( (((defined $target->{sBagID}) && ($target->{sBagID} ne ''))) ? $target->{sBagID} : $target->{sID});
} elsif((defined $sAbout) && ($sAbout ne '')){ #this makes failing t/rdfcore-tests/xmlbase/test008.rdf and t/rdfcore-tests/xmlbase/test013.rdf
$sObject = $sAbout;
} elsif((defined $sID) && ($sID ne '')) {
$sObject = $sID;
} else {
$nodeID = getAttributeValue($expat, $typedNode->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
if ( defined $nodeID ) {
$sObject = 'rdf:nodeID:'.$nodeID;
} else {
$sObject = newReificationID($expat);
};
};
$typedNode->{sID} = normalizeResourceIdentifier($expat,$sObject);
# special case: should the typedNode have aboutEach attribute,
# the type predicate should distribute to pointed
# collection also -> create a child node to the typedNode
if ( ((defined $sAboutEach) && ($sAboutEach ne '')) &&
(scalar(@{$typedNode->{vTargets}})>0) ) {
my $newPredicate = RDFStore::Parser::SiRPAC::Element->new($RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type', undef, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
my $newData = RDFStore::Parser::SiRPAC::DataElement->new($typedNode->name(), 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
push @{$newPredicate->{children}},$newData;
push @{$typedNode->{children}},$newPredicate;
} else {
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
buildResource( $expat,$typedNode->{sID}),
buildResource( $expat,$typedNode->namespace,$typedNode->localName),
$typedNode->{'context'}
);
};
my $sDesc = processDescription($expat,$typedNode, 0, $expat->{SiRPAC}->{bCreateBags}, 0);
return $sObject;
};
sub processContainer {
my ($expat,$n) = @_;
#print STDERR "processContainer($n)",((caller)[2]),"\n";
my $sID = $n->{sID};
$sID = $n->{sAbout}
unless((defined $sID) && ($sID ne ''));
my $nodeID = getAttributeValue($expat, $n->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
if( defined $nodeID ) {
$sID = 'rdf:nodeID:'.$nodeID
unless((defined $sID) && ($sID ne ''));
} else {
$sID = newReificationID($expat)
unless((defined $sID) && ($sID ne ''));
};
# Do the instantiation only once
if(!($n->{bDone})) {
if($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) {
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
buildResource( $expat,$sID),
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Seq'),
$n->{'context'}
);
} elsif($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) {
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
buildResource( $expat,$sID),
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Alt'),
$n->{'context'}
);
} elsif($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) {
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
buildResource( $expat,$sID),
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Bag'),
$n->{'context'}
);
};
$n->{bDone} = 1;
};
expandAttributes($expat,$n,$n,0);
if( (scalar(@{$n->{children}})<=0) &&
($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ) {
die rdfcroak($expat,"An RDF:Alt container must have at least one nested listitem");
};
my $iCounter = 1;
my $n2;
my $object_elements=1;
foreach $n2 (@{$n->{children}}) {
if ( (defined $n2) &&
(defined $n2->name()) &&
(defined $n2->localName()) &&
($n2->name() =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
( ($n2->localName() =~ /li$/) || ($n2->localName() =~ /_/) ) ) {
# added by AR 2003/10/05 accordingly to W3C RDF Core #rdf-containers-formalmodel issue
# (see http://lists.w3.org/Archives/Public/w3c-rdfcore-wg/2001Jul/0039.html)
my $isli = ($n2->localName() =~ /li$/) ? 1 : 0;
if($n2->localName() =~ m/_(\d+)$/) {
lib/RDFStore/Parser/SiRPAC.pm view on Meta::CPAN
};
sub buildResource {
my ($expat, $ns, $ln) = @_;
my $factory = $expat->{SiRPAC}->{nodeFactory};
if ( !$ln and ( $ns =~ s/^rdf:nodeID:// ) ) {
#Trigger 'manage_bNodes' event
my $manage_bnodes = $expat->{SiRPAC}->{parser}->{Handlers}->{manage_bNodes}
if(ref($expat->{SiRPAC}->{parser}->{Handlers}) =~ /HASH/);
if (defined($manage_bnodes)) {
return &$manage_bnodes($expat, $factory, $ns);
} else {
return $factory->createAnonymousResource( $ns );
};
} else {
return $factory->createResource( $ns, $ln );
};
};
sub buildLiteral {
my ($factory) = shift;
return $factory->createLiteral( @_ );
};
sub rdfwarn {
my ($expat, $message) = @_;
my $source = $expat->{'sSource'};
my $line = $expat->current_line;
my $column = $expat->current_column;
my $byte = $expat->current_byte;
$message .= " in $source at line $line, column $column, byte $byte";
return $message;
};
sub rdfcroak {
my ($expat, $message) = @_;
my $source = $expat->{'sSource'};
my $eclines = $expat->{ErrorContext};
my $line = $expat->current_line;
my $column = $expat->current_column;
my $byte = $expat->current_byte;
#$message .= " in $source at line $line, column $column, byte $byte";
$message .= " at line $line, column $column, byte $byte"; # it seems the source file is already magically included by caller die() sub...
$message .= ":\n" . $expat->position_in_context($eclines)
if defined($eclines);
return $message;
};
# processPredicate handles all elements not defined as special
# RDF elements. <tt>predicate</tt> has either <tt>resource()</tt> or a single child
sub processPredicate {
my ($expat,$predicate,$description,$sTarget,$reify) = @_;
#print STDERR "processPredicate($predicate->{tag},$description->{tag},$sTarget,$reify)",((caller)[2]),"\n";
my $sStatementID = $predicate->{sID};
my $sBagID = $predicate->{sBagID};
my $sResource = $predicate->{sResource};
# If a predicate has other attributes than rdf:ID, rdf:bagID,
# or xmlns... -> generate new triples according to the spec.
# (See end of Section 6)
# this new element may not be needed
my $d = RDFStore::Parser::SiRPAC::Element->new(undef,$RDFStore::Parser::SiRPAC::RDFMS_Description, undef, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
if(expandAttributes($expat,$d,$predicate,1,$sResource)) {
# error checking
if(scalar(@{$predicate->{children}})>0) {
die rdfcroak($expat,$predicate->name()." must be an empty element since it uses propAttr grammar production - see <a href=\"http://www.w3.org/TR/REC-rdf-syntax/#propertyElt\">[6.12]</a>");
return;
};
# determine the 'about' part for the new statements
if ((defined $sStatementID) && ($sStatementID ne '')) {
push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_about];
push @{$d->{attlist}},$sStatementID;
} elsif ((defined $sResource) && ($sResource ne '')) {
push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_about];
push @{$d->{attlist}},$sResource;
} else {
my $nodeID = getAttributeValue($expat, $predicate->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID); # XXXXXXX to be checked
if( defined $nodeID ) {
$sStatementID = 'rdf:nodeID:'.$nodeID;
} else {
$sStatementID = newReificationID($expat);
};
push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_nodeID]; # XXXXXXXX to be checked
push @{$d->{attlist}},$sStatementID;
};
if ((defined $sBagID) && ($sBagID ne '')) {
push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_bagID];
push @{$d->{attlist}},$sBagID;
$d->{sBagID} = $sBagID;
};
processDescription($expat,$d, 0,0,$expat->{SiRPAC}->{bCreateBags});
};
# Tricky part: if the resource attribute is present for a predicate
# AND there are no children, the value of the predicate is either
# 1. the URI in the resource attribute OR
# 2. the node ID of the resolved #resource attribute
my $predicate_target = (defined $predicate->{vTargets}->[0]) ? $predicate->{vTargets}->[0] : undef;
if( ((defined $sResource) && ($sResource ne '')) && (scalar(@{$predicate->{children}})<=0) ) {
if (not(defined $predicate_target)) {
if ( ($reify) ||
( (defined $predicate->{sID}) &&
($predicate->{sID} ne '') ) ) {
$sStatementID = reify( $expat,
buildResource( $expat,$predicate->namespace,$predicate->localName),
buildResource( $expat,$sTarget),
buildResource( $expat,$sResource),
$predicate->{sID},
$predicate);
lib/RDFStore/Parser/SiRPAC.pm view on Meta::CPAN
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'rest'),
buildResource( $expat,$currentID),
buildResource( $expat,$collectionID),
$predicate->{'context'} #take the one of the predicate in this case
);
} else { #process once the predicate if collection
addTriple ( $expat,
buildResource( $expat,$predicate->namespace,$predicate->localName),
buildResource( $expat,$sTarget),
buildResource( $expat,$collectionID),
$predicate->{'context'} #take the one of the predicate in this case
);
};
$currentID=$collectionID;
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'first'),
buildResource( $expat,$collectionID),
buildResource( $expat,$sStatementID),
$predicate->{'context'} #take the one of the predicate in this case
);
} else {
addTriple ( $expat,
buildResource( $expat,$predicate->namespace,$predicate->localName),
buildResource( $expat,$sTarget),
buildResource( $expat,$sStatementID),
$predicate->{'context'}
);
};
$j++;
$object_elements++;
};
};
if( ($j>0) &&
($predicate->{isCollection}) ) {
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'rest'),
buildResource( $expat,$currentID),
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'nil'),
$predicate->{'context'} #take the one of the predicate in this case
);
};
return $sStatementID;
};
sub reify {
my ($expat,$predicate,$subject,$object,$sNodeID,$ele) = @_;
my $nodeID = getAttributeValue($expat, $ele->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID); # XXXXXXX to be checked
if ( defined $nodeID ) {
$sNodeID = 'rdf:nodeID:'.$nodeID
if(not(((defined $sNodeID) && ($sNodeID ne ''))));
} else {
$sNodeID = newReificationID($expat)
if(not(((defined $sNodeID) && ($sNodeID ne ''))));
};
#print STDERR "reify('".$predicate->toString."','".$subject->toString."','".$object->toString."','$sNodeID','$ele')",((caller)[2]),"\n";
# The original statement must remain in the data model
addTriple($expat,$predicate, $subject, $object, $ele->{'context'});
# Do not reify reifyd properties
if ( ($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_subject) ||
($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_predicate) ||
($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_object) ||
($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_type) ) {
return;
};
# Reify by creating 4 new triples
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'predicate'),
buildResource( $expat,$sNodeID),
$predicate,
$ele->{'context'} );
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'subject'),
buildResource( $expat,$sNodeID),
#bug fix by AR 2001/06/10
(length($subject->toString()) == 0) ?
buildResource( $expat,$expat->{'sSource'}.'#' ) :
$subject,
$ele->{'context'}
);
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'object'),
buildResource( $expat,$sNodeID),
$object,
$ele->{'context'});
addTriple( $expat,
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
buildResource( $expat,$sNodeID),
buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Statement'),
$ele->{'context'}
);
return $sNodeID;
};
# Take an element <i>ele</i> with its parent element <i>parent</i>
# and evaluate all its attributes to see if they are non-RDF specific
# and non-XML specific in which case they must become children of
# the <i>ele</i> node.
sub expandAttributes {
my ($expat,$parent,$ele,$predicateNode,$resourceValue) = @_;
#print "expandAttributes(".$parent->name().",".$ele->name().",$predicateNode)",((caller)[2]),"\n";
#use Data::Dumper;
#print Dumper($parent);
my $foundAbbreviation = 0;
my $resourceFound = 0;
my $count=0;
while ($count<=$#{$ele->{attlist}}) {
my $sAttribute = ( (defined $ele->{attlist}->[$count++]->[0]) ? $ele->{attlist}->[$count-1]->[0] : '').( (defined $ele->{attlist}->[$count-1]->[1]) ? $ele->{attlist}->[$count-1]->[1] : '');
my $sValue = getAttributeValue($expat, $ele->{attlist},$sAttribute);
#perhaps should next if not defined $sValue.....
$count++;
if ($sAttribute =~ m|^$RDFStore::Parser::SiRPAC::XMLSCHEMA|) {
# expands after parsing, that's why it is useless here... :(
# because of concatenation without : inbetween
# ...there was something more here to do....
next;
};
# exception: expand rdf:value and rdf:type and rdf:li elements - the last two must be resources and not literal values anyway
# test http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-ns-prefix-confusion/test0006.rdf was failing and others - we fixed
# processPredicate() method after attributes have been expanded to force resource object nodes for rdf:type on predicate with rdf:resource
if ( ($sAttribute =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
(!($ele->{attlist}->[$count-2]->[1]=~ /^_/)) && #this might be buggy by AR 2001/05/28
(!($ele->{attlist}->[$count-2]->[1] =~ /^value$/)) &&
(!($ele->{attlist}->[$count-2]->[1] =~ /^type$/)) ) {
# If an attribute (e.g. a property that follows the
# propAttr production) is not qualified but its enclosing
# (parent) element is from the RDFMS namespace, then the
# attribute was prefaced with RDFMS in RDFXML_StartElementHandler().
# This must be handled here so that the propAttr is added to the Model.
if( ($ele->{attlist}->[$count-2]->[1] =~ /resource$/) &&
($predicateNode) ) {
$resourceFound = 1;
next;
};
next
if( ($ele->{attlist}->[$count-2]->[1] =~ /ID$/) ||
($ele->{attlist}->[$count-2]->[1] =~ /bagID$/) ||
($ele->{attlist}->[$count-2]->[1] =~ /about$/) ||
($ele->{attlist}->[$count-2]->[1] =~ /aboutEach$/) ||
($ele->{attlist}->[$count-2]->[1] =~ /datatype$/) ||
($ele->{attlist}->[$count-2]->[1] =~ /parseType$/) );
};
# expanding predicate element
#if( ($predicateNode) &&
# (!($sAttribute eq $RDFStore::Parser::SiRPAC::RDFMS_resource)) &&
# (!($sAttribute eq $RDFStore::Parser::SiRPAC::RDFMS_nodeID)) ) {
# die rdfcroak($expat,"Property element ". $ele->name()." has invalid attribute ".$sAttribute.". Only rdf:resource is allowed.");
#};
$foundAbbreviation = 1;
#xml:lang, rdf:datatypae and rdfstore:context settings are taken from the current $ele being expanded
my $newElement = RDFStore::Parser::SiRPAC::Element->new($ele->{attlist}->[$count-2]->[0],$ele->{attlist}->[$count-2]->[1], undef, $ele->{'lang'}, $ele->{'rdf:datatype'}, $ele->{'context'});
my $newData = RDFStore::Parser::SiRPAC::DataElement->new($sValue, 0, $ele->{'lang'}, $ele->{'rdf:datatype'}, $ele->{'context'});
push @{$newElement->{children}},$newData;
push @{$parent->{children}},$newElement;
};
# If an rdf:resource propAttr was found in this predicate then
# cache its value in each of the predicate's elements. The value
# of the this propAttr will be the subject of all of the propAttr's triples
if( ($resourceFound) && (defined $resourceValue) ) {
my $i=0;
foreach $i (0..$#{$parent->{children}}) {
$parent->{children}->[$i]->{sResource}= $resourceValue;
};
};
#print STDERR "----".$ele->{tag}."\n";
#map {
#if(ref($_)) {
# print STDERR $_->[0],$_->[1],"=";
#} else {
# print STDERR $_,"\n";
#};
#} @{$ele->{attlist}};
return $foundAbbreviation;
};
sub parseLiteral {
my ($expat) = @_;
#print STDERR "parseLiteral(): ".(caller)[2]."\n";
foreach(reverse @{$expat->{SiRPAC}->{elementStack}}) {
my $sParseType = getAttributeValue( $expat,
$_->{attlist},
$RDFStore::Parser::SiRPAC::RDFMS_parseType );
return 1
if( (defined $sParseType) &&
($sParseType ne "Resource") &&
($sParseType ne "Collection") );
};
return 0;
};
sub parseResource {
my ($expat) = @_;
#print STDERR "parseResource($expat)",((caller)[2]),"\n";
foreach(reverse @{$expat->{SiRPAC}->{elementStack}}) {
my $sParseType = getAttributeValue( $expat,
$_->{attlist},
$RDFStore::Parser::SiRPAC::RDFMS_parseType );
return 1
if( (defined $sParseType) &&
($sParseType eq "Resource") );
};
return 0;
};
sub normalizeResourceIdentifier {
my ($expat,$sURI) = @_;
return $sURI
if ( $sURI =~ /^rdf:nodeID:/ ); #do not touch bNodes
#print STDERR "normalizeResourceIdentifier(['",$expat->base,"'],'$sURI')".(caller)[2]."\n";
my $xml_base = $expat->base; #which is also set if a SourceBase is passed to the parser
my $URL = URI->new($sURI);
if( (defined $URL->scheme) &&
( $URL->scheme ne 'file') ) {
# If sURI is an absolute URI, don't bother
# with it
return $sURI;
} elsif( (defined $sURI) &&
( (defined $xml_base) &&
($xml_base ne '') ) ) {
$xml_base =~ s/#.*$//;
# see why at http://www.w3.org/TR/2003/PR-rdf-testcases-20031215/#sec-uri-encoding
# NOTE: the URI module does correctly escape UTF-8-ish chars using '%' notation but RDF/XML "should" not (see above link)
my $vURI = ($sURI !~ m/^#/) ? $sURI : '';
my $absoluteURL;
if( $xml_base =~ m/^(http|file):/ ) {
my $path = new URI( $xml_base );
$path = $path->path
if($path);
$vURI = $1 . $vURI # keep the file part otherwise the URI relative methods/flags below would drop it
if( ($vURI ne $sURI) &&
($path =~ m/([^\/]+\.[^\/]+)$/) );
local $URI::ABS_REMOTE_LEADING_DOTS = 1;
$absoluteURL = URI->new( $vURI )->abs( $xml_base ); #let URI module to sort out relative paths and make it absolute to xml_base
} else {
$absoluteURL = $xml_base . $vURI;
};
if(defined $absoluteURL) {
return $absoluteURL.( ($sURI !~ m/^#/) ? '' : $sURI );
} else {
carp "Cannot combine $xml_base with $sURI";
};
} else {
$sURI = '#'.$sURI
unless($sURI =~ /^#/);
return $sURI;
};
};
package RDFStore::Parser::SiRPAC::Element;
{
sub new {
my ($pkg, $namespace, $tag, $attlist, $lang, $datatype, $context) = @_;
$attlist = []
unless(defined $attlist);
#print STDERR "RDFStore::Parser::SiRPAC::Element::new( @_ ): ".(caller)[2]."\n";
my $self = {
tag => $tag,
sNamespace => $namespace,
attlist => $attlist,
children => [],
vTargets => [],
bDone => 0,
isCollection => 0,
#at this level is just because SiRPAC parsing struct is broken (wrong to propagate XML attribute on elements)
'lang' => $lang, #xml:lang
'rdf:datatype' => $datatype, #rdf:datatype
'context' => $context #rdfstore:context
};
bless $self,$pkg;
};
sub name {
return (defined $_[0]->{sNamespace}) ?
$_[0]->{sNamespace}.$_[0]->{tag} :
$_[0]->{tag};
};
sub localName {
return $_[0]->{tag};
};
sub namespace {
return $_[0]->{sNamespace};
};
};
package RDFStore::Parser::SiRPAC::DataElement;
{
@RDFStore::Parser::SiRPAC::DataElement::ISA = qw( RDFStore::Parser::SiRPAC::Element );
sub new {
my ($pkg, $text, $parsetype, $lang, $datatype, $context) = @_;
#print STDERR "RDFStore::Parser::SiRPAC::DataElement::new( @_ ): ".(caller)[2]."\n";
my $self = $pkg->SUPER::new(undef,$text,undef,$lang, $datatype, $context);
delete $self->{sNamespace}; # we do not need it
delete $self->{attlist}; # we do not need it
$self->{'parse_type'} = ( $parsetype or
$datatype eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral' ) ? 1 : 0; #Literal or Resource
$self->{tag} = "[DATA: " . $text . "]";
$self->{sContent} = $text; #instanceOf Data :-)
bless $self,$pkg;
};
sub name { };
sub localName { };
sub namespace { };
};
1;
};
__END__
=head1 NAME
RDFStore::Parser::SiRPAC - This module implements a streaming RDF Parser as a direct implementation of XML::Parser::Expat(3)
=head1 SYNOPSIS
use RDFStore::Parser::SiRPAC;
use RDFStore::NodeFactory;
my $p=new RDFStore::Parser::SiRPAC(
ErrorContext => 2,
Handlers => {
Init => sub { print "INIT\n"; },
Final => sub { print "FINAL\n"; },
Assert => sub { print "STATEMENT - @_\n"; }
},
NodeFactory => new RDFStore::NodeFactory() );
$p->parsefile('http://www.gils.net/bsr-gils.rdfs');
$p->parsefile('http://www.gils.net/rdf/bsr-gils.rdfs');
$p->parsefile('/some/where/my.rdf');
$p->parsefile('file:/some/where/my.rdf');
$p->parse(*STDIN); #parse stream but with *blocking* Expat (see below example for n-blocking parsing using XML::Parse::ExpatNB)
use RDFStore::Parser::SiRPAC;
use RDFStore::NodeFactory;
my $pstore=new RDFStore::Parser::SiRPAC(
ErrorContext => 2,
Style => 'RDFStore::Parser::Styles::RDFStore::Model',
NodeFactory => new RDFStore::NodeFactory(),
style_options => {
persistent => 1,
seevalues => 1,
store_options => { Name => '/tmp/test' }
}
);
my $rdfstore_model = $pstore->parsefile('http://www.gils.net/bsr-gils.rdfs');
( run in 0.482 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )