RDFStore

 view release on metacpan or  search on metacpan

lib/RDFStore/Parser/SiRPAC.pm  view on Meta::CPAN


	my $file = shift;

	if( (defined $file) && ($file ne '') ) {
		my $ret;
		my @ret=();
		my $file_uri;
		my $scheme;
		$scheme='file:'
			if( (-e $file) || (!($file =~ /^\w+:/)) );
                $file_uri= URI->new(((defined $scheme) ? $scheme : '' ).$file);
		if (	(defined $file_uri) && (defined $file_uri->scheme)	&&
			($file_uri->scheme ne 'file') ) {
  			my $content = $class->wget($file_uri);
			if(defined $content) {
				if (wantarray) { 	
					eval {
						@ret = $class->parsestring($content, $file_uri,@_);
    					};
				} else {
					eval {
						$ret = $class->parsestring($content, $file_uri,@_);
    					};
				};
    				my $err = $@;
    				croak $err 	
					if $err;
                        } else {
				croak "Cannot fetch '$file_uri'";
			};
    		} else {
			my $filename= $file_uri->file;

			# FIXME: it might be wrong in some cases
			local(*FILE);
			open(FILE, $filename) 
				or  croak "Couldn't open $filename:\n$!";
			binmode(FILE);
			if (wantarray) { 	
				eval {
					@ret = $class->parse(*FILE,$file_uri,@_);
    				};
			} else {
				eval {
					$ret = $class->parse(*FILE,$file_uri,@_);
    				};
			};
    			my $err = $@;
    			close(FILE);
    			croak $err 	
				if $err;
		};
		return unless defined wantarray;
		return wantarray ? @ret : $ret;
  	};
};

sub getAttributeValue {
	my ($expat,$attlist, $elName) = @_;

#print STDERR "getAttributeValue(@_): ".(caller)[2]."\n";

  	return
		if( (ref($attlist) =~ /ARRAY/) && (!@{$attlist}) );
	my $n;
	for($n=0; $n<=$#{$attlist}; $n+=2) {
    		my $attname;
		if(ref($attlist->[$n]) =~ /ARRAY/) {
    			#$attname = $attlist->[$n]->[0].$attlist->[$n]->[1];
    			$attname = $attlist->[$n]->[0];
    			$attname .= $attlist->[$n]->[1]
				if(defined $attlist->[$n]->[1]);
		} else {
			$attname = $attlist->[$n];
		};

    		return $attlist->[$n+1]
			if ($attname eq $elName);
  	};
  	return;
}

sub RDFXML_StartElementHandler {
	my $expat = shift;
	my $tag = shift;
	my @attlist = @_;

	my @rdf_attlist;

	my $xml_tag = $tag; # save it for later

	my $sNamespace = $expat->namespace($tag);

	my $parseLiteral = (($expat->{SiRPAC}->{scanMode} ne 'SKIPPING') && (parseLiteral($expat)));

	if(not(defined $sNamespace)) {			
		my ($prefix,$suffix) = split(':',$tag);
		if($prefix eq $RDFStore::Parser::SiRPAC::XMLSCHEMA_prefix) {
			$sNamespace = $RDFStore::Parser::SiRPAC::XMLSCHEMA;
			$tag = $expat->generate_ns_name($suffix,$sNamespace);
		} else {
			if( (defined $prefix) && (defined $suffix) ) {
				die rdfcroak($expat,"Unresolved namespace prefix '$prefix' for '$suffix'");
			} else {
				unless($parseLiteral) {
					my $msg = rdfwarn($expat,"Using node element '$tag' without a namespace is forbidden.");
					push @{ $expat->{warnings} },$msg;
					warn $msg;
					return;
					};
				};
			};
        	};

	my $newElement;

	my $setScanModeElement = 0;
	if($expat->{SiRPAC}->{scanMode} eq 'SKIPPING') {
		if( $sNamespace.$tag eq $RDFStore::Parser::SiRPAC::RDFMS_RDF ) {
                        $expat->{SiRPAC}->{scanMode} = 'RDF';
                        $setScanModeElement = 1;

lib/RDFStore/Parser/SiRPAC.pm  view on Meta::CPAN

     	# Place all characters as Data instance to the containment
     	# hierarchy with the help of the stack.
    	my $e = $expat->{SiRPAC}->{elementStack}->[$#{$expat->{SiRPAC}->{elementStack}}];

	# Determine whether the previous event was for
	# characters. If so, update the Data node contents.
	# A&amp;B would otherwise result in three
	# separate Data nodes in the parse tree
	my $bHasData = 0;
        my $dN;
        my $dataNode;
        foreach $dN (@{$e->{children}}) {
                if($dN->isa('RDFStore::Parser::SiRPAC::DataElement')) {
                        $bHasData = 1;
                        $dataNode=$dN;
                        last;
                };
        };

        if(!$bHasData) {
                push @{$e->{children}},RDFStore::Parser::SiRPAC::DataElement->new($text, 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'},$expat->{SiRPAC}->{'rdfstore:context'});
        } else {
                $dataNode->{sContent} .= $text;
                #not nice to see it here.....I know ;-)
                $dataNode->{tag} = "[DATA: " . $dataNode->{sContent} . "]";
        	};
	};

sub processXML {
	my ($expat,$ele) = @_;

	if($ele->name() eq $RDFStore::Parser::SiRPAC::RDFMS_RDF) {
		my $c;
		foreach $c (@{$ele->{children}}) {
			if($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Description) {
				processDescription($expat,$c,0,
					$expat->{SiRPAC}->{bCreateBags}, $expat->{SiRPAC}->{bCreateBags});
			} elsif( 	($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
					($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
					($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag)	)  {
				processContainer($expat,$c);
			#strange checking here....
			} elsif( 	(!($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_resource)) && 
					(!($c->name() eq $RDFStore::Parser::SiRPAC::RDFMS_nodeID)) &&
					(length($c->name())>0) ) {
				processTypedNode($expat,$c);
			};
		};
	} elsif($ele->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Description) {
		processDescription($expat,$ele,0,
			$expat->{SiRPAC}->{bCreateBags}, $expat->{SiRPAC}->{bCreateBags});
	} else {
		processTypedNode($expat,$ele);
		};

	};

sub processDescription {
	my ($expat,$ele,$inPredicate,$reify,$createBag) = @_;

#print STDERR "processDescription($expat,".$ele->name.",$inPredicate,$reify,$createBag)",((caller)[2]),"\n";

	# Return immediately if the description has already been managed
	return $ele->{sID}
		if($ele->{bDone});

	my $iChildCount=1;
	my $bOnce=1;
	
	# Determine first all relevant values
	my ($sID,$sBagid,$sAbout,$sAboutEach) = (
									$ele->{sID},
									$ele->{sBagID},
									$ele->{sAbout},
									$ele->{sAboutEach} );
	my $target = (defined $ele->{vTargets}->[0]) ? $ele->{vTargets}->[0] : undef;

	my $targetIsContainer=0;
	my $sTargetAbout='';
	my $sTargetBagid='';
	my $sTargetID='';

	# Determine what the target of the Description reference is
	if (defined $target) {
      		my $sTargetAbout = $target->{sAbout};
      		my $sTargetID    = $target->{sID};
      		my $sTargetBagid = $target->{sBagID};

       		# Target is collection if
       		# 1. it is identified with bagID attribute
       		# 2. it is identified with ID attribute and is a collection
      		if ( ((defined $sTargetBagid) && ($sTargetBagid ne '')) && 
			((defined $sAbout) && ($sAbout ne '')) ) {
			# skip '#' sign??
        		$targetIsContainer = ($sAbout =~ /^.$sTargetBagid/);
      		} else {
        		if (	((defined $sTargetID) && ($sTargetID ne '')) &&
            			((defined $sAbout) && ($sAbout ne '')) &&
				($sAbout =~ /^.$sTargetID/) &&
				( 	($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
                                        ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
                                        ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) )	)  {
          			$targetIsContainer = 1;
        		};
      		};
    	};

	# Check if there are properties encoded using the abbreviated syntax
	expandAttributes($expat,$ele,$ele,0);

	# Manage the aboutEach attribute here
	if( ((defined $sAboutEach) && ($sAboutEach ne '')) && (defined $target) ) {
      		if( 	($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) ||
                        ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ||
                        ($target->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) ) {
			my $ele1;
			foreach $ele1 (@{$target->{children}}) {
          			if( 	($ele1->name() =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
					( ($ele1->localName() =~ /li$/) || ($ele1->localName() =~ /_/) ) ) {
            				my $sResource = $ele1->{sResource};
             				# Manage <li resource="..." /> case

lib/RDFStore/Parser/SiRPAC.pm  view on Meta::CPAN

				} else {
          				$ele->{sID} = $sAbout;
					};
					
          			$sChildID = processPredicate($expat,$n,$ele,$sAbout,0);
				};

                        # Each Description block creates also a Bag node which
                        # has links to all properties within the block IF
                        # the bCreateBags variable is true
        		if( ((defined $sBagid) && ($sBagid ne '')) || ($expat->{SiRPAC}->{bCreateBags} && $createBag) ) {
          			my $sNamespace = $RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS;
          			# do only once and only if there is a child
          			if( ($bOnce) && ((defined $sChildID) && ($sChildID ne '')) ) {
            				$bOnce = 0;
					my $nodeID = getAttributeValue($expat, $ele->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
                                	if ( defined $nodeID ) {
            					if(not(((defined $ele->{sBagID}) && ($ele->{sBagID} ne '')))) {
                                        		$ele->{sBagID} = 'rdf:nodeID:'.$nodeID;
							};
                                	} else {
              					$ele->{sBagID} = newReificationID($expat)
            						if(not(((defined $ele->{sBagID}) && ($ele->{sBagID} ne ''))));
						};
          				$ele->{sID} = normalizeResourceIdentifier($expat,$ele->{sBagID})
          					if(not(((defined $ele->{sID}) && ($ele->{sID} ne ''))));

			            	addTriple(	$expat,
							buildResource( $expat,$sNamespace,'type'),
                       					buildResource( $expat,$ele->{sBagID}),
							buildResource( $expat,$sNamespace,'Bag'),
							$ele->{'context'}
						);
          			};
				if ((defined $sChildID) && ($sChildID ne '')) {
			            	addTriple(	$expat,
							buildResource( $expat,$sNamespace,"_".$iChildCount),
                       					buildResource( $expat,$ele->{sBagID}),
							buildResource( $expat,$sChildID),
							$ele->{'context'}
						 );
            				$iChildCount++;
          			};
        		};
		};
    	};

	$ele->{bDone} = 1;

	return $ele->{sID};
	};

# we could use URI and XPath modules to validate and normalise the subject, predicate, object
# Use XPath/XPointer for literals could be cool to have one unique uri thing
sub addTriple {
	my ($expat,$predicate,$subject,$object, $context) = @_;

#print STDERR "addTriple('".
#                       (($predicate) ? $predicate->toString : '')."','".
#                       (($subject) ? $subject->toString : '')."','".
#                       (($object) ? $object->toString : '')."')",((caller)[2]),"\n";

        # If there is no subject (rdf:about="") or object (rdf:resource=""), then use the URI/filename where the RDF description came from
        carp "Predicate null when subject=".$subject->toString." and object=".$object->toString
                unless(defined $predicate);

        carp "Subject null when predicate=".$predicate->toString." and object=".$object->toString
                unless(defined $subject);

	carp "Object null when predicate=".$predicate->toString." and subject=".$subject->toString
        	unless(defined $object);

	$subject = buildResource( $expat,$expat->{'sSource'})
		unless(	(defined $subject) && 
			($subject->toString()) && 
			(length($subject->toString())>0) );

	if(	(defined $object) &&
		(ref($object)) && 
		($object->isa("RDFStore::Resource")) ) {
		$object = buildResource( $expat,$expat->{'sSource'})
			unless( (defined $object) &&
				($object->toString()) && 
				(length($object->toString())>0) );
		};

	# ignore rdfstore:context triples due they are used simply to set context/provenance info
	return
		if(	($predicate->toString eq $RDFStore::Parser::SiRPAC::RDFSTORE_context) ||
			($predicate->toString eq $RDFStore::Parser::SiRPAC::RDFSTORE_contextnodeID) );

	#Trigger 'Assert' event
        my $assert = $expat->{SiRPAC}->{parser}->{Handlers}->{Assert}
		if(ref($expat->{SiRPAC}->{parser}->{Handlers}) =~ /HASH/);
        if (defined($assert)) {
        	return &$assert($expat, 
				$expat->{SiRPAC}->{nodeFactory}->createStatement($subject,$predicate,$object, $context) ); #context too nowdays...
	} else {
		return;
		};
	};

sub newReificationID {
	my ($expat) = @_;

#print STDERR "newReificationID($expat): ",((caller)[2]),"\n";

	# try to generate system/run wide unique ID i.e. 'S' + unpack("H*", rand()) + 'P' + $$ + 'T' + time() + 'N' + GenidNumber
	return  'rdf:nodeID:genidrdfstore' .
		'S'.$expat->{SiRPAC}->{'rand_seed'} .
		'P'. $$. 
		'T'. $expat->{SiRPAC}->{'timestamp'} .
		'N'. $expat->{SiRPAC}->{iReificationCounter}++;
	};

sub processTypedNode {
	my ($expat,$typedNode) = @_;

#print STDERR "processTypedNode(".$typedNode->{tag}."): ",((caller)[2]),"\n";

	my $sID = $typedNode->{sID};
	my $sBagID = $typedNode->{sBagID};
	my $sAbout = $typedNode->{sAbout};

	my $target = (defined $typedNode->{vTargets}->[0]) ? $typedNode->{vTargets}->[0] : undef;

    	my $sAboutEach = $typedNode->{sAboutEach};

	if ( (defined $typedNode->{sResource}) && ($typedNode->{sResource} ne '') && ($typedNode->{sResource} !~ /^rdf:nodeID:/) ) {
      		die rdfcroak($expat,"'resource' attribute not allowed for a typedNode '".$typedNode->name()."' - see <a href=\"http://www.w3.org/TR/REC-rdf-syntax/#typedNode\">[6.13]</a>");
		};

	# We are going to manage this typedNode using the processDescription
	# routine later on. Before that, place all properties encoded as
	# attributes to separate child nodes.
	my $n;
	for($n=0; $n<=$#{$typedNode->{attlist}}; $n+=2) {
    		my $sAttribute = $typedNode->{attlist}->[$n]->[0].$typedNode->{attlist}->[$n]->[1];
    		my $sValue = getAttributeValue($expat, $typedNode->{attlist},$sAttribute);
		if ( defined $sValue ) {
			$sValue =~ s/^([ ])+//g;
			$sValue =~ s/([ ])+$//g;
			};

		if ( 	(!($sAttribute =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/)) &&
			(!($sAttribute =~ m|^$RDFStore::Parser::SiRPAC::XMLSCHEMA|)) ) {
        		if(	(defined $sValue) &&
				(length($sValue) > 0) ) {
              			my $newPredicate =  RDFStore::Parser::SiRPAC::Element->new(
							$typedNode->{attlist}->[$n]->[0],
							$typedNode->{attlist}->[$n]->[1],[
							[undef,$RDFStore::Parser::SiRPAC::RDFMS_ID], 
							(	((defined $sAbout) && ($sAbout ne '')) ?  $sAbout : 
								(defined $sID) ?  ( $sID =~ /^#/ ) ? $sID : '#'.$sID :
								'' ),
							[undef,$RDFStore::Parser::SiRPAC::RDFMS_bagID],
							(defined $sBagID) ? ( $sBagID =~ /^#/ ) ? $sBagID : '#'.$sBagID : ''
								],
								$expat->{SiRPAC}->{'xml:lang'}, 
								$expat->{SiRPAC}->{'rdf:datatype'},
								$expat->{SiRPAC}->{'rdfstore:context'});
				
				my $newData =  RDFStore::Parser::SiRPAC::DataElement->new($sValue, 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
				push @{$newPredicate->{children}},$newData;
				push @{$typedNode->{children}},$newPredicate;

				# removeAttribute
				my @rr;
				my $i;
				for($i=0; $i<=$#{$typedNode->{attlist}}; $i+=2) {
					my $a = $typedNode->{attlist}->[$i]->[0].$typedNode->{attlist}->[$i]->[1];
					if($a eq $sAttribute) {
						next;
					} else {
						push @rr,($typedNode->{attlist}->[$i],$typedNode->{attlist}->[$i+1]);
						};
					};
				$typedNode->{attlist} = \@rr;
        			};
      			};
    	};

	my $sObject;
	my $nodeID;
    	if(defined $target) {
		$sObject = ( (((defined $target->{sBagID}) && ($target->{sBagID} ne ''))) ? $target->{sBagID} : $target->{sID});
	} elsif((defined $sAbout) && ($sAbout ne '')){ #this makes failing t/rdfcore-tests/xmlbase/test008.rdf and t/rdfcore-tests/xmlbase/test013.rdf
      		$sObject = $sAbout;
    	} elsif((defined $sID) && ($sID ne '')) {
      		$sObject = $sID;
    	} else {
		$nodeID = getAttributeValue($expat, $typedNode->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
                if ( defined $nodeID ) {
                	$sObject = 'rdf:nodeID:'.$nodeID;
		} else {
      			$sObject = newReificationID($expat);
			};
	};

	$typedNode->{sID} = normalizeResourceIdentifier($expat,$sObject);

	# special case: should the typedNode have aboutEach attribute,
	# the type predicate should distribute to pointed
	# collection also -> create a child node to the typedNode
	if ( 	((defined $sAboutEach) && ($sAboutEach ne '')) &&
        	(scalar(@{$typedNode->{vTargets}})>0) ) {
              		my $newPredicate =  RDFStore::Parser::SiRPAC::Element->new($RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type', undef, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
			my $newData = RDFStore::Parser::SiRPAC::DataElement->new($typedNode->name(), 0, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
			push @{$newPredicate->{children}},$newData;
			push @{$typedNode->{children}},$newPredicate;
    	} else {
      		addTriple(	$expat,
				buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
				buildResource( $expat,$typedNode->{sID}),
				buildResource( $expat,$typedNode->namespace,$typedNode->localName),
				$typedNode->{'context'}
			);
    	};

    	my $sDesc = processDescription($expat,$typedNode, 0, $expat->{SiRPAC}->{bCreateBags}, 0);

    	return $sObject;
};

sub processContainer {
	my ($expat,$n) = @_;

#print STDERR "processContainer($n)",((caller)[2]),"\n";

	my $sID = $n->{sID};
      	$sID = $n->{sAbout}
    		unless((defined $sID) && ($sID ne ''));
	my $nodeID = getAttributeValue($expat, $n->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID);
	if( defined $nodeID ) {
        	$sID = 'rdf:nodeID:'.$nodeID
    			unless((defined $sID) && ($sID ne ''));
        } else {
      		$sID = newReificationID($expat)
    			unless((defined $sID) && ($sID ne ''));
		};

     	# Do the instantiation only once
	if(!($n->{bDone})) {
      		if($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Seq) {
			addTriple(	$expat,
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
					buildResource( $expat,$sID),
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Seq'),
					$n->{'context'}
				);
      		} elsif($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) {
			addTriple(	$expat,
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
					buildResource( $expat,$sID),
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Alt'),
					$n->{'context'}
				);
      		} elsif($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Bag) {
			addTriple(	$expat,
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
					buildResource( $expat,$sID),
					buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Bag'),
					$n->{'context'}
				);
      		};
		$n->{bDone} = 1;
    	};

	expandAttributes($expat,$n,$n,0);

	if( 	(scalar(@{$n->{children}})<=0) &&
      		($n->name() eq $RDFStore::Parser::SiRPAC::RDFMS_Alt) ) {
      		die rdfcroak($expat,"An RDF:Alt container must have at least one nested listitem");
    	};

	my $iCounter = 1;
	my $n2;
	my $object_elements=1;
	foreach $n2 (@{$n->{children}}) {
		if (	(defined $n2) &&
			(defined $n2->name()) &&
			(defined $n2->localName()) &&
			($n2->name() =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
			( ($n2->localName() =~ /li$/) || ($n2->localName() =~ /_/) ) ) {
			# added by AR 2003/10/05 accordingly to W3C RDF Core #rdf-containers-formalmodel issue
			# (see http://lists.w3.org/Archives/Public/w3c-rdfcore-wg/2001Jul/0039.html)
			my $isli = ($n2->localName() =~ /li$/) ? 1 : 0;
			if($n2->localName() =~ m/_(\d+)$/) {

lib/RDFStore/Parser/SiRPAC.pm  view on Meta::CPAN

};

sub buildResource {
	my ($expat, $ns, $ln) = @_;

	my $factory = $expat->{SiRPAC}->{nodeFactory};

	if ( !$ln and ( $ns =~ s/^rdf:nodeID:// ) ) {
		#Trigger 'manage_bNodes' event
        	my $manage_bnodes = $expat->{SiRPAC}->{parser}->{Handlers}->{manage_bNodes}
			if(ref($expat->{SiRPAC}->{parser}->{Handlers}) =~ /HASH/);
        	if (defined($manage_bnodes)) {
        		return &$manage_bnodes($expat, $factory, $ns);
		} else {
			return $factory->createAnonymousResource( $ns );
			};
	} else {
		return $factory->createResource( $ns, $ln );
		};
	};

sub buildLiteral {
	my ($factory) = shift;

	return $factory->createLiteral( @_ );
	};

sub rdfwarn {
	my ($expat, $message) = @_;

	my $source = $expat->{'sSource'};
	my $line = $expat->current_line;
	my $column = $expat->current_column;
	my $byte = $expat->current_byte;
	$message .= " in $source at line $line, column $column, byte $byte";

	return $message;
	};

sub rdfcroak {
	my ($expat, $message) = @_;

	my $source = $expat->{'sSource'};
	my $eclines = $expat->{ErrorContext};
	my $line = $expat->current_line;
	my $column = $expat->current_column;
	my $byte = $expat->current_byte;
	#$message .= " in $source at line $line, column $column, byte $byte";
	$message .= " at line $line, column $column, byte $byte"; # it seems the source file is already magically included by caller die() sub...
	$message .= ":\n" . $expat->position_in_context($eclines)
		if defined($eclines);

	return $message;
	};

# processPredicate handles all elements not defined as special
# RDF elements. <tt>predicate</tt> has either <tt>resource()</tt> or a single child
sub processPredicate {
	my ($expat,$predicate,$description,$sTarget,$reify) = @_;

#print STDERR "processPredicate($predicate->{tag},$description->{tag},$sTarget,$reify)",((caller)[2]),"\n";

	my $sStatementID = $predicate->{sID};
	my $sBagID       = $predicate->{sBagID};
    	my $sResource    = $predicate->{sResource};

     	# If a predicate has other attributes than rdf:ID, rdf:bagID,
     	# or xmlns... -> generate new triples according to the spec.
     	# (See end of Section 6)

	# this new element may not be needed
        my $d = RDFStore::Parser::SiRPAC::Element->new(undef,$RDFStore::Parser::SiRPAC::RDFMS_Description, undef, $expat->{SiRPAC}->{'xml:lang'}, $expat->{SiRPAC}->{'rdf:datatype'}, $expat->{SiRPAC}->{'rdfstore:context'});
    	if(expandAttributes($expat,$d,$predicate,1,$sResource)) {
      		# error checking
      		if(scalar(@{$predicate->{children}})>0) {
        		die rdfcroak($expat,$predicate->name()." must be an empty element since it uses propAttr grammar production - see <a href=\"http://www.w3.org/TR/REC-rdf-syntax/#propertyElt\">[6.12]</a>");
        		return;
      		};

      		# determine the 'about' part for the new statements
      		if ((defined $sStatementID) && ($sStatementID ne '')) {
        		push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_about];
        		push @{$d->{attlist}},$sStatementID;
      		} elsif ((defined $sResource) && ($sResource ne '')) {
        		push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_about];
        		push @{$d->{attlist}},$sResource;
      		} else {
			my $nodeID = getAttributeValue($expat, $predicate->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID); # XXXXXXX to be checked
			if( defined $nodeID ) {
                        	$sStatementID = 'rdf:nodeID:'.$nodeID;
                        } else {
				$sStatementID = newReificationID($expat);
				};
        		push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_nodeID]; # XXXXXXXX to be checked
        		push @{$d->{attlist}},$sStatementID;
      		};

		if ((defined $sBagID) && ($sBagID ne '')) {
        		push @{$d->{attlist}},[undef,$RDFStore::Parser::SiRPAC::RDFMS_bagID];
        		push @{$d->{attlist}},$sBagID;
        		$d->{sBagID} = $sBagID;
      		};

          	processDescription($expat,$d, 0,0,$expat->{SiRPAC}->{bCreateBags});
    	};
	# Tricky part: if the resource attribute is present for a predicate
	# AND there are no children, the value of the predicate is either
	# 1. the URI in the resource attribute OR
	# 2. the node ID of the resolved #resource attribute
	my $predicate_target = (defined $predicate->{vTargets}->[0]) ? $predicate->{vTargets}->[0] : undef;
    	if( ((defined $sResource) && ($sResource ne '')) && (scalar(@{$predicate->{children}})<=0) ) {
      		if (not(defined $predicate_target)) {
        		if (	($reify) ||
				(       (defined $predicate->{sID}) &&
                                	($predicate->{sID} ne '') ) ) {
          			$sStatementID = reify(	$expat,
							buildResource( $expat,$predicate->namespace,$predicate->localName),
							buildResource( $expat,$sTarget),
							buildResource( $expat,$sResource),
							$predicate->{sID},
							$predicate);

lib/RDFStore/Parser/SiRPAC.pm  view on Meta::CPAN

                                                buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'rest'),
                                                buildResource( $expat,$currentID),
                                                buildResource( $expat,$collectionID),
						$predicate->{'context'} #take the one of the predicate in this case
                                                );
				} else { #process once the predicate if collection
          				addTriple ( 	$expat,
                     				buildResource( $expat,$predicate->namespace,$predicate->localName),
                     				buildResource( $expat,$sTarget),
						buildResource( $expat,$collectionID),
						$predicate->{'context'} #take the one of the predicate in this case
					 	);
                                        };

                                $currentID=$collectionID;
                                addTriple(      $expat,
                                        buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'first'),
                                        buildResource( $expat,$collectionID),
                                        buildResource( $expat,$sStatementID),
					$predicate->{'context'} #take the one of the predicate in this case
                                        );
			} else {
          			addTriple ( 	$expat,
                     			buildResource( $expat,$predicate->namespace,$predicate->localName),
                     			buildResource( $expat,$sTarget),
					buildResource( $expat,$sStatementID),
					$predicate->{'context'}
					 );
                                };
			$j++;

			$object_elements++;
      			};
    		};

	if(	($j>0) && 
		($predicate->{isCollection}) ) {
        	addTriple(      $expat,
                		buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'rest'),
                                buildResource( $expat,$currentID),
               			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'nil'),
				$predicate->{'context'} #take the one of the predicate in this case
                                );
		};

	return $sStatementID;
	};

sub reify {
	my ($expat,$predicate,$subject,$object,$sNodeID,$ele) = @_;

	my $nodeID = getAttributeValue($expat, $ele->{attlist},$RDFStore::Parser::SiRPAC::RDFMS_nodeID); # XXXXXXX to be checked
	if ( defined $nodeID ) {
        	$sNodeID = 'rdf:nodeID:'.$nodeID
    			if(not(((defined $sNodeID) && ($sNodeID ne ''))));
        } else {
		$sNodeID = newReificationID($expat)
    			if(not(((defined $sNodeID) && ($sNodeID ne ''))));
		};

#print STDERR "reify('".$predicate->toString."','".$subject->toString."','".$object->toString."','$sNodeID','$ele')",((caller)[2]),"\n";

     	# The original statement must remain in the data model
    	addTriple($expat,$predicate, $subject, $object, $ele->{'context'});

	# Do not reify reifyd properties
    	if (	($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_subject) ||
    		($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_predicate) ||
    		($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_object) ||
    		($predicate eq $RDFStore::Parser::SiRPAC::RDFMS_type) ) {
      		return;
    	};

	# Reify by creating 4 new triples
    	addTriple(	$expat,
			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'predicate'),
			buildResource( $expat,$sNodeID),	
			$predicate,
			$ele->{'context'} );

    	addTriple(	$expat,
			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'subject'),
			buildResource( $expat,$sNodeID),	
			#bug fix by AR 2001/06/10
			(length($subject->toString()) == 0) ?
				buildResource( $expat,$expat->{'sSource'}.'#' ) :
				$subject,
			$ele->{'context'}
			);

    	addTriple(	$expat,
			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'object'),
			buildResource( $expat,$sNodeID),	
			$object,
			$ele->{'context'});

    	addTriple(	$expat,
			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'type'),
			buildResource( $expat,$sNodeID),	
			buildResource( $expat,$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS,'Statement'),
			$ele->{'context'}
			);

	return $sNodeID;
};

# Take an element <i>ele</i> with its parent element <i>parent</i>
# and evaluate all its attributes to see if they are non-RDF specific
# and non-XML specific in which case they must become children of
# the <i>ele</i> node.
sub expandAttributes {
	my ($expat,$parent,$ele,$predicateNode,$resourceValue) = @_;

#print "expandAttributes(".$parent->name().",".$ele->name().",$predicateNode)",((caller)[2]),"\n";
#use Data::Dumper;
#print Dumper($parent);

	my $foundAbbreviation = 0;
	my $resourceFound = 0;
	
  	my $count=0;
	while ($count<=$#{$ele->{attlist}}) {
    		my $sAttribute = ( (defined $ele->{attlist}->[$count++]->[0]) ? $ele->{attlist}->[$count-1]->[0] : '').( (defined $ele->{attlist}->[$count-1]->[1]) ? $ele->{attlist}->[$count-1]->[1] : '');
    		my $sValue = getAttributeValue($expat, $ele->{attlist},$sAttribute);

		#perhaps should next if not defined $sValue.....

		$count++;
      		if ($sAttribute =~ m|^$RDFStore::Parser::SiRPAC::XMLSCHEMA|) {
        		# expands after parsing, that's why it is useless here... :(
           		# because of concatenation without : inbetween
			# ...there was something more here to do....
        		next;
      			};

      		# exception: expand rdf:value and rdf:type and rdf:li elements - the last two must be resources and not literal values anyway
		# test http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-ns-prefix-confusion/test0006.rdf was failing and others - we fixed
		# processPredicate() method after attributes have been expanded to force resource object nodes for rdf:type on predicate with rdf:resource
      		if (	($sAttribute =~ /^$RDFStore::Parser::SiRPAC::RDF_SYNTAX_NS/) &&
          		(!($ele->{attlist}->[$count-2]->[1]=~ /^_/)) && #this might be buggy by AR 2001/05/28
          		(!($ele->{attlist}->[$count-2]->[1] =~ /^value$/)) &&
          		(!($ele->{attlist}->[$count-2]->[1] =~ /^type$/)) ) {

			# If an attribute (e.g. a property that follows the
			# propAttr production) is not qualified but its enclosing
			# (parent) element is from the RDFMS namespace, then the
			# attribute was prefaced with RDFMS in RDFXML_StartElementHandler().
			# This must be handled here so that the propAttr is added to the Model.
        		if(	($ele->{attlist}->[$count-2]->[1] =~ /resource$/) && 
				($predicateNode) ) {
          			$resourceFound = 1;
          			next;
        			};
 
			next
				if(	($ele->{attlist}->[$count-2]->[1] =~ /ID$/) ||
            				($ele->{attlist}->[$count-2]->[1] =~ /bagID$/) ||
            				($ele->{attlist}->[$count-2]->[1] =~ /about$/) ||
            				($ele->{attlist}->[$count-2]->[1] =~ /aboutEach$/) ||
            				($ele->{attlist}->[$count-2]->[1] =~ /datatype$/) ||
            				($ele->{attlist}->[$count-2]->[1] =~ /parseType$/) );
		};
		
      		# expanding predicate element
      		#if( 	($predicateNode) &&
		#	(!($sAttribute eq $RDFStore::Parser::SiRPAC::RDFMS_resource)) &&
		#       (!($sAttribute eq $RDFStore::Parser::SiRPAC::RDFMS_nodeID)) ) {
		#	die rdfcroak($expat,"Property element ". $ele->name()." has invalid attribute ".$sAttribute.". Only rdf:resource is allowed.");
		#};

      		$foundAbbreviation = 1;

		#xml:lang, rdf:datatypae and rdfstore:context settings are taken from the current $ele being expanded
		my $newElement =  RDFStore::Parser::SiRPAC::Element->new($ele->{attlist}->[$count-2]->[0],$ele->{attlist}->[$count-2]->[1], undef, $ele->{'lang'}, $ele->{'rdf:datatype'}, $ele->{'context'});

		my $newData = RDFStore::Parser::SiRPAC::DataElement->new($sValue, 0, $ele->{'lang'}, $ele->{'rdf:datatype'}, $ele->{'context'});
        	push @{$newElement->{children}},$newData;
        	push @{$parent->{children}},$newElement;
		};

	# If an rdf:resource propAttr was found in this predicate then
	# cache its value in each of the predicate's elements.  The value
	# of the this propAttr will be the subject of all of the propAttr's triples
    	if(	($resourceFound) && (defined $resourceValue) ) {
		my $i=0;
        	foreach $i (0..$#{$parent->{children}}) {
        		$parent->{children}->[$i]->{sResource}= $resourceValue;
        	}; 
	};

#print STDERR "----".$ele->{tag}."\n";
#map {
#if(ref($_)) {
#	print STDERR $_->[0],$_->[1],"=";
#} else {
#	print STDERR $_,"\n";
#};
#} @{$ele->{attlist}};

	return $foundAbbreviation;
};

sub parseLiteral {
	my ($expat) = @_;

#print STDERR "parseLiteral(): ".(caller)[2]."\n";

	foreach(reverse @{$expat->{SiRPAC}->{elementStack}}) {	
		my $sParseType = getAttributeValue(	$expat,
							$_->{attlist},
							$RDFStore::Parser::SiRPAC::RDFMS_parseType );
		return 1
			if(	(defined $sParseType) && 
				($sParseType ne "Resource") &&
				($sParseType ne "Collection") );
		};
    	return 0;       
	};

sub parseResource {
	my ($expat) = @_;

#print STDERR "parseResource($expat)",((caller)[2]),"\n";

	foreach(reverse @{$expat->{SiRPAC}->{elementStack}}) {	
		my $sParseType = getAttributeValue(	$expat,
							$_->{attlist},
							$RDFStore::Parser::SiRPAC::RDFMS_parseType );
		return 1
			if(	(defined $sParseType) &&
				($sParseType eq "Resource") );
		};
    	return 0;       
	};

sub normalizeResourceIdentifier {
	my ($expat,$sURI) = @_;

	return $sURI
		if ( $sURI =~ /^rdf:nodeID:/ ); #do not touch bNodes

#print STDERR "normalizeResourceIdentifier(['",$expat->base,"'],'$sURI')".(caller)[2]."\n";

	my $xml_base = $expat->base; #which is also set if a SourceBase is passed to the parser

	my $URL = URI->new($sURI);
        if(	(defined $URL->scheme) &&
		( $URL->scheme ne 'file') ) {
                # If sURI is an absolute URI, don't bother
                # with it
		return $sURI;
	} elsif(	(defined $sURI) && 
			(	(defined $xml_base) &&
				($xml_base ne '') ) ) {
		$xml_base =~ s/#.*$//;

		# see why at http://www.w3.org/TR/2003/PR-rdf-testcases-20031215/#sec-uri-encoding
		# NOTE: the URI module does correctly escape UTF-8-ish chars using '%' notation but RDF/XML "should" not (see above link)
		my $vURI = ($sURI !~ m/^#/) ? $sURI : '';

		my $absoluteURL;
		if( $xml_base =~ m/^(http|file):/ ) {
			my $path = new URI( $xml_base );
                        $path = $path->path
				if($path);
                        $vURI = $1 . $vURI # keep the file part otherwise the URI relative methods/flags below would drop it
				if(	($vURI ne $sURI) &&
					($path =~ m/([^\/]+\.[^\/]+)$/) );

			local $URI::ABS_REMOTE_LEADING_DOTS = 1;

			$absoluteURL = URI->new( $vURI )->abs( $xml_base ); #let URI module to sort out relative paths and make it absolute to xml_base
		} else {
			$absoluteURL = $xml_base . $vURI;
			};
		if(defined $absoluteURL) {
			return $absoluteURL.( ($sURI !~ m/^#/) ? '' : $sURI );
		} else {
			carp "Cannot combine $xml_base with $sURI";
	    		};
        } else {
		$sURI = '#'.$sURI
			unless($sURI =~ /^#/);
		return $sURI;
		};
	};

package RDFStore::Parser::SiRPAC::Element;
{
	sub new {
		my ($pkg, $namespace, $tag, $attlist, $lang, $datatype, $context) = @_;

		$attlist = []
			unless(defined $attlist);

#print STDERR "RDFStore::Parser::SiRPAC::Element::new( @_ ): ".(caller)[2]."\n";

		my $self =  {
				tag		=>	$tag,
				sNamespace	=>	$namespace,
				attlist		=>	$attlist,
				children	=>	[],
				vTargets	=>	[],
				bDone		=>	0,
				isCollection	=>	0,
				#at this level is just because SiRPAC parsing struct is broken (wrong to propagate XML attribute on elements)
				'lang'		=>      $lang, #xml:lang
				'rdf:datatype'  =>	$datatype, #rdf:datatype
				'context'	=> 	$context #rdfstore:context
			};
		bless $self,$pkg;
	};

	sub name {
		return (defined $_[0]->{sNamespace}) ?
				$_[0]->{sNamespace}.$_[0]->{tag} :
				$_[0]->{tag};
	};

	sub localName {
		return $_[0]->{tag};
	};

	sub namespace {
		return $_[0]->{sNamespace};
	};
};

package RDFStore::Parser::SiRPAC::DataElement;
{
	@RDFStore::Parser::SiRPAC::DataElement::ISA = qw( RDFStore::Parser::SiRPAC::Element );
	sub new {
		my ($pkg, $text, $parsetype, $lang, $datatype, $context) = @_;

#print STDERR "RDFStore::Parser::SiRPAC::DataElement::new( @_ ): ".(caller)[2]."\n";

		my $self = $pkg->SUPER::new(undef,$text,undef,$lang, $datatype, $context);

		delete $self->{sNamespace}; # we do not need it
		delete $self->{attlist}; # we do not need it

		$self->{'parse_type'} = (	$parsetype or 
						$datatype eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral' ) ? 1 : 0; #Literal or Resource
		$self->{tag} = "[DATA: " . $text . "]";
		$self->{sContent} = $text; #instanceOf Data :-)
		bless $self,$pkg;
	};

	sub name { };
	sub localName { };
	sub namespace { };
};

1;
};

__END__

=head1 NAME

RDFStore::Parser::SiRPAC - This module implements a streaming RDF Parser as a direct implementation of XML::Parser::Expat(3)

=head1 SYNOPSIS

	use RDFStore::Parser::SiRPAC;
        use RDFStore::NodeFactory;
        my $p=new RDFStore::Parser::SiRPAC(
		ErrorContext => 2,
                Handlers        => {
                        Init    => sub { print "INIT\n"; },
                        Final   => sub { print "FINAL\n"; },
                        Assert  => sub { print "STATEMENT - @_\n"; }
                },
                NodeFactory     => new RDFStore::NodeFactory() );

	$p->parsefile('http://www.gils.net/bsr-gils.rdfs');
        $p->parsefile('http://www.gils.net/rdf/bsr-gils.rdfs');
        $p->parsefile('/some/where/my.rdf');
        $p->parsefile('file:/some/where/my.rdf');
	$p->parse(*STDIN); #parse stream but with *blocking* Expat (see below example for n-blocking parsing using XML::Parse::ExpatNB)

	use RDFStore::Parser::SiRPAC;
        use RDFStore::NodeFactory;
	my $pstore=new RDFStore::Parser::SiRPAC(
                ErrorContext 	=> 2,
		Style           => 'RDFStore::Parser::Styles::RDFStore::Model',
                NodeFactory     => new RDFStore::NodeFactory(),
                style_options   =>      {
                                        persistent      =>      1,
                                        seevalues       =>      1,
                                        store_options         =>      { Name => '/tmp/test' }
                                }
        );
	my $rdfstore_model = $pstore->parsefile('http://www.gils.net/bsr-gils.rdfs');



( run in 0.482 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )