Redland

 view release on metacpan or  search on metacpan

redland/raptor/src/raptor_rdfxml.c  view on Meta::CPAN

  { "List",            0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
  { "XMLLiteral",      0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
  /* rdfs:Resource-s */
  { "nil",             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
  { NULL ,             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }
};


static int
raptor_rdfxml_forbidden_nodeElement_name(const char *name) 
{
  int i;

  if(*name == '_')
    return 0;
  
  for(i=0; rdf_syntax_terms_info[i].name; i++)
    if(!strcmp(rdf_syntax_terms_info[i].name, name))
      return rdf_syntax_terms_info[i].forbidden_as_nodeElement;

  return -1;
}


static int
raptor_rdfxml_forbidden_propertyElement_name(const char *name) 
{
  int i;

  if(*name == '_')
    return 0;
  
  for(i=0; rdf_syntax_terms_info[i].name; i++)
    if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
      return rdf_syntax_terms_info[i].forbidden_as_propertyElement;

  return -1;
}


static int
raptor_rdfxml_forbidden_propertyAttribute_name(const char *name) 
{
  int i;

  if(*name == '_')
    return 0;
  
  for(i=0; rdf_syntax_terms_info[i].name; i++)
    if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
      return rdf_syntax_terms_info[i].forbidden_as_propertyAttribute;

  return -1;
}


typedef enum {
  /* undetermined yet - whitespace is stored */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN,

  /* literal content - no elements, cdata allowed, whitespace significant 
   * <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL,

  /* parseType literal content (WF XML) - all content preserved
   * <propElement rdf:parseType="Literal"><em>blah</em></propElement> 
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * only used for <rdf:RDF> or implict <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES,

  /* property content - all content preserved
   * any content type changes when first non-whitespace found
   * <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is error 
   * <propElement rdf:resource="uri"/>
   * <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE,

  /* skipping content - all content is preserved 
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED,

  /* parseType Collection - all content preserved
   * Parsing of this determined by RDF/XML (Revised) closed collection rules
   * <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION,

  /* dummy for use in strings below */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST

} raptor_rdfxml_element_content_type;


static const struct {
  const char * const name;
  const int whitespace_significant;
  /* non-blank cdata */
  const int cdata_allowed;
  /* XML element content */
  const int element_allowed;
  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  const int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={
  {"Unknown",         1, 1, 1, 0 },
  {"Literal",         1, 1, 0, 0 },
  {"XML Literal",     1, 1, 1, 0 },
  {"Nodes",           0, 0, 1, 1 },
  {"Properties",      0, 1, 1, 1 },
  {"Property Content",1, 1, 1, 1 },
  {"Resource",        0, 0, 0, 0 },
  {"Preserved",       1, 1, 1, 0 },
  {"Collection",      1, 1, 1, 1 },
  {"DAML Collection", 1, 1, 1, 1 },
};



static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type) 
{
  if(type > RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
    return "INVALID";
  return rdf_content_type_info[type].name;
}





/*
 * Raptor Element/attributes on stack 
 */
struct raptor_rdfxml_element_s {
  raptor_xml_element *xml_element;

  /* NULL at bottom of stack */
  struct raptor_rdfxml_element_s *parent;

  /* attributes declared in M&S */
  const unsigned char * rdf_attr[RDF_ATTR_LAST+1];
  /* how many of above seen */
  int rdf_attr_count;

  /* state that this production matches */
  raptor_state state;

  /* how to handle the content inside this XML element */
  raptor_rdfxml_element_content_type content_type;


  /* starting state for children of this element */
  raptor_state child_state;

  /* starting content type for children of this element */
  raptor_rdfxml_element_content_type child_content_type;


  /* STATIC Reified statement identifier */
  raptor_identifier reified;

  /* STATIC Bag identifier */
  raptor_identifier bag;
  int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */

  /* STATIC Subject identifier (URI/anon ID), type, source
   *
   * When the XML element represents a node, this is the identifier 
   */
  raptor_identifier subject;
  
  /* STATIC Predicate URI, source is either
   * RAPTOR_URI_SOURCE_ELEMENT or RAPTOR_URI_SOURCE_ATTRIBUTE
   *
   * When the XML element represents a node or predicate,
   * this is the identifier of the predicate 
   */
  raptor_identifier predicate;

  /* STATIC Object identifier (URI/anon ID), type, source
   *
   * When this XML element generates a statement that needs an object,
   * possibly from a child element, this is the identifier of the object 
   */
  raptor_identifier object;

  /* URI of datatype of literal */
  raptor_uri *object_literal_datatype;

  /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
  int last_ordinal;

  /* If this element's parseType is a Collection 
   * this identifies the anon node of current tail of the collection(list). 
   */
  const unsigned char *tail_id;

  /* RDF/XML specific checks */

  /* all cdata so far is whitespace */
  unsigned int content_cdata_all_whitespace;
};

typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;


#define RAPTOR_RDFXML_N_CONCEPTS 22

/*
 * Raptor parser object
 */
struct raptor_rdfxml_parser_s {
  raptor_sax2 *sax2;
  
  /* stack of elements - elements add after current_element */
  raptor_rdfxml_element *root_element;
  raptor_rdfxml_element *current_element;

  raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];

  /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
  raptor_id_set* id_set;

  void *xml_content;
  size_t xml_content_length;
  raptor_iostream* iostream;

  /* writer for building parseType="Literal" content */
  raptor_xml_writer* xml_writer;
};




/* static variables */

#define RAPTOR_RDF_type_URI(rdf_xml_parser)      rdf_xml_parser->concepts[0]
#define RAPTOR_RDF_value_URI(rdf_xml_parser)     rdf_xml_parser->concepts[1]
#define RAPTOR_RDF_subject_URI(rdf_xml_parser)   rdf_xml_parser->concepts[2]
#define RAPTOR_RDF_predicate_URI(rdf_xml_parser) rdf_xml_parser->concepts[3]
#define RAPTOR_RDF_object_URI(rdf_xml_parser)    rdf_xml_parser->concepts[4]
#define RAPTOR_RDF_Statement_URI(rdf_xml_parser) rdf_xml_parser->concepts[5]

#define RAPTOR_RDF_Seq_URI(rdf_xml_parser) rdf_xml_parser->concepts[6]
#define RAPTOR_RDF_Bag_URI(rdf_xml_parser) rdf_xml_parser->concepts[7]
#define RAPTOR_RDF_Alt_URI(rdf_xml_parser) rdf_xml_parser->concepts[8]

#define RAPTOR_RDF_List_URI(rdf_xml_parser)  rdf_xml_parser->concepts[9]
#define RAPTOR_RDF_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[10]
#define RAPTOR_RDF_rest_URI(rdf_xml_parser)  rdf_xml_parser->concepts[11]
#define RAPTOR_RDF_nil_URI(rdf_xml_parser)   rdf_xml_parser->concepts[12]

#define RAPTOR_DAML_NS_URI(rdf_xml_parser)   rdf_xml_parser->concepts[13]

#define RAPTOR_DAML_List_URI(rdf_xml_parser)  rdf_xml_parser->concepts[14]
#define RAPTOR_DAML_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[15]
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)  rdf_xml_parser->concepts[16]
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)   rdf_xml_parser->concepts[17]

#define RAPTOR_RDF_RDF_URI(rdf_xml_parser)         rdf_xml_parser->concepts[18]
#define RAPTOR_RDF_Description_URI(rdf_xml_parser) rdf_xml_parser->concepts[19]
#define RAPTOR_RDF_li_URI(rdf_xml_parser)          rdf_xml_parser->concepts[20]

#define RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)  rdf_xml_parser->concepts[21]

/* RAPTOR_RDFXML_N_CONCEPTS defines size of array */


/* prototypes for element functions */
static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);

static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);

/* prototypes for grammar functions */
static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);


/* prototype for statement related functions */
static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const ...



/* Prototypes for parsing data functions */
static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);

static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);


static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser) 
{
  raptor_rdfxml_element *element=rdf_xml_parser->current_element;

  if(!element)
    return NULL;

  rdf_xml_parser->current_element=element->parent;
  if(rdf_xml_parser->root_element == element) /* just deleted root */
    rdf_xml_parser->root_element=NULL;

  return element;
}


static void
raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element) 
{
  element->parent=rdf_xml_parser->current_element;
  rdf_xml_parser->current_element=element;
  if(!rdf_xml_parser->root_element)
    rdf_xml_parser->root_element=element;
}


static void
raptor_free_rdfxml_element(raptor_rdfxml_element *element)
{
  int i;
  
  /* Free special RDF M&S attributes */
  for(i=0; i<= RDF_ATTR_LAST; i++) 
    if(element->rdf_attr[i])
      RAPTOR_FREE(cstring, (void*)element->rdf_attr[i]);

  raptor_free_identifier(&element->subject);
  raptor_free_identifier(&element->predicate);
  raptor_free_identifier(&element->object);
  raptor_free_identifier(&element->bag);
  raptor_free_identifier(&element->reified);

  if(element->tail_id)

redland/raptor/src/raptor_rdfxml.c  view on Meta::CPAN

                raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name);
              /* Delete it if it was stored elsewhere */
              /* make sure value isn't deleted from qname structure */
              attr->value=NULL;
              raptor_free_qname(attr);
              attr=NULL;
              break;
            }
        } /* end if non-namespace prefixed RDF attributes */

        if(!attr)
          continue;

      } /* end if leave literal XML alone */

      if(attr)
        new_named_attrs[offset++]=attr;
    }

    /* new attribute count is set from attributes that haven't been skipped */
    ns_attributes_count=offset;
    if(!ns_attributes_count) {
      /* all attributes were deleted so delete the new array */
      RAPTOR_FREE(raptor_qname_array, new_named_attrs);
      new_named_attrs=NULL;
    }

    RAPTOR_FREE(raptor_qname_array, named_attrs);
    named_attrs=new_named_attrs;
    raptor_xml_element_set_attributes(xml_element, 
                                      named_attrs, ns_attributes_count);
  } /* end if ns_attributes_count */


  /* start from unknown; if we have a parent, it may set this */
  element->state=RAPTOR_STATE_UNKNOWN;
  element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;

  if(element->parent && 
     element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
    element->content_type=element->parent->child_content_type;
      
    if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
       element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
       element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
      /* If parent has an rdf:resource, this element should not be here */
      raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.", 
                            raptor_xml_element_get_name(element->parent->xml_element)->local_name);
      element->state=RAPTOR_STATE_SKIPPING;
      element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;

    } else {
      if(!element->parent->child_state)
        raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_handler: no parent element child_state set");      

      element->state=element->parent->child_state;
      element->parent->xml_element->content_element_seen++;
      count_bumped++;
    
      /* leave literal XML alone */
      if (!rdf_content_type_info[element->content_type].cdata_allowed) {
        if(element->parent->xml_element->content_element_seen &&
           element->parent->xml_element->content_cdata_seen) {
          /* Uh oh - mixed content, the parent element has cdata too */
          raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", 
                                raptor_xml_element_get_name(element->parent->xml_element)->local_name);
        }
        
        /* If there is some existing all-whitespace content cdata
         * before this node element, delete it
         */
        if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
           element->parent->xml_element->content_element_seen &&
           element->parent->content_cdata_all_whitespace &&
           element->parent->xml_element->content_cdata_length) {
          
          element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
          
          raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
          element->parent->xml_element->content_cdata_sb=NULL;
          element->parent->xml_element->content_cdata_length=0;
        }
        
      } /* end if leave literal XML alone */
      
    } /* end if parent has no rdf:resource */

  } /* end if element->parent */


#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name);

  fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
  raptor_print_xml_element(xml_element, stderr);
#endif

  
  /* Check for non namespaced stuff when not in a parseType literal, other */
  if (rdf_content_type_info[element->content_type].rdf_processing) {

    /* The element */
    /* If has no namespace or the namespace has no name (xmlns="") */
    if(!raptor_xml_element_get_name(xml_element)->nspace ||
       (raptor_xml_element_get_name(xml_element)->nspace &&
        !raptor_namespace_get_uri(raptor_xml_element_get_name(xml_element)->nspace))) {
      raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.", 
                          raptor_xml_element_get_name(element->parent->xml_element)->local_name);
      element->state=RAPTOR_STATE_SKIPPING;
      /* Remove count above so that parent thinks this is empty */
      if(count_bumped)
        element->parent->xml_element->content_element_seen--;
      element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
    }


    /* Check for any remaining non-namespaced attributes */
    if (named_attrs) {
      for(i=0; i < ns_attributes_count; i++) {
        raptor_qname *attr=named_attrs[i];
        /* Check if any attributes are non-namespaced */
        if(!attr->nspace ||
           (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
          raptor_parser_error(rdf_parser, "Using an attribute '%s' without a namespace is forbidden.", attr->local_name);
          raptor_free_qname(attr);
          named_attrs[i]=NULL;
        }
      }
    }
  }
  

  if (element->rdf_attr[RDF_ATTR_aboutEach] || 
      element->rdf_attr[RDF_ATTR_aboutEachPrefix]) {
    raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.", 
                          raptor_xml_element_get_name(xml_element)->local_name);
    element->state=RAPTOR_STATE_SKIPPING;
    /* Remove count above so that parent thinks this is empty */
    if(count_bumped)
      element->parent->xml_element->content_element_seen--;
    element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
  }
  
  /* Right, now ready to enter the grammar */
  raptor_rdfxml_start_element_grammar(rdf_parser, element);

  return;
}


static void
raptor_rdfxml_end_element_handler(void *user_data, 
                                  raptor_xml_element* xml_element)
{
  raptor_parser* rdf_parser;
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;

  rdf_parser=(raptor_parser*)user_data;
  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  if(!rdf_parser->failed) {
    raptor_rdfxml_update_document_locator(rdf_parser);

    raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element);
  }
  
  element=raptor_rdfxml_element_pop(rdf_xml_parser);
  if(element) {
    if(element->parent) {
      /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
       * See the switch case for MEMBER / PROPERTYELT where the test is done.
       *
       * PARSETYPE_RESOURCE should never be propogated up since it
       * will turn the next child (node) element into a property
       */
      if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT &&
         element->state != RAPTOR_STATE_PARSETYPE_RESOURCE)
        element->parent->child_state=element->state;
    }
  
    raptor_free_rdfxml_element(element);
  }
}


/* cdata (and ignorable whitespace for libxml). 
 * s is not 0 terminated for expat, is for libxml - grrrr.
 */
static void
raptor_rdfxml_characters_handler(void *user_data, 
                                 raptor_xml_element* xml_element,
                                 const unsigned char *s, int len)
{
  raptor_parser* rdf_parser=(raptor_parser*)user_data;

  raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
}


/* cdata (and ignorable whitespace for libxml). 
 * s is not 0 terminated for expat, is for libxml - grrrr.
 */
static void
raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
                            const unsigned char *s, int len)
{
  raptor_parser* rdf_parser=(raptor_parser*)user_data;

  raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
}

/* This is called for a declaration of an unparsed (NDATA) entity */
static void
raptor_rdfxml_unparsed_entity_decl_handler(void *user_data,
                                           const unsigned char* entityName,
                                           const unsigned char* base,
                                           const unsigned char* systemId,
                                           const unsigned char* publicId,
                                           const unsigned char* notationName) 
{
/*  raptor_parser* rdf_parser=(raptor_parser*)user_data; */
  fprintf(stderr,
          "raptor_rdfxml_unparsed_entity_decl_handler: entityName %s base %s systemId %s publicId %s notationName %s\n",
          entityName, (base ? (const char*)base : "(None)"), 
          systemId, (publicId ?  (const char*)publicId: "(None)"),
          (const char*)notationName);
}


static int 
raptor_rdfxml_external_entity_ref_handler(void *user_data,
                                          const unsigned char* context,
                                          const unsigned char* base,
                                          const unsigned char* systemId,
                                          const unsigned char* publicId)
{
/*  raptor_rdfxml_parser* rdf_parser=(raptor_rdfxml_parser*)user_data; */
  fprintf(stderr,
          "raptor_rdfxml_external_entity_ref_handler: base %s systemId %s publicId %s\n",
          (base ?  (const char*)base : "(None)"), 
          systemId, (publicId ?  (const char*)publicId: "(None)"));

  /* "The handler should return 0 if processing should not continue
   * because of a fatal error in the handling of the external entity."
   */
  return 1;
}


/* comment handler
 * s is 0 terminated
 */
static void
raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
                              const unsigned char *s)
{
  raptor_parser* rdf_parser=(raptor_parser*)user_data;
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;

  if(rdf_parser->failed || !xml_element)
    return;

  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  element=rdf_xml_parser->current_element;

  if(element) {
    if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
      raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
  }
  

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("XML Comment '%s'\n", s);
#endif
}



static int
raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
{
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_sax2* sax2;
  
  sax2=raptor_new_sax2(rdf_parser, 
                       rdf_parser, raptor_parser_error_message_handler,
                       rdf_parser, raptor_parser_fatal_error_message_handler,
                       rdf_parser, raptor_parser_warning_message_handler);
  rdf_xml_parser->sax2=sax2;

  raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
  raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
  raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
  raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
  raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
  raptor_sax2_set_unparsed_entity_decl_handler(sax2, raptor_rdfxml_unparsed_entity_decl_handler);
  raptor_sax2_set_external_entity_ref_handler(sax2, raptor_rdfxml_external_entity_ref_handler);
  raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);
  raptor_sax2_set_locator(sax2, &rdf_parser->locator);
  
  RAPTOR_RDF_type_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("type");
  RAPTOR_RDF_value_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("value");
  RAPTOR_RDF_subject_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("subject");
  RAPTOR_RDF_predicate_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("predicate");
  RAPTOR_RDF_object_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("object");
  RAPTOR_RDF_Statement_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Statement");

  RAPTOR_RDF_Seq_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Seq");
  RAPTOR_RDF_Bag_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Bag");
  RAPTOR_RDF_Alt_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Alt");

  RAPTOR_RDF_List_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("List");
  RAPTOR_RDF_first_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("first");
  RAPTOR_RDF_rest_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("rest");
  RAPTOR_RDF_nil_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("nil");

  RAPTOR_DAML_NS_URI(rdf_xml_parser)=raptor_new_uri((const unsigned char*)"http://www.daml.org/2001/03/daml+oil#");

  RAPTOR_DAML_List_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
  RAPTOR_DAML_first_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
  RAPTOR_DAML_rest_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
  RAPTOR_DAML_nil_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");

  RAPTOR_RDF_RDF_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("RDF");
  RAPTOR_RDF_Description_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Description");
  RAPTOR_RDF_li_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("li");

  RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)=raptor_new_uri(raptor_xml_literal_datatype_uri_string);

  rdf_xml_parser->id_set=raptor_new_id_set();

  return 0;
}


static int
raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
{
  raptor_uri *uri=rdf_parser->base_uri;
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  /* base URI required for RDF/XML */
  if(!uri)
    return 1;

  /* Optionally normalize language to lowercase
   * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
   */
  raptor_sax2_set_feature(rdf_xml_parser->sax2,
                          RAPTOR_FEATURE_NORMALIZE_LANGUAGE, 
                          rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]);

  /* Optionally forbid network requests in the XML parser */
  raptor_sax2_set_feature(rdf_xml_parser->sax2, 

redland/raptor/src/raptor_rdfxml.c  view on Meta::CPAN

                                      element->subject.type,
                                      element->subject.uri_source,
                                      NULL,

                                      &element->reified,
                                      element->parent);
          } else {
            raptor_rdfxml_generate_statement(rdf_parser, 
                                      element->parent->subject.uri,
                                      element->parent->subject.id,
                                      element->parent->subject.type,
                                      element->parent->subject.uri_source,
                                      
                                      raptor_xml_element_get_name(xml_element)->uri,
                                      NULL,
                                      RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                      RAPTOR_URI_SOURCE_ELEMENT,
                                      0,
                                      
                                      element->subject.uri,
                                      element->subject.id,
                                      element->subject.type,
                                      element->subject.uri_source,
                                      NULL,

                                      &element->reified,
                                      element->parent);
          }
        }
        finished=1;
        break;

      case RAPTOR_STATE_PARSETYPE_COLLECTION:

        finished=1;
        break;

      case RAPTOR_STATE_PARSETYPE_OTHER:
        /* FALLTHROUGH */

      case RAPTOR_STATE_PARSETYPE_LITERAL:
        element->parent->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;

        raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);

        finished=1;
        break;


      case RAPTOR_STATE_PROPERTYELT:
      case RAPTOR_STATE_MEMBER_PROPERTYELT:
        /* A property element
         *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
         *
         * Literal content part is handled here.
         * The element content is handled in the internal states
         * Empty content is checked here.
         */

        if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
          if(xml_element->content_cdata_seen) 
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
          else if (xml_element->content_element_seen) 
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
          else { /* Empty Literal */
            element->object.type= RAPTOR_IDENTIFIER_TYPE_LITERAL;
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
          }
          
        }


        /* Handle terminating a rdf:parseType="Collection" list */
        if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
           element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
          raptor_uri* nil_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_nil_URI(rdf_xml_parser) : RAPTOR_RDF_nil_URI(rdf_xml_parser);
          if (!element->tail_id) {
            /* If No List: set object of statement to rdf:nil */
            element->object.uri= raptor_uri_copy(nil_uri);
            element->object.id= NULL;
            element->object.type= RAPTOR_IDENTIFIER_TYPE_RESOURCE;
            element->object.uri_source= RAPTOR_URI_SOURCE_URI;
          } else {
            raptor_uri* rest_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
            /* terminate the list */
            raptor_rdfxml_generate_statement(rdf_parser, 
                                      NULL,
                                      element->tail_id,
                                      RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
                                      RAPTOR_URI_SOURCE_ID,
                                      
                                      rest_uri,
                                      NULL,
                                      RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                      RAPTOR_URI_SOURCE_URI,
                                      0,
                                      
                                      nil_uri,
                                      NULL,
                                      RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                      RAPTOR_URI_SOURCE_URI,
                                      NULL,

                                      NULL,
                                      NULL);
          }

        } /* end rdf:parseType="Collection" termination */
        

#ifdef RAPTOR_DEBUG_VERBOSE
        RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif

        switch(element->content_type) {
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:

            if(raptor_rdfxml_element_has_property_attributes(element) &&
               element->child_state == RAPTOR_STATE_DESCRIPTION) {
              raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name);
              state=RAPTOR_STATE_SKIPPING;
              element->child_state=RAPTOR_STATE_SKIPPING;
              finished=1;
              break;
            }

            if(element->object.type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
              if(element->rdf_attr[RDF_ATTR_resource]) {
                element->object.uri=raptor_new_uri_relative_to_base(raptor_rdfxml_inscope_base_uri(rdf_parser),
                                                    (const unsigned char*)element->rdf_attr[RDF_ATTR_resource]);
                RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_resource]);
                element->rdf_attr[RDF_ATTR_resource]=NULL;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
                element->object.uri_source=RAPTOR_URI_SOURCE_URI;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              } else if(element->rdf_attr[RDF_ATTR_nodeID]) {
                element->object.id=raptor_generate_id(rdf_parser, 0, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
                element->rdf_attr[RDF_ATTR_nodeID]=NULL;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
                if(!raptor_valid_xml_ID(rdf_parser, element->object.id)) {
                  raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->object.id);
                  state=RAPTOR_STATE_SKIPPING;
                  element->child_state=RAPTOR_STATE_SKIPPING;
                  finished=1;
                  break;
                }
              } else {
                element->object.id=raptor_generate_id(rdf_parser, 0, NULL);
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              }

              raptor_rdfxml_process_property_attributes(rdf_parser, element, 
                                                        element->parent, 
                                                        &element->object);

            }

            /* We know object is a resource, so delete any unsignficant
             * whitespace so that FALLTHROUGH code below finds the object.
             */
            if(xml_element->content_cdata_length) {
              raptor_free_stringbuffer(xml_element->content_cdata_sb);
              xml_element->content_cdata_sb=NULL;
              xml_element->content_cdata_length=0;
            }

            /* FALLTHROUGH */
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:

            if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {

              if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
                /* Only an empty literal can have a rdf:bagID */
                if(element->bag.uri || element->bag.id) {
                  if(xml_element->content_cdata_length > 0) {
                    raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name);
                    /* prevent this being used later either */
                    element->rdf_attr[RDF_ATTR_bagID]=NULL;
                  } else
                    raptor_rdfxml_generate_statement(rdf_parser, 
                                              element->bag.uri,
                                              element->bag.id,
                                              element->bag.type,
                                              element->bag.uri_source,
                                              
                                              RAPTOR_RDF_type_URI(rdf_xml_parser),
                                              NULL,
                                              RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                              RAPTOR_URI_SOURCE_URI,
                                              0,
                                              
                                              RAPTOR_RDF_Bag_URI(rdf_xml_parser),
                                              NULL,
                                              RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                              RAPTOR_URI_SOURCE_NOT_URI,
                                              NULL,
                                              
                                              NULL,
                                              NULL);
                }
              } /* if rdf:bagID */

              /* If there is empty literal content with properties
               * generate a node to hang properties off 
               */
              if(raptor_rdfxml_element_has_property_attributes(element) &&
                 xml_element->content_cdata_length > 0) {
                raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name);
                state=RAPTOR_STATE_SKIPPING;
                element->child_state=RAPTOR_STATE_SKIPPING;
                finished=1;
                break;
              }

              if(element->object.type == RAPTOR_IDENTIFIER_TYPE_LITERAL &&
                 raptor_rdfxml_element_has_property_attributes(element) &&
                 !element->object.uri) {
                element->object.id=raptor_generate_id(rdf_parser, 0, NULL);
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              }
              
              raptor_rdfxml_process_property_attributes(rdf_parser, element, 
                                                        element,
                                                        &element->object);
            }
            

            /* just be friendly to older compilers and don't declare
             * variables in the middle of a block
             */
            if(1) {
              raptor_uri *predicate_uri=NULL;
              raptor_identifier_type predicate_type;
              int predicate_ordinal=0;
              raptor_uri *object_uri;
              raptor_identifier_type object_type;
              raptor_uri *literal_datatype=NULL;
              const unsigned char* empty_literal=(const unsigned char*)"";

              if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
                element->parent->last_ordinal++;
                predicate_ordinal=element->parent->last_ordinal;
                predicate_type=RAPTOR_IDENTIFIER_TYPE_ORDINAL;

              } else {
                predicate_uri=raptor_xml_element_get_name(xml_element)->uri;
                predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
              }


              if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
                unsigned char* literal;

                object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
                literal=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
                literal_datatype=element->object_literal_datatype;

                if(!literal_datatype && literal &&
                   !raptor_utf8_is_nfc(literal, xml_element->content_cdata_length)) {
                  const char *message="Property element '%s' has a string not in Unicode Normal Form C: %s";
                  raptor_rdfxml_update_document_locator(rdf_parser);
                  if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
                    raptor_parser_error(rdf_parser, message, el_name, literal);
                  else
                    raptor_parser_warning(rdf_parser, message, el_name, literal);
                }

                if(!literal)
                  /* empty literal */
                  literal=(unsigned char*)empty_literal;

                object_uri=(raptor_uri*)literal;
              } else { 
                object_type=element->object.type;
                object_uri=element->object.uri;
              }

              raptor_rdfxml_generate_statement(rdf_parser, 
                                        element->parent->subject.uri,
                                        element->parent->subject.id,
                                        element->parent->subject.type,
                                        RAPTOR_URI_SOURCE_ELEMENT,

                                        predicate_uri,
                                        NULL,
                                        predicate_type,
                                        RAPTOR_URI_SOURCE_NOT_URI,
                                        predicate_ordinal,

                                        object_uri,
                                        element->object.id,
                                        object_type,
                                        element->object.uri_source,
                                        literal_datatype,

                                        &element->reified,
                                        element->parent);
              
            }
            
            break;

        case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
        case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
            {
              unsigned char *buffer;
              unsigned int length;
              
              if(rdf_xml_parser->xml_writer) {
                raptor_free_iostream(rdf_xml_parser->iostream);
                rdf_xml_parser->iostream=NULL;
                
                buffer=(unsigned char*)rdf_xml_parser->xml_content;
                length=rdf_xml_parser->xml_content_length;
              } else {
                buffer=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
                length=xml_element->content_cdata_length;
              }

              if(!raptor_utf8_is_nfc(buffer, length)) {
                const char *message="Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
                raptor_rdfxml_update_document_locator(rdf_parser);
                if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
                  raptor_parser_error(rdf_parser, message, el_name, buffer);
                else
                  raptor_parser_warning(rdf_parser, message, el_name, buffer);
              }
              

              if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
                element->parent->last_ordinal++;
                raptor_rdfxml_generate_statement(rdf_parser, 
                                          element->parent->subject.uri,
                                          element->parent->subject.id,
                                          element->parent->subject.type,
                                          element->parent->subject.uri_source,
                                          
                                          NULL,
                                          NULL,
                                          RAPTOR_IDENTIFIER_TYPE_ORDINAL,
                                          RAPTOR_URI_SOURCE_NOT_URI,
                                          element->parent->last_ordinal,
                                          
                                          (raptor_uri*)buffer,
                                          NULL,
                                          RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                          RAPTOR_URI_SOURCE_NOT_URI,
                                          RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
                                          
                                          &element->reified,
                                          element->parent);
              } else {
                raptor_rdfxml_generate_statement(rdf_parser, 
                                          element->parent->subject.uri,
                                          element->parent->subject.id,
                                          element->parent->subject.type,
                                          element->parent->subject.uri_source,
                                          
                                          raptor_xml_element_get_name(xml_element)->uri,
                                          NULL,
                                          RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                          RAPTOR_URI_SOURCE_ELEMENT,
                                          0,
                                          
                                          (raptor_uri*)buffer,
                                          NULL,
                                          RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                          RAPTOR_URI_SOURCE_NOT_URI,
                                          RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
                                          
                                          &element->reified,
                                          element->parent);
              }
              
              /* Finish the xml writer iostream for parseType="Literal" */
              if(rdf_xml_parser->xml_writer) {
                raptor_free_xml_writer(rdf_xml_parser->xml_writer);
                RAPTOR_FREE(cstring, rdf_xml_parser->xml_content);
                rdf_xml_parser->xml_content=NULL;
                rdf_xml_parser->xml_content_length=0;
              }
            }
            
          break;

          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
            abort();
            
            break;

          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
            
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
          default:
            raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: state RAPTOR_STATE_PROPERTYELT - unexpected content type %s (%d)", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
        } /* end switch */

      finished=1;
      break;

      case RAPTOR_STATE_INVALID:
      default:
        raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
        finished=1;

    } /* end switch */

    if(state != element->state) {
      element->state=state;
#ifdef RAPTOR_DEBUG_VERBOSE
      RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
#endif
    }

  } /* end while */

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
#endif

}



static void
raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
                            const unsigned char *s, int len,
                            int is_cdata)
{
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;
  raptor_xml_element* xml_element;
  raptor_state state;
  int all_whitespace=1;
  int i;

  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  if(rdf_parser->failed)
    return;

#ifdef RAPTOR_DEBUG_CDATA
  RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
  (void)fwrite(s, 1, len, stderr);
  fprintf(stderr, "' (%d bytes)\n", len);
#endif

  for(i=0; i<len; i++)
    if(!isspace(s[i])) {
      all_whitespace=0;
      break;
    }

  element=rdf_xml_parser->current_element;
  xml_element=element->xml_element;

  /* this file is very broke - probably not XML, whatever */
  if(!element)
    return;
  
  raptor_rdfxml_update_document_locator(rdf_parser);

  /* cdata never changes the parser state 
   * and the containing element state always determines what to do.
   * Use the child_state first if there is one, since that applies
   */
  state=element->child_state;
#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
#endif


#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif
  


  if(state == RAPTOR_STATE_SKIPPING)
    return;

  if(state == RAPTOR_STATE_UNKNOWN) {
    /* Ignore all cdata if still looking for RDF */
    if(rdf_parser->features[RAPTOR_FEATURE_SCANNING])
      return;

    /* Ignore all whitespace cdata before first element */
    if(all_whitespace)
      return;
    
    /* This probably will never happen since that would make the
     * XML not be well-formed
     */
    raptor_parser_warning(rdf_parser, "Character data before RDF element.");
  }


  if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
    /* If found non-whitespace content, move to literal content */
    if(!all_whitespace)
      element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; 
  }


  if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {

    /* Whitespace is ignored except for literal or preserved content types */
    if(all_whitespace) {
#ifdef RAPTOR_DEBUG_CDATA
      RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent->xml_element)->local_name);
#endif
      return;
    }

    if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
      /* Uh oh - mixed content, this element has elements too */
      raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", 
                            raptor_xml_element_get_name(element->parent->xml_element)->local_name);
    }
  }


  if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
    element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
#ifdef RAPTOR_DEBUG_VERBOSE
    RAPTOR_DEBUG3("Content type changed to %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif
  }

  if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
    raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
  else {
    raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
                                              s, len, 1);
    element->content_cdata_all_whitespace &= all_whitespace;
    
    /* adjust stored length */
    xml_element->content_cdata_length += len;
  }


#ifdef RAPTOR_DEBUG_CDATA
  RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length);
#endif
#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
#endif
}



/**
 * raptor_rdfxml_inscope_base_uri:
 * @rdf_parser: Raptor parser object
 *
 * Return the in-scope base URI.
 * 
 * Looks for the innermost xml:base on an element or document URI
 * 
 * Return value: The URI string value or NULL on failure.
 **/
static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
{
  raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_uri* base_uri;

  base_uri=raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2);
  if(!base_uri)
    base_uri=rdf_parser->base_uri;

  return base_uri;
}


/**
 * raptor_rdfxml_record_ID:
 * @rdf_parser: Raptor parser object
 * @element: Current element
 * @id: ID string
 *
 * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already.
 * 
 * Record and check the ID values, if they have been seen already.
 * per in-scope-base URI.
 * 
 * Return value: non-zero if already seen, or failure
 **/
static int
raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
                        raptor_rdfxml_element *element,
                        const unsigned char *id)
{
  raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_uri* base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
  size_t id_len=strlen((const char*)id);
  int rc;
  
  if(!rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID])
    return 0;

  rc=raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);



( run in 0.493 second using v1.01-cache-2.11-cpan-5a3173703d6 )