Xacobeo
view release on metacpan or search on metacpan
//
// Sample program that displays an XML file in a GtkTextView.
//
// Copyright (C) 2008 Emmanuel Rodriguez
//
// This program is free software; you can redistribute it and/or modify it under
// the same terms as Perl itself, either Perl version 5.8.8 or, at your option,
// any later version of Perl 5 you may have available.
//
//
#include "code.h"
#include "logger.h"
#include "libxml.h"
#include <string.h>
// Convenience wrappers around my_buffer_add().
//
// buffer_add() appends 'text' with the style 'tag' and without an XML node.
#define buffer_add(xargs, tag, text) my_buffer_add((xargs), (tag), NULL, (text))
// buffer_add_node() does the same but also hands 'node' over to my_buffer_add().
#define buffer_add_node(xargs, tag, node, text) my_buffer_add((xargs), (tag), (node), (text))
// buffer_cat() concatenates all the strings passed as variadic arguments and
// appends the result as a single chunk with the style 'tag'. The temporary
// string is released before the macro ends.
//
// Beware that g_strconcat() treats the first NULL argument as the end of the
// list: a NULL string in the middle silently drops everything that follows.
//
// The temporary uses a name that callers are unlikely to use themselves;
// otherwise an argument with the same name would refer to the (uninitialized)
// temporary instead of the caller's variable.
#define buffer_cat(xargs, tag, ...) \
do { \
	gchar *buffer_cat_joined_ = g_strconcat(__VA_ARGS__, NULL); \
	my_buffer_add((xargs), (tag), NULL, buffer_cat_joined_); \
	g_free(buffer_cat_joined_); \
} while (FALSE)
// Evaluates to true when 'a' is an element node that has the same name as 'b'
// and points to the same namespace as 'b'.
//
// The whole expansion is parenthesized so that the macro behaves as a single
// expression when combined with other operators (ex: !ELEMENT_MATCH(a, b)).
#define ELEMENT_MATCH(a, b) ( \
	(a)->type == XML_ELEMENT_NODE \
	&& xmlStrEqual((a)->name, (b)->name) \
	&& (a)->ns == (b)->ns \
)
// The icon type to use for an element. It is the value written into the
// DOM_COL_ICON column when an element row is inserted into the tree store.
#define ICON_ELEMENT "gtk-directory"
// The markup styles to be used.
//
// Each member holds the text tag registered in the buffer's tag table under
// the same name as the member (see my_get_buffer_tags()). A member is NULL when
// the lookup in the tag table returned nothing.
typedef struct _MarkupTags {
// XPath results: margin counter used to identify each XPath result
GtkTextTag *result_count;
// XPath results: boolean result
GtkTextTag *boolean;
// XPath results: numerical result
GtkTextTag *number;
// XML elements: the name of an attribute
GtkTextTag *attribute_name;
// XML elements: the value of an attribute
GtkTextTag *attribute_value;
// XML syntax: a comment
GtkTextTag *comment;
// XML syntax: a DTD definition
GtkTextTag *dtd;
// XML elements: an element (both opening and closing tag)
GtkTextTag *element;
// XML syntax: the name of a processing instruction
GtkTextTag *pi;
// XML syntax: the data of a processing instruction
GtkTextTag *pi_data;
// XML syntax: syntax tokens such as <, >, &, ;
GtkTextTag *syntax;
// XPath results: literal (string) result
GtkTextTag *literal;
// XML syntax: a CDATA section (both opening and closing syntax)
GtkTextTag *cdata;
// XML syntax: the content of a CDATA section
GtkTextTag *cdata_content;
// XML elements: the name (prefix) of a namespace declaration
GtkTextTag *namespace_name;
// XML elements: the URI of a namespace declaration
GtkTextTag *namespace_uri;
// XML syntax: an entity reference
GtkTextTag *entity_ref;
// The tag named "error" (its usage is not visible in this part of the file)
GtkTextTag *error;
} MarkupTags;
// The context used for displaying the XML. Since a lot of functions need these
// parameters, it's easier to group them in a custom struct and pass that struct
// around.
typedef struct _TextRenderCtx {
// The GTK text buffer on which to perform the rendering
GtkTextBuffer *buffer;
// The markup tags defined in the text buffer (as returned by
// my_get_buffer_tags())
MarkupTags *markup;
// Perl hash with the namespaces to use (key: uri, value: prefix)
HV *namespaces;
// Contents of the XML document (it gets built at runtime). Every piece of text
// passed to my_buffer_add() is appended to this string.
GString *xml_data;
// Current position in the XML document. It counts the characters (not the
// bytes) accumulated. This counter keeps track of the characters already
// present in the buffer. Its purpose is to provide the position where to
// apply the text tags (syntax highlighting styles).
guint buffer_pos;
// The tags to apply (collected at runtime as the XML document gets built).
// The array elements are of type ApplyTag.
GArray *tags;
// Statistics used for debugging purposes: incremented once per call to
// my_buffer_add()
gsize calls;
} TextRenderCtx;
//
// A text style to apply for the syntax highlighting of the XML. An entry is
// recorded by my_buffer_add() each time that a piece of text is added with a
// tag.
//
typedef struct _ApplyTag {
// The text tag (style) to apply
GtkTextTag *tag;
// Position (in characters) of the first character to style
gsize start;
// Position (in characters) just after the last character to style
gsize end;
// Value returned by xacobeo_get_node_mark() for the node given to
// my_buffer_add(), or NULL when no node was given
gchar *name;
} ApplyTag;
//
// The context used for populating the DOM tree.
//
typedef struct _TreeRenderCtx {
// The GTK tree store to fill
GtkTreeStore *store;
// Perl hash with the namespaces to use (key: uri, value: prefix)
HV *namespaces;
g_free(id_name);
g_free(id_value);
break;
}
}
// Add the current node if it wasn't already added
if (! done) {
gtk_tree_store_insert_with_values(
xargs->store, &iter, parent, pos,
DOM_COL_ICON, ICON_ELEMENT,
DOM_COL_XML_POINTER, sv,
DOM_COL_ELEMENT_NAME, node_name,
-1
);
}
g_free(node_name);
// Do the children
gint i = 0;
for (xmlNode *child = node->children; child; child = child->next) {
if(child->type == XML_ELEMENT_NODE) {
my_populate_tree_store(xargs, child, &iter, i++);
}
}
}
//
// This function displays an XML node into a GtkTextBuffer. The XML nodes are
// displayed with their corresponding namespace prefix. The prefixes to use are
// taken from the given Perl hash.
//
// The XML is rendered with syntax highlighting. The GtkTextBuffer is expected
// to have the styles already predefined. The names of the styles to be used are:
//
// XPath results:
// result_count - Margin counter used to identify each XPath result.
// boolean - Boolean result from an XPath expression.
// number - Numerical result from an XPath expression.
// literal - Literal result (string) from an XPath expression.
//
// XML Elements
// element - An XML element (both opening and closing tag).
// attribute_name - The name of an attribute.
// attribute_value - The value of an attribute.
// namespace_name - The name (prefix) of a namespace declaration.
// namespace_uri - The URI of a namespace declaration.
//
// XML syntax
// comment - An XML comment.
// dtd - A DTD definition.
// pi - The name of a processing instruction.
// pi_data - The data of a processing instruction.
// syntax - Syntax tokens : <, >, &, ;, etc.
// cdata - A CDATA (both opening and closing syntax).
// cdata_content - The content of a CDATA.
// entity_ref - an entity reference.
//
void xacobeo_populate_gtk_text_buffer (GtkTextBuffer *buffer, xmlNode *node, HV *namespaces) {

	////
	// Parameters validation
	if (buffer == NULL) {
		WARN("GtkTextBuffer is NULL");
		return;
	}

	// Rendering context shared by all the helpers. The members 'xml_data' and
	// 'tags' are released by my_render_buffer(); 'markup' is released here.
	TextRenderCtx xargs = {
		.buffer = buffer,
		.markup = my_get_buffer_tags(buffer),
		.namespaces = namespaces,
		.xml_data = g_string_sized_new(5 * 1024),
		.buffer_pos = 0,
		// A 400Kb document can require to apply up to 150 000 styles!
		.tags = g_array_sized_new(TRUE, TRUE, sizeof(ApplyTag), 200 * 1000),
		.calls = 0,
	};

	// Compute the current position in the buffer, the new text is appended at
	// the end of what's already there
	GtkTextIter iter;
	gtk_text_buffer_get_end_iter(buffer, &iter);
	xargs.buffer_pos = gtk_text_iter_get_offset(&iter);

	DEBUG("Displaying document with syntax highlighting");
	GTimeVal start;
	g_get_current_time(&start);

	// Render the XML document
	DEBUG("Computing syntax highlighting");
	my_display_document_syntax(&xargs, node);

	// Remember the number of tags now, the array is gone once the buffer has
	// been rendered
	gsize tags = xargs.tags->len;

	// Copy the text into the buffer
	DEBUG("Applying syntax highlighting");
	my_render_buffer(&xargs);

	// The markup lookup table is released only once the rendering is over, this
	// way the context never carries a dangling pointer while it's still in use
	g_free(xargs.markup);
	xargs.markup = NULL;

	GTimeVal end;
	g_get_current_time(&end);

	// Calculate the number of micro seconds spent since the last time
	glong elapsed = (end.tv_sec - start.tv_sec) * 1000000; // Seconds
	elapsed += end.tv_usec - start.tv_usec; // Microseconds

	// Average time per call, guarded against a rendering that made no calls
	gdouble frequency = xargs.calls ? elapsed / (gdouble) xargs.calls : 0.0;

	// 'calls' and 'tags' are of type gsize and need the matching format
	INFO(
		"Calls = %" G_GSIZE_FORMAT ", Tags = %" G_GSIZE_FORMAT ", Time = %ld, Frequency = %05f Time/Calls",
		xargs.calls, tags, elapsed, frequency
	);
}
//
// Adds the contents of the XML document to the buffer and applies the syntax
// highlighting.
//
// This function frees the data members 'xml_data' and 'tags'.
//
gboolean append = TRUE;
if (do_quotes) {
append = FALSE;
switch (*p) {
case '\'':
my_add_text_and_entity(xargs, buffer, markup, "apos");
break;
case '"':
my_add_text_and_entity(xargs, buffer, markup, "quot");
break;
default:
// Append the UTF-8 character as it is to the buffer
append = TRUE;
break;
}
}
// Keep the UTF-8 character unchanged
if (append) {
g_string_append_len(buffer, p, next - p);
}
}
break;
}
p = next;
}
// Write the last bytes in the buffer
buffer_add(xargs, markup, buffer->str);
g_string_free(buffer, TRUE);
}
//
// Helper for my_XML_TEXT_NODE(). The text accumulated so far in the temporary
// buffer is flushed into the main buffer (with the given markup), the temporary
// buffer is emptied and only then the entity reference is written.
//
static void my_add_text_and_entity (TextRenderCtx *xargs, GString *buffer, GtkTextTag *markup, const gchar *entity) {
	// Flush the pending text first so that the entity lands right after it
	const gchar *pending = buffer->str;
	my_buffer_add(xargs, markup, NULL, pending);

	// Start over with an empty temporary buffer
	g_string_truncate(buffer, 0);

	my_XML_ENTITY_REF_VALUE(xargs, entity);
}
// Displays a Comment ex: <!-- comment -->
//
// The comment is added as a single chunk (delimiters included) that is styled
// with the 'comment' tag.
static void my_XML_COMMENT_NODE (TextRenderCtx *xargs, xmlNode *node) {
	// g_strconcat() stops at the first NULL argument. Without this guard a
	// comment node that has no content would be rendered without its "-->".
	const gchar *text = node->content ? (const gchar *) node->content : "";
	buffer_cat(xargs, xargs->markup->comment, "<!--", text, "-->");
}
// Renders a CDATA section, ex: <![CDATA[<greeting>Hello, world!</greeting>]]>
//
// The opening and closing delimiters use the 'cdata' style while the text in
// between uses the 'cdata_content' style.
static void my_XML_CDATA_SECTION_NODE (TextRenderCtx *xargs, xmlNode *node) {
	MarkupTags *styles = xargs->markup;
	const gchar *payload = (const gchar *) node->content;

	buffer_add(xargs, styles->cdata, "<![CDATA[");
	buffer_add(xargs, styles->cdata_content, payload);
	buffer_add(xargs, styles->cdata, "]]>");
}
// Renders a PI (processing instruction), ex: <?stuff ?>
//
// The delimiters and the separating space use the 'syntax' style, the target
// name uses 'pi' and the optional data uses 'pi_data'.
static void my_XML_PI_NODE (TextRenderCtx *xargs, xmlNode *node) {
	MarkupTags *styles = xargs->markup;
	const gchar *target = (const gchar *) node->name;
	const gchar *data = (const gchar *) node->content;

	buffer_add(xargs, styles->syntax, "<?");
	buffer_add(xargs, styles->pi, target);

	// The data is optional, the separator is written only when there's data
	if (data != NULL) {
		buffer_add(xargs, styles->syntax, " ");
		buffer_add(xargs, styles->pi_data, data);
	}

	buffer_add(xargs, styles->syntax, "?>");
}
// Renders an entity reference node, ex: &entity;
//
// The node's name is the name of the entity; the actual output is delegated to
// my_XML_ENTITY_REF_VALUE().
static void my_XML_ENTITY_REF_NODE (TextRenderCtx *xargs, xmlNode *node) {
	const gchar *entity_name = (const gchar *) node->name;
	my_XML_ENTITY_REF_VALUE(xargs, entity_name);
}
// Writes an entity reference for the given entity name, ex: &my-chunk;
//
// The '&' and ';' delimiters use the 'syntax' style while the name in between
// uses the 'entity_ref' style.
static void my_XML_ENTITY_REF_VALUE (TextRenderCtx *xargs, const gchar *name) {
	MarkupTags *styles = xargs->markup;
	GtkTextTag *delimiter = styles->syntax;

	buffer_add(xargs, delimiter, "&");
	buffer_add(xargs, styles->entity_ref, name);
	buffer_add(xargs, delimiter, ";");
}
// Renders a DTD, ex: <!DOCTYPE NewsML PUBLIC ...>
//
// The node is turned into a string with my_to_string() and that string is
// added as a single chunk with the 'dtd' style.
static void my_XML_DTD_NODE (TextRenderCtx *xargs, xmlNode *node) {
	// TODO the DTD node has children, so it's possible to have more advanced
	//      syntax highlighting.
	gchar *serialized = my_to_string(node);
	buffer_add(xargs, xargs->markup->dtd, serialized);

	// The serialized form was copied into the context, the original can go
	g_free(serialized);
}
//
// Returns the node name with the right prefix based on the namespaces declared
// in the document. If the node has no namespace then the node name is returned
// without a prefix (although the string still needs to be freed).
//
// This function returns an object that has to be freed with g_free().
//
static gchar* my_get_node_name_prefixed (xmlNode *node, HV *namespaces) {
gchar *name = (gchar *) node->name;
if (node->ns) {
// Get the prefix corresponding to the namespace
const gchar *prefix = my_get_uri_prefix(node->ns->href, namespaces);
// buffer is really slow. Also applying the style elements is taking a lot of
// time.
//
// So far the best way of inserting the data into the buffer is to collect it
// all into a string and to add the single string with the contents of the
// document into the buffer. Once the buffer is filled the styles can be
// applied.
//
// Appends a piece of text to the XML document being built and records the
// style to apply to it.
//
// Parameters:
//   xargs - the rendering context; its members 'xml_data', 'tags',
//           'buffer_pos' and 'calls' are updated.
//   tag   - the style to apply to the text, or NULL for unstyled text.
//   node  - optional XML node tied to the text (can be NULL). It's only looked
//           at when there's a tag.
//   text  - the text to add; NULL is handled as an empty string.
//
static void my_buffer_add (TextRenderCtx *xargs, GtkTextTag *tag, xmlNode *node, const gchar *text) {

	const gchar *content = text ? text : "";
	++xargs->calls;

	g_string_append(xargs->xml_data, content);

	// We don't want the length of the string but the number of characters.
	// UTF-8 may encode one character as multiple bytes.
	glong end = xargs->buffer_pos + g_utf8_strlen(content, -1);

	// Apply the markup if there's a tag
	if (tag) {
		ApplyTag to_apply = {
			.tag = tag,
			.start = xargs->buffer_pos,
			.end = end,
			// The node mark is requested only here because the ApplyTag is the
			// only place that keeps it. Asking for it without a tag would
			// compute a value that's dropped right away.
			// NOTE(review): presumably the mark is a newly allocated string
			// owned by the ApplyTag - confirm against xacobeo_get_node_mark().
			.name = node ? xacobeo_get_node_mark(node) : NULL,
		};
		g_array_append_val(xargs->tags, to_apply);
	}

	xargs->buffer_pos = end;
}
//
// Collects the markup rules (text tags) used for rendering the XML with syntax
// highlighting. The tags are looked up by name in the tag table of the buffer,
// where they are expected to be already defined.
//
// The returned structure has to be freed with g_free().
//
static MarkupTags* my_get_buffer_tags (GtkTextBuffer *buffer) {

	GtkTextTagTable *table = gtk_text_buffer_get_tag_table(buffer);
	MarkupTags *markup = g_new0(MarkupTags, 1);

	// Each member is filled with the tag that's registered under the member's
	// own name.
	#define LOOKUP_TAG(member) \
		markup->member = gtk_text_tag_table_lookup(table, #member)

	// XPath results
	LOOKUP_TAG(result_count);
	LOOKUP_TAG(boolean);
	LOOKUP_TAG(number);
	LOOKUP_TAG(literal);

	// XML elements and syntax
	LOOKUP_TAG(attribute_name);
	LOOKUP_TAG(attribute_value);
	LOOKUP_TAG(comment);
	LOOKUP_TAG(dtd);
	LOOKUP_TAG(element);
	LOOKUP_TAG(pi);
	LOOKUP_TAG(pi_data);
	LOOKUP_TAG(syntax);
	LOOKUP_TAG(cdata);
	LOOKUP_TAG(cdata_content);
	LOOKUP_TAG(entity_ref);
	LOOKUP_TAG(namespace_name);
	LOOKUP_TAG(namespace_uri);

	LOOKUP_TAG(error);

	#undef LOOKUP_TAG

	return markup;
}
//
// Returns the path of a node. The path is expected to be unique for each node.
//
// This function returns a string that has to be freed with g_free().
//
gchar* xacobeo_get_node_path (xmlNode *origin, HV *namespaces) {
if (origin == NULL) {
return NULL;
}
// Reverse the path to the node (from top to bottom)
GSList *list = NULL;
for (xmlNode *iter = origin; iter; iter = iter->parent) {
list = g_slist_prepend(list, iter);
}
// Build the path to the node
GString *gstring = g_string_sized_new(32);
gboolean use_separator = FALSE;
for (GSList *iter = list; iter; iter = iter->next) {
xmlNode *node = (xmlNode *) iter->data;
switch (node->type) {
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
g_string_append_c(gstring, '/');
break;
case XML_ELEMENT_NODE:
if (use_separator) {
g_string_append_c(gstring, '/');
}
else {
use_separator = TRUE;
}
gchar *name = my_get_node_name_prefixed(node, namespaces);
g_string_append(gstring, name);
g_free(name);
// Check if the node has siblings with the same name and namespace. If
// yes then we must add an offset to the xpath expression.
// Look for previous sibling with the same name.
int similar = 0;
for (xmlNode *sibling = node->prev; sibling; sibling = sibling->prev) {
if (ELEMENT_MATCH(sibling, node)) {
( run in 0.659 second using v1.01-cache-2.11-cpan-39bf76dae61 )