SWISH-3
view release on metacpan or search on metacpan
libswish3.c view on Meta::CPAN
* SAX2 callback
*/
/*
 * libxml2 SAX warning callback (my_parser also installs it as the error
 * callback).
 *
 * Expands the printf-style message supplied by libxml2 into a local buffer
 * and forwards the result to xmlParserWarning() along with the parser
 * context. Returns immediately when SWISH_PARSER_WARNINGS is false.
 *
 * user_data: the swish_ParserData for the document being parsed.
 * msg:       printf-style format string; its arguments follow it.
 */
static void
mywarn(
    void *user_data,
    xmlChar *msg,
    ...
)
{
    swish_ParserData *parser_data;
    va_list args;
    char str[1000];

    if (!SWISH_PARSER_WARNINGS) {
        return;
    }

    parser_data = (swish_ParserData *)user_data;

    SWISH_WARN("libxml2 warning for %s:", parser_data->docinfo->uri);

    if (parser_data->ctxt == NULL) {
        /* Reported only; the message is still forwarded below, as before. */
        SWISH_WARN("ctxt is null");
    }

    /* vsnprintf always NUL-terminates and truncates to the buffer size. */
    va_start(args, msg);
    vsnprintf(str, sizeof(str), (const char *)msg, args);
    va_end(args);

    /*
     * str is already fully expanded and may itself contain '%' characters
     * (e.g. text echoed from the document), so it must be passed as data
     * through a literal "%s" format, never as the format string.
     */
    xmlParserWarning(parser_data->ctxt, "%s", str);
}
/*
* SAX2 handler struct for html and xml parsing
*/
xmlSAXHandler my_parser = {
NULL, /* internalSubset */
NULL, /* isStandalone */
NULL, /* hasInternalSubset */
NULL, /* hasExternalSubset */
NULL, /* resolveEntity */
NULL, /* getEntity */
NULL, /* entityDecl */
NULL, /* notationDecl */
NULL, /* attributeDecl */
NULL, /* elementDecl */
NULL, /* unparsedEntityDecl */
NULL, /* setDocumentLocator */
mystartDocument, /* startDocument */
myendDocument, /* endDocument */
mystartElement, /* startElement */
myendElement, /* endElement */
NULL, /* reference */
mycharacters, /* characters */
NULL, /* ignorableWhitespace */
NULL, /* processingInstruction */
mycomments, /* comment */
(warningSAXFunc) & mywarn, /* xmlParserWarning */
(errorSAXFunc) & mywarn, /* xmlParserError */
(fatalErrorSAXFunc) & myerr, /* xmlfatalParserError */
NULL, /* getParameterEntity */
NULL, /* cdataBlock */
NULL, /* externalSubset; */
XML_SAX2_MAGIC,
NULL,
mystartElementNs, /* startElementNs */
myendElementNs, /* endElementNs */
NULL /* xmlStructuredErrorFunc */
};
xmlSAXHandlerPtr my_parser_ptr = &my_parser;
static int
docparser(
swish_ParserData *parser_data,
xmlChar *filename,
xmlChar *buffer,
int size
)
{
int ret;
ret = 0;
xmlChar *mime = (xmlChar *)parser_data->docinfo->mime;
xmlChar *parser = (xmlChar *)parser_data->docinfo->parser;
if (!size && !xmlStrlen(buffer) && !parser_data->docinfo->size) {
SWISH_WARN("%s appears to be empty -- can't parse it", parser_data->docinfo->uri);
return 1;
}
if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
SWISH_DEBUG_MSG("%s -- using %s parser [%c]", parser_data->docinfo->uri, parser, parser[0]);
}
/*
* slurp file if not already in memory
*/
if (filename && !buffer) {
if (parser_data->docinfo->is_gzipped) {
buffer = swish_io_slurp_gzfile_len(
filename,
&(parser_data->docinfo->size),
SWISH_FALSE
);
parser_data->docinfo->size = xmlStrlen(buffer);
}
else {
buffer = swish_io_slurp_file_len(
filename,
(off_t)parser_data->docinfo->size,
SWISH_FALSE
);
}
size = parser_data->docinfo->size;
}
if (parser[0] == 'H' || parser[0] == 'h') {
parser_data->is_html = SWISH_TRUE;
ret = html_parser(my_parser_ptr, parser_data, buffer, size);
}
( run in 0.638 second using v1.01-cache-2.11-cpan-39bf76dae61 )