SWISH-3

 view release on metacpan or  search on metacpan

libswish3.c  view on Meta::CPAN

* SAX2 callback 
*/
/*
* Warning callback installed in the SAX handler table (it is registered
* for both the warning and the error slots of my_parser).
*
* user_data: the swish_ParserData for the document being parsed.
* msg:       printf-style format string supplied by the parser.
* ...:       arguments consumed by msg.
*
* Does nothing when SWISH_PARSER_WARNINGS is false. Otherwise it emits a
* SWISH_WARN line naming the document uri, expands msg into a local
* buffer (output longer than the buffer is truncated by vsnprintf) and
* forwards the expanded text to xmlParserWarning().
*/
static void
mywarn(
    void *user_data,
    xmlChar *msg,
    ...
)
{
    swish_ParserData *parser_data;
    va_list args;
    char str[1000];

    if (!SWISH_PARSER_WARNINGS)
        return;

    parser_data = (swish_ParserData *)user_data;

    SWISH_WARN("libxml2 warning for %s:", parser_data->docinfo->uri);
    if (parser_data->ctxt == NULL) {
        SWISH_WARN("ctxt is null");
    }

    va_start(args, msg);
    vsnprintf(str, sizeof(str), (const char *)msg, args);
    va_end(args);

    /*
     * str is already fully expanded, so hand it over as data behind a
     * literal "%s". Using it as the format string itself would make any
     * '%' left in the expanded text be parsed as a conversion with no
     * matching argument.
     */
    xmlParserWarning(parser_data->ctxt, "%s", str);
}

/* 
* SAX2 handler struct for html and xml parsing 
*/

xmlSAXHandler my_parser = {
    NULL,                       /* internalSubset */
    NULL,                       /* isStandalone */
    NULL,                       /* hasInternalSubset */
    NULL,                       /* hasExternalSubset */
    NULL,                       /* resolveEntity */
    NULL,                       /* getEntity */
    NULL,                       /* entityDecl */
    NULL,                       /* notationDecl */
    NULL,                       /* attributeDecl */
    NULL,                       /* elementDecl */
    NULL,                       /* unparsedEntityDecl */
    NULL,                       /* setDocumentLocator */
    mystartDocument,            /* startDocument */
    myendDocument,              /* endDocument */
    mystartElement,             /* startElement */
    myendElement,               /* endElement */
    NULL,                       /* reference */
    mycharacters,               /* characters */
    NULL,                       /* ignorableWhitespace */
    NULL,                       /* processingInstruction */
    mycomments,                 /* comment */
    (warningSAXFunc) & mywarn,  /* xmlParserWarning */
    (errorSAXFunc) & mywarn,     /* xmlParserError */
    (fatalErrorSAXFunc) & myerr, /* xmlfatalParserError */
    NULL,                       /* getParameterEntity */
    NULL,                       /* cdataBlock */
    NULL,                       /* externalSubset; */
    XML_SAX2_MAGIC,
    NULL,
    mystartElementNs,           /* startElementNs */
    myendElementNs,             /* endElementNs */
    NULL                        /* xmlStructuredErrorFunc */
};

/* Address of the my_parser table; this is what docparser() passes to html_parser(). */
xmlSAXHandlerPtr my_parser_ptr = &my_parser;

static int
docparser(
    swish_ParserData *parser_data,
    xmlChar *filename,
    xmlChar *buffer,
    int size
)
{

    int ret;
    ret = 0;
    xmlChar *mime = (xmlChar *)parser_data->docinfo->mime;
    xmlChar *parser = (xmlChar *)parser_data->docinfo->parser;

    if (!size && !xmlStrlen(buffer) && !parser_data->docinfo->size) {
        SWISH_WARN("%s appears to be empty -- can't parse it", parser_data->docinfo->uri);

        return 1;
    }

    if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
        SWISH_DEBUG_MSG("%s -- using %s parser [%c]", parser_data->docinfo->uri, parser, parser[0]);
    }
    
/*
* slurp file if not already in memory 
*/
    if (filename && !buffer) {
        if (parser_data->docinfo->is_gzipped) {
            buffer = swish_io_slurp_gzfile_len(
                filename, 
                &(parser_data->docinfo->size), 
                SWISH_FALSE
            );
            parser_data->docinfo->size = xmlStrlen(buffer);
        }
        else {
            buffer = swish_io_slurp_file_len(
                filename, 
                (off_t)parser_data->docinfo->size,
                SWISH_FALSE
            );
        }
        size = parser_data->docinfo->size;
    }

    if (parser[0] == 'H' || parser[0] == 'h') {
        parser_data->is_html = SWISH_TRUE;
        ret = html_parser(my_parser_ptr, parser_data, buffer, size);
    }



( run in 0.638 second using v1.01-cache-2.11-cpan-39bf76dae61 )