HTML-Gumbo

 view release on metacpan or  search on metacpan

lib/HTML/Gumbo.xs  view on Meta::CPAN

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "gumbo.h"

/*
 * True when `tag` is one of the GUMBO_TAG_* values listed below (the HTML
 * void elements: tags that take no content and no closing tag).
 *
 * The argument is parenthesized at every use so that an arbitrary
 * expression (e.g. `cond ? a : b`) is compared as a whole.  It is still
 * evaluated once per comparison, so do not pass an expression with side
 * effects.
 */
#define PHG_IS_VOID_ELEMENT(tag) \
    (  (tag) == GUMBO_TAG_AREA \
    || (tag) == GUMBO_TAG_BASE \
    || (tag) == GUMBO_TAG_BR \
    || (tag) == GUMBO_TAG_COL \
    || (tag) == GUMBO_TAG_EMBED \
    || (tag) == GUMBO_TAG_HR \
    || (tag) == GUMBO_TAG_IMG \
    || (tag) == GUMBO_TAG_INPUT \
    || (tag) == GUMBO_TAG_KEYGEN \
    || (tag) == GUMBO_TAG_LINK \
    || (tag) == GUMBO_TAG_META \
    || (tag) == GUMBO_TAG_PARAM \
    || (tag) == GUMBO_TAG_SOURCE \
    || (tag) == GUMBO_TAG_TRACK \
    || (tag) == GUMBO_TAG_WBR )

/*
 * Build a new SV from the NUL-terminated C string `str`, passing a true
 * utf8 argument to newSVpvn_utf8().  The length is taken with strlen(),
 * so `str` is evaluated twice and must not be NULL.
 */
#define newSVpvz8(str) newSVpvn_utf8((str), strlen(str), 1)

/*
 * Build a new SV from the first `len` bytes of `str`, passing a true
 * utf8 argument to newSVpvn_utf8().
 */
#define newSVpvn8(str, len) newSVpvn_utf8((str), (len), 1)

/*
 * walk_tree() flag bit: when set, an element node whose parent is the
 * document node gets no start/end callbacks (its children are still walked).
 */
#define PHG_FLAG_SKIP_ROOT_ELEMENT 0x01

/* Event kinds that walk_tree() passes to its callback. */
typedef enum {
    /* Sent for a document/element/template node before its children. */
    PHG_ELEMENT_START = 0,
    /* Sent for the same node after all of its children were visited. */
    PHG_ELEMENT_END   = 1,
    /* Sent once for every node of any other type. */
    PHG_TEXT          = 2
} PerlHtmlGumboType;

/*
 * Depth-first traversal of a Gumbo parse tree.
 *
 * Document, element and template nodes are reported to `cb` twice: with
 * PHG_ELEMENT_START before their children are visited and with
 * PHG_ELEMENT_END afterwards.  Nodes of any other type are reported once
 * as PHG_TEXT and are not descended into.
 *
 *   node   subtree root; dereferenced unconditionally, must not be NULL
 *   flags  bit set; with PHG_FLAG_SKIP_ROOT_ELEMENT an element whose parent
 *          is the document node gets no START/END callbacks, but its
 *          children are still walked
 *   cb     callback receiving (event type, node, ctx)
 *   ctx    opaque pointer handed to `cb` unchanged
 */
STATIC
void
walk_tree(pTHX_ GumboNode* node, int flags, void (*cb)(pTHX_ PerlHtmlGumboType, GumboNode*, void*), void* ctx ) {
    GumboVector* children;
    unsigned int i; /* same type as GumboVector.length: no signed/unsigned compare */
    int skip;

    /* Leaf-like nodes: a single PHG_TEXT event, no recursion. */
    if ( node->type != GUMBO_NODE_DOCUMENT
      && node->type != GUMBO_NODE_ELEMENT
      && node->type != GUMBO_NODE_TEMPLATE ) {
        (*cb)(aTHX_ PHG_TEXT, node, ctx);
        return;
    }

    skip = (flags & PHG_FLAG_SKIP_ROOT_ELEMENT)
        && node->type == GUMBO_NODE_ELEMENT
        && node->parent
        && node->parent->type == GUMBO_NODE_DOCUMENT;

    if ( !skip ) {
        (*cb)(aTHX_ PHG_ELEMENT_START, node, ctx);
    }

    /*
     * Element and template nodes both read v.element.  The pointer is the
     * address of a struct member, so it can never be NULL and needs no check.
     */
    if ( node->type == GUMBO_NODE_DOCUMENT ) {
        children = &node->v.document.children;
    } else {
        children = &node->v.element.children;
    }

    for (i = 0; i < children->length; ++i) {
        walk_tree(aTHX_ children->data[i], flags, cb, ctx);
    }

    if ( !skip ) {
        (*cb)(aTHX_ PHG_ELEMENT_END, node, ctx);
    }
}

STATIC
GumboStringPiece
get_tag_name(GumboElement* e) {
    GumboStringPiece res;
    if ( e->tag == GUMBO_TAG_UNKNOWN ) {
        res = e->original_tag;
        gumbo_tag_from_original_text(&res);



( run in 0.603 second using v1.01-cache-2.11-cpan-71847e10f99 )