HTML-Content-Extractor

 view release on metacpan or  search on metacpan

Extractor.xs  view on Meta::CPAN

    // Fill slots 1..21 of skip_tags with the ids that get_tag_id() returns
    // for each tag name, looked up in my_r->tags.
    // NOTE(review): slot 0, the array declaration and its consumer are outside
    // this excerpt; presumably these are elements the extractor ignores when
    // collecting content — confirm against the code that reads skip_tags.

    // Embedded media / plugin-style elements.
    skip_tags[1]  = get_tag_id(my_r->tags, "applet");
    skip_tags[2]  = get_tag_id(my_r->tags, "audio");
    skip_tags[3]  = get_tag_id(my_r->tags, "video");
    skip_tags[4]  = get_tag_id(my_r->tags, "source");
    skip_tags[5]  = get_tag_id(my_r->tags, "track");
    skip_tags[6]  = get_tag_id(my_r->tags, "bgsound");
    skip_tags[7]  = get_tag_id(my_r->tags, "canvas");
    // Form-related elements.
    skip_tags[8]  = get_tag_id(my_r->tags, "datalist");
    skip_tags[9]  = get_tag_id(my_r->tags, "button");
    skip_tags[10] = get_tag_id(my_r->tags, "fieldset");
    skip_tags[11] = get_tag_id(my_r->tags, "legend");
    skip_tags[12] = get_tag_id(my_r->tags, "input");
    skip_tags[13] = get_tag_id(my_r->tags, "keygen");
    skip_tags[14] = get_tag_id(my_r->tags, "textarea");
    // Frame-related elements.
    skip_tags[15] = get_tag_id(my_r->tags, "frameset");
    skip_tags[16] = get_tag_id(my_r->tags, "noframes");
    // Remaining entries of the visible list.
    skip_tags[17] = get_tag_id(my_r->tags, "label");
    skip_tags[18] = get_tag_id(my_r->tags, "link");
    skip_tags[19] = get_tag_id(my_r->tags, "map");
    skip_tags[20] = get_tag_id(my_r->tags, "object");
    skip_tags[21] = get_tag_id(my_r->tags, "progress");

Extractor.xs  view on Meta::CPAN

    
    // Register tag definitions in `tags` via add_tag_R(). In every call shown
    // here the third argument equals the character count of the tag-name
    // literal (presumably the name length — confirm against add_tag_R).
    // NOTE(review): the meaning of the remaining arguments (TYPE_TAG_*,
    // OPTION_*, AI_* and the numeric fields) is defined outside this excerpt.
    add_tag_R(tags, "font", 4, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "footer", 6, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    
    // ++ form ++
    // Start of the form-related registrations.
    add_tag_R(tags, "form", 4, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "button", 6, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    
    // ++ form: fieldset ++
    add_tag_R(tags, "fieldset", 8, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "legend", 6, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    // -- form: fieldset --
    
    // ++ form: select ++
    // select, optgroup and option all use FAMILY_SELECT together with
    // EXTRA_TAG_CLOSE_PRIORITY; their fourth argument steps down 20 -> 19 -> 18.
    // NOTE(review): presumably a closing priority inside the family, with the
    // outer element ranked highest — confirm against add_tag_R's consumer.
    add_tag_R(tags, "select", 6, 20, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS, AI_NULL);
    add_tag_R(tags, "optgroup", 8, 19, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS_SAVE, AI_NULL);
    add_tag_R(tags, "option", 6, 18, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS_SAVE, AI_NULL);
    // -- form: select --
    
    // input and keygen are the only entries here registered as TYPE_TAG_ONE
    // (presumably elements without a closing tag — confirm).
    add_tag_R(tags, "input", 5, 0, 0, TYPE_TAG_ONE, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "keygen", 6, 0, 0, TYPE_TAG_ONE, 0, OPTION_NULL, AI_NULL);

libextractor/libextractor.c  view on Meta::CPAN

    // Fill slots 1..21 of skip_tags with the ids that get_tag_id() returns
    // for each tag name, looked up in my_r->tags.
    // NOTE(review): slot 0, the array declaration and its consumer are outside
    // this excerpt; presumably these are elements the extractor ignores when
    // collecting content — confirm against the code that reads skip_tags.

    // Embedded media / plugin-style elements.
    skip_tags[1]  = get_tag_id(my_r->tags, "applet");
    skip_tags[2]  = get_tag_id(my_r->tags, "audio");
    skip_tags[3]  = get_tag_id(my_r->tags, "video");
    skip_tags[4]  = get_tag_id(my_r->tags, "source");
    skip_tags[5]  = get_tag_id(my_r->tags, "track");
    skip_tags[6]  = get_tag_id(my_r->tags, "bgsound");
    skip_tags[7]  = get_tag_id(my_r->tags, "canvas");
    // Form-related elements.
    skip_tags[8]  = get_tag_id(my_r->tags, "datalist");
    skip_tags[9]  = get_tag_id(my_r->tags, "button");
    skip_tags[10] = get_tag_id(my_r->tags, "fieldset");
    skip_tags[11] = get_tag_id(my_r->tags, "legend");
    skip_tags[12] = get_tag_id(my_r->tags, "input");
    skip_tags[13] = get_tag_id(my_r->tags, "keygen");
    skip_tags[14] = get_tag_id(my_r->tags, "textarea");
    // Frame-related elements.
    skip_tags[15] = get_tag_id(my_r->tags, "frameset");
    skip_tags[16] = get_tag_id(my_r->tags, "noframes");
    // Remaining entries of the visible list.
    skip_tags[17] = get_tag_id(my_r->tags, "label");
    skip_tags[18] = get_tag_id(my_r->tags, "link");
    skip_tags[19] = get_tag_id(my_r->tags, "map");
    skip_tags[20] = get_tag_id(my_r->tags, "object");
    skip_tags[21] = get_tag_id(my_r->tags, "progress");

libextractor/libextractor.c  view on Meta::CPAN

    
    // Register tag definitions in `tags` via add_tag_R(). In every call shown
    // here the third argument equals the character count of the tag-name
    // literal (presumably the name length — confirm against add_tag_R).
    // NOTE(review): the meaning of the remaining arguments (TYPE_TAG_*,
    // OPTION_*, AI_* and the numeric fields) is defined outside this excerpt.
    add_tag_R(tags, "font", 4, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "footer", 6, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    
    // ++ form ++
    // Start of the form-related registrations.
    add_tag_R(tags, "form", 4, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "button", 6, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    
    // ++ form: fieldset ++
    add_tag_R(tags, "fieldset", 8, 0, 0, TYPE_TAG_BLOCK, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "legend", 6, 0, 0, TYPE_TAG_NORMAL, 0, OPTION_NULL, AI_NULL);
    // -- form: fieldset --
    
    // ++ form: select ++
    // select, optgroup and option all use FAMILY_SELECT together with
    // EXTRA_TAG_CLOSE_PRIORITY; their fourth argument steps down 20 -> 19 -> 18.
    // NOTE(review): presumably a closing priority inside the family, with the
    // outer element ranked highest — confirm against add_tag_R's consumer.
    add_tag_R(tags, "select", 6, 20, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS, AI_NULL);
    add_tag_R(tags, "optgroup", 8, 19, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS_SAVE, AI_NULL);
    add_tag_R(tags, "option", 6, 18, FAMILY_SELECT, TYPE_TAG_NORMAL, EXTRA_TAG_CLOSE_PRIORITY, OPTION_CLEAN_TAGS_SAVE, AI_NULL);
    // -- form: select --
    
    // input and keygen are the only entries here registered as TYPE_TAG_ONE
    // (presumably elements without a closing tag — confirm).
    add_tag_R(tags, "input", 5, 0, 0, TYPE_TAG_ONE, 0, OPTION_NULL, AI_NULL);
    add_tag_R(tags, "keygen", 6, 0, 0, TYPE_TAG_ONE, 0, OPTION_NULL, AI_NULL);



( run in 0.983 second using v1.01-cache-2.11-cpan-49f99fa48dc )