SWISH-3
view release on metacpan or search on metacpan
libswish3.c view on Meta::CPAN
/*
 * Short type names for the library's structs, so the rest of the code can
 * refer to them without the "struct" keyword.
 */
typedef struct swish_3 swish_3;
typedef struct swish_StringList swish_StringList;
typedef struct swish_Config swish_Config;
typedef struct swish_ConfigFlags swish_ConfigFlags;
typedef struct swish_ConfigValue swish_ConfigValue;
typedef struct swish_DocInfo swish_DocInfo;
typedef struct swish_MetaStackElement swish_MetaStackElement;
/* pointer alias: unlike the other names, this one hides a level of indirection */
typedef struct swish_MetaStackElement *swish_MetaStackElementPtr;
typedef struct swish_MetaStack swish_MetaStack;
typedef struct swish_MetaName swish_MetaName;
typedef struct swish_Property swish_Property;
typedef struct swish_Token swish_Token;
typedef struct swish_TokenList swish_TokenList;
typedef struct swish_TokenIterator swish_TokenIterator;
typedef struct swish_ParserData swish_ParserData;
typedef struct swish_Tag swish_Tag;
typedef struct swish_TagStack swish_TagStack;
typedef struct swish_Analyzer swish_Analyzer;
typedef struct swish_Parser swish_Parser;
typedef struct swish_NamedBuffer swish_NamedBuffer;
/*
=head2 Data Structures
*/
/*
 * Top-level handle: bundles one config, one analyzer and one parser object.
 */
struct swish_3
{
int ref_cnt; /* NOTE(review): presumably a reference count like the one on swish_MetaName -- confirm */
void *stash; /* opaque pointer; presumably for bindings, as on swish_Config -- confirm */
swish_Config *config;
swish_Analyzer *analyzer;
swish_Parser *parser;
};
/*
 * A list of xmlChar strings held in the word array.
 * NOTE(review): n and max look like "entries in use" and "allocated slots";
 * confirm against the stringlist helper functions before relying on that.
 */
struct swish_StringList
{
unsigned int n;
unsigned int max;
xmlChar** word;
};
/*
 * Configuration object: a set of libxml2 hash tables plus a flags struct.
 * swish_config_init() creates every table except mimes, which stays NULL
 * until swish_config_set_default() assigns swish_mime_defaults() to it.
 * swish_config_free() releases all of the tables and the flags.
 */
struct swish_Config
{
int ref_cnt; /* swish_config_free() warns when this is not 0 */
void *stash; /* for bindings; swish_config_free() warns when still non-NULL */
xmlHashTablePtr misc; /* values released via free_string; also consulted for flag settings such as SWISH_TOKENIZE */
xmlHashTablePtr properties; /* values released via free_props */
xmlHashTablePtr metanames; /* swish_MetaName values, released via free_metas */
xmlHashTablePtr tag_aliases; /* alias => real; values released via free_string */
xmlHashTablePtr parsers; /* values released via free_string */
xmlHashTablePtr mimes; /* values released via free_string */
xmlHashTablePtr index; /* values released via free_string */
xmlHashTablePtr stringlists; /* values released via free_stringlist */
struct swish_ConfigFlags *flags; /* shortcuts for parsing */
};
/*
 * Ready-to-test copies of settings that are otherwise kept as strings in
 * swish_Config.misc. The starting values are assigned in
 * swish_config_init_flags().
 */
struct swish_ConfigFlags
{
boolean tokenize;
boolean cascade_meta_context; /* when true, tokens are added to every metaname in the stack */
boolean ignore_xmlns;
boolean follow_xinclude;
int undef_metas; /* one of the SWISH_UNDEF_METAS_* values */
int undef_attrs; /* one of the SWISH_UNDEF_ATTRS_* values */
int max_meta_id; /* starts at -1 */
int max_prop_id; /* starts at -1 */
xmlHashTablePtr meta_ids; /* freed without a value deallocator in swish_config_flags_free() */
xmlHashTablePtr prop_ids; /* freed without a value deallocator in swish_config_flags_free() */
//xmlHashTablePtr contexts;
};
/*
 * Wrapper around a hash table, carrying a ref_cnt and a stash for bindings.
 * NOTE(review): presumably the hash maps a name to a text buffer (see the
 * swish_nb_add_buf() calls in flush_buffer) -- confirm.
 */
struct swish_NamedBuffer
{
int ref_cnt; /* for bindings */
void *stash; /* for bindings */
xmlHashTablePtr hash; /* the meat */
};
/*
 * Information recorded about a single document.
 * NOTE(review): nothing in this part of the file reads or writes these
 * fields, so their exact meaning (units of size, what action holds, when
 * nwords is updated) should be confirmed against the docinfo functions.
 */
struct swish_DocInfo
{
time_t mtime;
off_t size;
xmlChar * mime;
xmlChar * encoding;
xmlChar * uri;
unsigned int nwords;
xmlChar * ext;
xmlChar * parser;
xmlChar * action;
boolean is_gzipped;
int ref_cnt;
};
/*
 * One metaname definition; instances are the values of
 * swish_Config.metanames.
 */
struct swish_MetaName
{
int ref_cnt; /* free_metas() decrements this and frees the struct once it drops below 1 */
int id;
xmlChar *name;
int bias;
xmlChar *alias_for; /* when non-NULL, flush_buffer() stores text under this name instead */
};
/*
 * One property definition.
 * NOTE(review): presumably these are the values of swish_Config.properties;
 * the meaning of type, max and sort_length is not visible here -- confirm
 * against the property functions.
 */
struct swish_Property
{
int ref_cnt; /* the struct is passed to swish_property_free() once this drops below 1 */
int id;
xmlChar *name;
boolean ignore_case;
int type;
boolean verbatim;
xmlChar *alias_for;
unsigned int max;
boolean sort;
boolean presort;
unsigned int sort_length;
};
struct swish_Token
{
unsigned int pos; // this token's position in document
libswish3.c view on Meta::CPAN
if (prop->ref_cnt < 1) {
swish_property_free(prop);
}
}
/*
 * Deallocator used by swish_config_free() for config->metanames: gives up
 * one reference on a metaname and releases the struct once no references
 * remain.
 *
 * meta:     the hash value being released
 * metaname: the hash key, used only in the debug message
 */
static void
free_metas(
    swish_MetaName *meta,
    xmlChar *metaname
)
{
    if ((SWISH_DEBUG & SWISH_DEBUG_CONFIG) != 0) {
        SWISH_DEBUG_MSG(" freeing config->meta %s", metaname);
        swish_metaname_debug(meta);
    }

    /* drop our reference; whoever drops the last one frees the struct */
    if (--meta->ref_cnt < 1) {
        swish_metaname_free(meta);
    }
}
/*
 * Release a config object together with every hash table it holds and its
 * flags struct.
 *
 * config: object to destroy; must not be NULL.
 *
 * Warns (without aborting) when config->ref_cnt is not 0 or when
 * config->stash is still set, since either suggests something else still
 * refers to the object.
 */
void
swish_config_free(
    swish_Config *config
)
{
    if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
        SWISH_DEBUG_MSG("freeing config");
        /* length modifiers must match the long-sized arguments, otherwise
           the pointer value is misread (undefined behaviour) */
        SWISH_DEBUG_MSG("ptr addr: 0x%lx %ld", (unsigned long)config, (long int)config);
        swish_mem_debug();
    }

    xmlHashFree(config->misc, (xmlHashDeallocator)free_string);
    xmlHashFree(config->properties, (xmlHashDeallocator)free_props);
    xmlHashFree(config->metanames, (xmlHashDeallocator)free_metas);
    xmlHashFree(config->tag_aliases, (xmlHashDeallocator)free_string);
    xmlHashFree(config->parsers, (xmlHashDeallocator)free_string);
    /* mimes is still NULL if swish_config_set_default() never ran;
       xmlHashFree() ignores a NULL table */
    xmlHashFree(config->mimes, (xmlHashDeallocator)free_string);
    xmlHashFree(config->index, (xmlHashDeallocator)free_string);
    xmlHashFree(config->stringlists, (xmlHashDeallocator)free_stringlist);
    swish_config_flags_free(config->flags);

    if (config->ref_cnt != 0) {
        SWISH_WARN("config ref_cnt != 0: %d", config->ref_cnt);
    }

    if (config->stash != NULL) {
        SWISH_WARN("possible memory leak: config->stash was not freed");
    }

    swish_xfree(config);
}
/*
 * Allocate a swish_ConfigFlags and fill in the built-in starting values.
 *
 * Returns the new struct; release it with swish_config_flags_free().
 */
swish_ConfigFlags *
swish_config_init_flags(
)
{
    swish_ConfigFlags *defaults = swish_xmalloc(sizeof(swish_ConfigFlags));

    /* no ids have been handed out yet */
    defaults->max_meta_id = -1;
    defaults->max_prop_id = -1;

    /* how unknown tags and attributes are treated */
    defaults->undef_metas = SWISH_UNDEF_METAS_INDEX;
    defaults->undef_attrs = SWISH_UNDEF_ATTRS_DISABLE;

    /* on/off switches */
    defaults->tokenize = SWISH_TRUE;
    defaults->ignore_xmlns = SWISH_TRUE;
    defaults->follow_xinclude = SWISH_TRUE;
    /* when true, tokens are added to every metaname in the stack */
    defaults->cascade_meta_context = SWISH_FALSE;

    defaults->meta_ids = swish_hash_init(8);
    defaults->prop_ids = swish_hash_init(8);
    /* TODO: a "contexts" hash could be cached here to save malloc/frees */

    return defaults;
}
/*
 * Release a flags struct created by swish_config_init_flags().
 *
 * flags: struct to destroy; must not be NULL.
 */
void
swish_config_flags_free(
    swish_ConfigFlags * flags
)
{
    xmlHashTablePtr id_tables[2];
    int i;

    /*
     * These tables are a convenience: only the tables are freed here (no
     * value deallocator is passed); the original note says the contents
     * are really freed in swish_config_free().
     */
    id_tables[0] = flags->meta_ids;
    id_tables[1] = flags->prop_ids;
    for (i = 0; i < 2; i++) {
        xmlHashFree(id_tables[i], NULL);
    }

    if (SWISH_DEBUG != 0) {
        swish_config_flags_debug(flags);
    }

    swish_xfree(flags);
}
/*
 * Emit one debug message per scalar field of a flags struct.
 * The two id hash tables are not printed.
 *
 * flags: struct to dump; must not be NULL.
 */
void
swish_config_flags_debug(
    swish_ConfigFlags *flags
)
{
    const swish_ConfigFlags *f = flags;

    /* switches */
    SWISH_DEBUG_MSG("config->tokenize == %d", f->tokenize);
    SWISH_DEBUG_MSG("config->cascade_meta_context == %d", f->cascade_meta_context);
    SWISH_DEBUG_MSG("config->ignore_xmlns == %d", f->ignore_xmlns);
    SWISH_DEBUG_MSG("config->follow_xinclude == %d", f->follow_xinclude);

    /* handling of undefined tags and attributes */
    SWISH_DEBUG_MSG("config->undef_metas == %d", f->undef_metas);
    SWISH_DEBUG_MSG("config->undef_attrs == %d", f->undef_attrs);

    /* highest ids seen so far */
    SWISH_DEBUG_MSG("config->max_meta_id == %d", f->max_meta_id);
    SWISH_DEBUG_MSG("config->max_prop_id == %d", f->max_prop_id);
}
/*
 * Create an empty config object.
 *
 * Every hash table except mimes is created here; mimes is left NULL (it is
 * assigned in swish_config_set_default()). ref_cnt starts at 0 and stash at
 * NULL.
 *
 * Returns the new object; release it with swish_config_free().
 */
swish_Config *
swish_config_init(
)
{
    swish_Config *config;

    if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
        SWISH_DEBUG_MSG("init config");
    }

    /* the hashes will automatically grow as needed so we init with sane starting size */
    config = swish_xmalloc(sizeof(swish_Config));
    config->flags = swish_config_init_flags();
    config->misc = swish_hash_init(8);
    config->metanames = swish_hash_init(8);
    config->properties = swish_hash_init(8);
    config->parsers = swish_hash_init(8);
    config->index = swish_hash_init(8);
    config->tag_aliases = swish_hash_init(8);   /* alias => real */
    config->stringlists = swish_hash_init(8);
    config->mimes = NULL;
    config->ref_cnt = 0;
    config->stash = NULL;

    if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
        /* %lx matches the long-sized argument; plain %x misreads it */
        SWISH_DEBUG_MSG("config ptr 0x%lx", (unsigned long)config);
    }

    return config;
}
void
swish_config_set_default(
swish_Config *config
)
{
swish_Property *tmpprop;
swish_MetaName *tmpmeta;
xmlChar *tmpbuf;
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
SWISH_DEBUG_MSG("setting default config");
/* we xstrdup a lot in order to consistently free in swish_config_free() */
/* MIME types */
config->mimes = swish_mime_defaults();
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
SWISH_DEBUG_MSG("mime hash set");
libswish3.c view on Meta::CPAN
/* values in config2 override and are set in config1 */
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("Merging config2 0x%lx into config1 0x%lx",
config2, config1);
swish_config_debug(config2);
swish_config_debug(config1);
}
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge properties");
}
merge_properties(config1->properties, config2->properties);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge metanames");
}
merge_metanames(config1->metanames, config2->metanames);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge parsers");
}
swish_hash_merge(config1->parsers, config2->parsers);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge mimes");
}
swish_hash_merge(config1->mimes, config2->mimes);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge index");
}
swish_hash_merge(config1->index, config2->index);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge tag_aliases");
}
swish_hash_merge(config1->tag_aliases, config2->tag_aliases);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge misc");
}
swish_hash_merge(config1->misc, config2->misc);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge stringlists");
}
merge_stringlists(config1->stringlists, config2->stringlists);
if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
SWISH_DEBUG_MSG("merge complete");
}
/* set flags */
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_TOKENIZE)) {
config2->flags->tokenize = swish_string_to_boolean(swish_hash_fetch(config2->misc, BAD_CAST SWISH_TOKENIZE));
}
config1->flags->tokenize = config2->flags->tokenize;
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_CASCADE_META_CONTEXT)) {
config2->flags->cascade_meta_context =
swish_string_to_boolean(swish_hash_fetch(config2->misc, BAD_CAST SWISH_CASCADE_META_CONTEXT));
}
config1->flags->cascade_meta_context = config2->flags->cascade_meta_context;
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_IGNORE_XMLNS)) {
config2->flags->ignore_xmlns =
swish_string_to_boolean(swish_hash_fetch(config2->misc, BAD_CAST SWISH_IGNORE_XMLNS));
}
config1->flags->ignore_xmlns = config2->flags->ignore_xmlns;
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_FOLLOW_XINCLUDE)) {
config2->flags->follow_xinclude =
swish_string_to_boolean(swish_hash_fetch(config2->misc, BAD_CAST SWISH_FOLLOW_XINCLUDE));
}
config1->flags->follow_xinclude = config2->flags->follow_xinclude;
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_UNDEFINED_METATAGS)) {
v = swish_hash_fetch(config2->misc, BAD_CAST SWISH_UNDEFINED_METATAGS);
if (xmlStrEqual(v, BAD_CAST "error")) {
config2->flags->undef_metas = SWISH_UNDEF_METAS_ERROR;
}
else if (xmlStrEqual(v, BAD_CAST "ignore")) {
config2->flags->undef_metas = SWISH_UNDEF_METAS_IGNORE;
}
else if (xmlStrEqual(v, BAD_CAST "index")) {
config2->flags->undef_metas = SWISH_UNDEF_METAS_INDEX;
}
else if (xmlStrEqual(v, BAD_CAST "auto")) {
config2->flags->undef_metas = SWISH_UNDEF_METAS_AUTO;
}
else if (xmlStrEqual(v, BAD_CAST "autoall")) {
config2->flags->undef_metas = SWISH_UNDEF_METAS_AUTOALL;
}
else {
SWISH_CROAK("Unknown value for %s: %s", SWISH_UNDEFINED_METATAGS, v);
}
}
config1->flags->undef_metas = config2->flags->undef_metas;
if (swish_hash_exists(config2->misc, BAD_CAST SWISH_UNDEFINED_XML_ATTRIBUTES)) {
v = swish_hash_fetch(config2->misc, BAD_CAST SWISH_UNDEFINED_XML_ATTRIBUTES);
if (xmlStrEqual(v, BAD_CAST "error")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_ERROR;
}
else if (xmlStrEqual(v, BAD_CAST "ignore")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_IGNORE;
}
else if (xmlStrEqual(v, BAD_CAST "index")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_INDEX;
}
else if (xmlStrEqual(v, BAD_CAST "auto")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_AUTO;
}
else if (xmlStrEqual(v, BAD_CAST "autoall")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_AUTOALL;
}
else if (xmlStrEqual(v, BAD_CAST "disable")) {
config2->flags->undef_attrs = SWISH_UNDEF_ATTRS_DISABLE;
}
else {
SWISH_CROAK("Unknown value for %s: %s", SWISH_UNDEFINED_XML_ATTRIBUTES, v);
}
}
config1->flags->undef_attrs = config2->flags->undef_attrs;
if (config1->flags->max_meta_id < config2->flags->max_meta_id) {
config1->flags->max_meta_id = config2->flags->max_meta_id;
libswish3.c view on Meta::CPAN
*/
alias = swish_hash_fetch(parser_data->s3->config->tag_aliases, swishtag);
if (alias) {
if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
SWISH_DEBUG_MSG("%s alias -> %s", swishtag, alias);
}
swish_xfree(swishtag);
swishtag = swish_xstrdup(alias);
}
else {
swishdomtag = flatten_tag_stack(swishtag, parser_data->domstack, SWISH_DOT);
alias = swish_hash_fetch(parser_data->s3->config->tag_aliases, swishdomtag);
if (alias) {
if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
SWISH_DEBUG_MSG("%s alias -> %s", swishdomtag, alias);
}
swish_xfree(swishtag);
swishtag = swish_xstrdup(alias);
}
swish_xfree(swishdomtag);
}
if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
SWISH_DEBUG_MSG(" swishtag = %s", swishtag);
}
return swishtag;
}
/*
 * Hand the text collected in parser_data->meta_buf over to the per-metaname
 * buffers, tokenize it if the analyzer asks for that, then empty meta_buf.
 *
 * parser_data: current parse state (supplies config, metastack, meta_buf)
 * metaname:    metaname the buffered text belongs to; must be a key of
 *              config->metanames
 * context:     passed through unchanged to tokenize()
 *
 * Croaks if metaname is not defined in the config, instead of dereferencing
 * a NULL lookup result.
 */
static void
flush_buffer(
    swish_ParserData *parser_data,
    xmlChar *metaname,
    xmlChar *context
)
{
    swish_MetaName *meta;
    xmlChar *metaname_stored_as;
    swish_TagStack *s = parser_data->metastack;

    if (SWISH_DEBUG & SWISH_DEBUG_PARSER) {
        SWISH_DEBUG_MSG("buffer is >>%s<< before flush",
                        xmlBufferContent(parser_data->meta_buf));
    }

    /*
     * add meta_buf as-is to metanames buffer under current tag. this
     * gives us both tokens and raw text de-tagged but organized by
     * metaname. If the metaname is an alias_for, use the target of the alias.
     */
    meta = swish_hash_fetch(parser_data->s3->config->metanames, metaname);
    if (meta == NULL) {
        SWISH_CROAK("metaname is not defined in config: %s",
                    metaname != NULL ? (const char *)metaname : "(null)");
        return;     /* only reached if SWISH_CROAK ever returns */
    }
    metaname_stored_as = (meta->alias_for != NULL) ? meta->alias_for : metaname;

    swish_nb_add_buf(parser_data->metanames, metaname_stored_as, parser_data->meta_buf,
                     (xmlChar *)SWISH_TOKENPOS_BUMPER, 0, 1);

    /*
     * if cascade_meta_context is true, add tokens (buffer) to every metaname on the stack.
     */
    if (parser_data->s3->config->flags->cascade_meta_context) {
        for (s->temp = s->head; s->temp != NULL; s->temp = s->temp->next) {
            if (xmlStrEqual(s->temp->baked, metaname_stored_as)) {
                continue;       /* already added */
            }
            swish_nb_add_buf(parser_data->metanames, s->temp->baked,
                             parser_data->meta_buf, (xmlChar *)SWISH_TOKENPOS_BUMPER,
                             0, 1);
        }
    }

    if (parser_data->s3->analyzer->tokenize) {
        tokenize(parser_data, (xmlChar *)xmlBufferContent(parser_data->meta_buf),
                 xmlBufferLength(parser_data->meta_buf), metaname_stored_as, context);
    }

    xmlBufferEmpty(parser_data->meta_buf);
}
/*
 * SAX2 callback for the start of a document. There is nothing to set up;
 * the event is only traced when parser debugging is on.
 */
static void
mystartDocument(
    void *data
)
{
    /* data could be cast to swish_ParserData *, but nothing here needs it */
    (void)data;

    if ((SWISH_DEBUG & SWISH_DEBUG_PARSER) != 0) {
        SWISH_DEBUG_MSG("startDocument()");
    }
}
/*
* SAX2 callback
*/
static void
myendDocument(
void *parser_data
)
{
if (SWISH_DEBUG & SWISH_DEBUG_PARSER)
SWISH_DEBUG_MSG("endDocument()");
/*
* whatever's left
*/
flush_buffer(parser_data, (xmlChar *)SWISH_DEFAULT_METANAME,
(xmlChar *)SWISH_DEFAULT_METANAME);
}
/*
* SAX1 callback
*/
libswish3.c view on Meta::CPAN
/*
 * Forward declarations of file-local helpers.
 * NOTE(review): the three-argument forms (write_property, write_parser,
 * write_mime) have the (value, user data, key) shape of a libxml2 hash
 * scanner callback -- confirm at the call sites.
 */
static void write_property(
swish_Property *prop,
xmlTextWriterPtr writer,
xmlChar *name
);
static void write_properties(
xmlTextWriterPtr writer,
xmlHashTablePtr properties
);
static void write_parser(
xmlChar *val,
xmlTextWriterPtr writer,
xmlChar *key
);
static void write_parsers(
xmlTextWriterPtr writer,
xmlHashTablePtr parsers
);
static void write_mime(
xmlChar *type,
temp_things *things,
xmlChar *ext
);
static void write_mimes(
xmlTextWriterPtr writer,
xmlHashTablePtr mimes
);
static void write_index(
xmlTextWriterPtr writer,
xmlHashTablePtr index
);
static void write_tag_aliases(
xmlTextWriterPtr writer,
xmlHashTablePtr tag_aliases
);
static void write_misc(
xmlTextWriterPtr writer,
xmlHashTablePtr hash
);
/* copies flag settings found in config->misc into config->flags */
static void handle_special_misc_flags(
headmaker *h
);
/*
 * Copy the flag-like settings found in h->config->misc into the parsed
 * fields of h->config->flags. A setting that is absent from misc leaves the
 * corresponding flag untouched.
 *
 * h: header reader state; h->config supplies both misc and flags.
 *
 * Croaks on an unrecognised value for SWISH_UNDEFINED_METATAGS or
 * SWISH_UNDEFINED_XML_ATTRIBUTES.
 */
static void
handle_special_misc_flags(
    headmaker *h
)
{
    xmlChar *v;

    /* boolean switches */
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_TOKENIZE)) {
        h->config->flags->tokenize =
            swish_string_to_boolean(swish_hash_fetch(h->config->misc, BAD_CAST SWISH_TOKENIZE));
    }
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_CASCADE_META_CONTEXT)) {
        h->config->flags->cascade_meta_context =
            swish_string_to_boolean(swish_hash_fetch(h->config->misc, BAD_CAST SWISH_CASCADE_META_CONTEXT));
    }
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_IGNORE_XMLNS)) {
        h->config->flags->ignore_xmlns =
            swish_string_to_boolean(swish_hash_fetch(h->config->misc, BAD_CAST SWISH_IGNORE_XMLNS));
    }
    /* the config merge code honours this setting too; without it here the
       flag would keep its default no matter what misc says */
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_FOLLOW_XINCLUDE)) {
        h->config->flags->follow_xinclude =
            swish_string_to_boolean(swish_hash_fetch(h->config->misc, BAD_CAST SWISH_FOLLOW_XINCLUDE));
    }

    /* what to do with tags that have no metaname defined */
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_UNDEFINED_METATAGS)) {
        v = swish_hash_fetch(h->config->misc, BAD_CAST SWISH_UNDEFINED_METATAGS);
        if (xmlStrEqual(v, BAD_CAST "error")) {
            h->config->flags->undef_metas = SWISH_UNDEF_METAS_ERROR;
        }
        else if (xmlStrEqual(v, BAD_CAST "ignore")) {
            h->config->flags->undef_metas = SWISH_UNDEF_METAS_IGNORE;
        }
        else if (xmlStrEqual(v, BAD_CAST "index")) {
            h->config->flags->undef_metas = SWISH_UNDEF_METAS_INDEX;
        }
        else if (xmlStrEqual(v, BAD_CAST "auto")) {
            h->config->flags->undef_metas = SWISH_UNDEF_METAS_AUTO;
        }
        else if (xmlStrEqual(v, BAD_CAST "autoall")) {
            h->config->flags->undef_metas = SWISH_UNDEF_METAS_AUTOALL;
        }
        else {
            SWISH_CROAK("Unknown value for %s: %s", SWISH_UNDEFINED_METATAGS, v);
        }
    }

    /* what to do with XML attributes that have no metaname defined */
    if (swish_hash_exists(h->config->misc, BAD_CAST SWISH_UNDEFINED_XML_ATTRIBUTES)) {
        v = swish_hash_fetch(h->config->misc, BAD_CAST SWISH_UNDEFINED_XML_ATTRIBUTES);
        if (xmlStrEqual(v, BAD_CAST "error")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_ERROR;
        }
        else if (xmlStrEqual(v, BAD_CAST "ignore")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_IGNORE;
        }
        else if (xmlStrEqual(v, BAD_CAST "index")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_INDEX;
        }
        else if (xmlStrEqual(v, BAD_CAST "auto")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_AUTO;
        }
        else if (xmlStrEqual(v, BAD_CAST "autoall")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_AUTOALL;
        }
        else if (xmlStrEqual(v, BAD_CAST "disable")) {
            h->config->flags->undef_attrs = SWISH_UNDEF_ATTRS_DISABLE;
        }
        else {
            SWISH_CROAK("Unknown value for %s: %s", SWISH_UNDEFINED_XML_ATTRIBUTES, v);
        }
    }
}
static void
read_metaname_aliases(
( run in 1.288 second using v1.01-cache-2.11-cpan-5837b0d9d2c )