unicode results from the CPAN

unicode
DBD-SQLeet
view release on metacpan or search on metacpan
    n_retval = call_pv(argv[0], G_SCALAR);
    SPAGAIN;

    /* store a copy of the returned coderef into the tokenizer structure */
    if (n_retval != 1) {
        warn("tokenizer_Create returned %d arguments", n_retval);
    }
    retval = POPs;
    t->coderef   = newSVsv(retval);
    *ppTokenizer = &t->base;

    PUTBACK;
    FREETMPS;
    LEAVE;

    return SQLITE_OK;
}

/*
** Destroy a tokenizer
*/
static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){
    dTHX;
    perl_tokenizer *t = (perl_tokenizer *) pTokenizer;
    sv_free(t->coderef);
    sqlite3_free(t);
    return SQLITE_OK;
}

/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is supposed to be pInput[0..nBytes-1] ..
** except that nBytes passed by fts3 is -1 (don't know why) !
** This is passed to the tokenizer instance, which then returns a
** closure implementing the cursor (so the cursor is again a coderef).
*/
static int perl_tokenizer_Open(
    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
    const char *pInput, int nBytes,      /* Input buffer */
    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
){
    dTHX;
    dSP;
    dMY_CXT;
    U32 flags;
    SV *perl_string;
    int n_retval;

    perl_tokenizer *t = (perl_tokenizer *)pTokenizer;

    /* allocate and initialize the cursor struct */
    perl_tokenizer_cursor *c;
    c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
    memset(c, 0, sizeof(*c));
    *ppCursor = &c->base;

    /* flags for creating the Perl SV containing the input string */
    flags = SVs_TEMP; /* will call sv_2mortal */

    /* special handling if working with utf8 strings */
    if (MY_CXT.last_dbh_is_unicode) {

        /* data to keep track of byte offsets */
        c->lastByteOffset = c->pInput = pInput;
        c->lastCharOffset = 0;

        /* string passed to Perl needs to be flagged as utf8 */
        flags |= SVf_UTF8;
    }

    ENTER;
    SAVETMPS;

    /* build a Perl copy of the input string */
    if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */
        nBytes = strlen(pInput);
    }
    perl_string = newSVpvn_flags(pInput, nBytes, flags);

    /* call the tokenizer coderef */
    PUSHMARK(SP);
    XPUSHs(perl_string);
    PUTBACK;
    n_retval = call_sv(t->coderef, G_SCALAR);
    SPAGAIN;

    /* store the cursor coderef returned by the tokenizer */
    if (n_retval != 1) {
        warn("tokenizer returned %d arguments", n_retval);
    }
    c->coderef = newSVsv(POPs);

    PUTBACK;
    FREETMPS;
    LEAVE;
    return SQLITE_OK;
}

/*
** Close a tokenization cursor previously opened by a call to
** perl_tokenizer_Open() above.
*/
static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){
    perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;

    dTHX;
    sv_free(c->coderef);
    if (c->pToken) sqlite3_free(c->pToken);
    sqlite3_free(c);
    return SQLITE_OK;
}

/*
** Extract the next token from a tokenization cursor.  The cursor must
** have been opened by a prior call to perl_tokenizer_Open().
*/
static int perl_tokenizer_Next(
    sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by perl_tokenizer_Open */
    const char **ppToken,               /* OUT: *ppToken is the token text */
    int *pnBytes,                       /* OUT: Number of bytes in token */
    int *piStartOffset,                 /* OUT: Starting offset of token */
( run in 0.949 second using v1.01-cache-2.11-cpan-39bf76dae61 )