DBD-SQLite
view release on metacpan or search on metacpan
fts3_tokenizer.h view on Meta::CPAN
int *piPosition /* OUT: Number of tokens returned before this one */
);
/***********************************************************************
** Methods below this point are only available if iVersion>=1.
*/
/*
** Configure the language id of a tokenizer cursor.
*/
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
/*
** Base structure for a tokenizer instance. The only field declared here
** is the pointer to the module (method table) the tokenizer belongs to.
*/
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
lib/DBD/SQLite/VirtualTable/PerlData.pm view on Meta::CPAN
time. Here is a way to do it with a virtual table :
my @files = ... ; # list of files to inspect
# apply the L<stat> function to each file
our $file_stats = [ map { [ $_, stat $_ ] } @files];
# create a temporary virtual table
$dbh->do(<<"");
CREATE VIRTUAL TABLE temp.file_stats
USING perl(path, dev, ino, mode, nlink, uid, gid, rdev, size,
atime, mtime, ctime, blksize, blocks,
arrayrefs="main::file_stats");
# search files
my $sth = $dbh->prepare(<<"");
SELECT * FROM file_stats
WHERE mtime BETWEEN ? AND ?
AND uid IN (...)
=head2 Hashref example : unicode characters
#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
{ "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
#if defined(HAVE_FCHOWN)
{ "fchown", (sqlite3_syscall_ptr)fchown, 0 },
#else
{ "fchown", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
#if defined(HAVE_FCHOWN)
{ "geteuid", (sqlite3_syscall_ptr)geteuid, 0 },
#else
{ "geteuid", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent)
#if (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) \
&& !defined(SQLITE_WASI)
}
#else
# define unixPosixAdvisoryLocks(A,B) SQLITE_ERROR_UNABLE
#endif /* SQLITE_DEBUG || SQLITE_ENABLE_FILESTAT */
/*
** On some systems, calls to fchown() will trigger a message in a security
** log if they come from non-root processes. So avoid calling fchown() if
** we are not running as root.
*/
/*
** Change the ownership of file descriptor fd to uid/gid, but only when
** the effective user id is 0. For any other effective user, or when
** HAVE_FCHOWN is not defined, do nothing and return 0.
*/
static int robustFchown(int fd, uid_t uid, gid_t gid){
#if defined(HAVE_FCHOWN)
  /* A non-zero effective uid means "not root": skip the fchown() call. */
  if( osGeteuid()!=0 ){
    return 0;
  }
  return osFchown(fd, uid, gid);
#else
  return 0;
#endif
}
/*
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes. Return SQLITE_OK upon successfully updating the
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
** system call named zName.
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm);
goto shm_open_err;
}
pShmNode->isReadonly = 1;
}
/* If this process is running as root, make sure that the SHM file
** is owned by the same user that owns the original database. Otherwise,
** the original owner will not be able to connect.
*/
robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid);
rc = unixLockSharedMemory(pDbFd, pShmNode);
if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err;
}
}
/* Make the new connection a child of the unixShmNode */
p->pShmNode = pShmNode;
#ifdef SQLITE_DEBUG
p->id = pShmNode->nextShmId++;
}
sqlite3_mutex_leave(pInode->pLockMutex);
}
}
unixLeaveMutex();
#endif /* if !OS_VXWORKS */
return pUnused;
}
/*
** Find the mode, uid and gid of file zFile.
*/
/*
** Look up file zFile with osStat() and report its permission bits
** (masked with 0777), owner uid and owner gid through the OUT parameters.
**
** Returns SQLITE_OK on success. If osStat() fails, the OUT parameters
** are left untouched and SQLITE_IOERR_FSTAT is returned.
*/
static int getFileMode(
  const char *zFile,             /* File name */
  mode_t *pMode,                 /* OUT: Permissions of zFile */
  uid_t *pUid,                   /* OUT: uid of zFile. */
  gid_t *pGid                    /* OUT: gid of zFile. */
){
  struct stat fileInfo;          /* Filled in by osStat() */

  if( osStat(zFile, &fileInfo)!=0 ){
    return SQLITE_IOERR_FSTAT;
  }
  *pMode = fileInfo.st_mode & 0777;
  *pUid = fileInfo.st_uid;
  *pGid = fileInfo.st_gid;
  return SQLITE_OK;
}
/*
** This function is called by unixOpen() to determine the unix permissions
** to create new files with. If no error occurs, then SQLITE_OK is returned
** and a value suitable for passing as the third argument to open(2) is
** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
** original filename is unavailable. But 8_3_NAMES is only used for
** FAT filesystems and permissions do not matter there, so just use
** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero.
*/
static int findCreateFileMode(
const char *zPath, /* Path of file (possibly) being created */
int flags, /* Flags passed as 4th argument to xOpen() */
mode_t *pMode, /* OUT: Permissions to open file with */
uid_t *pUid, /* OUT: uid to set on the file */
gid_t *pGid /* OUT: gid to set on the file */
){
int rc = SQLITE_OK; /* Return Code */
*pMode = 0;
*pUid = 0;
*pGid = 0;
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
char zDb[MAX_PATHNAME+1]; /* Database file path */
int nDb; /* Number of valid bytes in zDb */
/* zPath is a path to a WAL or journal file. The following block derives
rc = getFileMode(zDb, pMode, pUid, pGid);
break;
}
nDb--;
}
}else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
*pMode = 0600;
}else if( flags & SQLITE_OPEN_URI ){
/* If this is a main database file and the file was opened using a URI
** filename, check for the "modeof" parameter. If present, interpret
** its value as a filename and try to copy the mode, uid and gid from
** that file. */
const char *z = sqlite3_uri_parameter(zPath, "modeof");
if( z ){
rc = getFileMode(z, pMode, pUid, pGid);
}
}
return rc;
}
/*
** 'conch file' locking functions later on. */
if( isReadonly ) openFlags |= O_RDONLY;
if( isReadWrite ) openFlags |= O_RDWR;
if( isCreate ) openFlags |= O_CREAT;
if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW);
if( fd<0 ){
mode_t openMode; /* Permissions to create file with */
uid_t uid; /* Userid for the file */
gid_t gid; /* Groupid for the file */
rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
if( rc!=SQLITE_OK ){
assert( !p->pPreallocatedUnused );
assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
return rc;
}
fd = robust_open(zName, openFlags, openMode);
OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));
assert( !isExclusive || (openFlags & O_CREAT)!=0 );
if( fd<0 ){
if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){
}
/* The owner of the rollback journal or WAL file should always be the
** same as the owner of the database file. Try to ensure that this is
** the case. The chown() system call will be a no-op if the current
** process lacks root privileges, but we should at least try. Without
** this step, if a root process opens a database file, it can leave
** behind a journal/WAL that is owned by root and hence make the
** database inaccessible to unprivileged processes.
**
** If openMode==0, then that means uid and gid are not set correctly
** (probably because SQLite is configured to use 8+3 filename mode) and
** in that case we do not want to attempt the chown().
*/
if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){
robustFchown(fd, uid, gid);
}
}
assert( fd>=0 );
if( pOutFlags ){
*pOutFlags = flags;
}
if( p->pPreallocatedUnused ){
p->pPreallocatedUnused->fd = fd;
p->pPreallocatedUnused->flags =
int *piPosition /* OUT: Number of tokens returned before this one */
);
/***********************************************************************
** Methods below this point are only available if iVersion>=1.
*/
/*
** Configure the language id of a tokenizer cursor.
*/
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
/*
** Base structure for a tokenizer instance. The only field declared here
** is the pointer to the module (method table) the tokenizer belongs to.
*/
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
u8 bDescIdx; /* True if doclists are in reverse order */
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
int iSavepoint;
/*
** The following array of hash tables is used to buffer pending index
** updates during transactions. All pending updates buffered at any one
** time must share a common language-id (see the FTS4 langid= feature).
** The current language id is stored in variable iPrevLangid.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
** terms that appear in the document set. Each subsequent index in aIndex[]
** is an index of prefixes of a specific length.
**
** Variable nPendingData contains an estimate of the memory consumed by the
** pending data structures, including hash table overhead, but not including
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
** recently inserted record.
*/
int nIndex; /* Size of aIndex[] */
struct Fts3Index {
int nPrefix; /* Prefix length (0 for main terms index) */
Fts3Hash hPending; /* Pending terms table for this index */
} *aIndex;
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
int iPrevLangid; /* Langid of recently inserted document */
int bPrevDelete; /* True if last operation was a delete */
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
** values do not contribute to FTS functionality; they are used for
** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
** the xOpen method. Cursors are destroyed using the xClose method.
*/
struct Fts3Cursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
i16 eSearch; /* Search strategy (see below) */
u8 isEof; /* True if at End Of Results */
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
u8 bSeekStmt; /* True if pStmt is a seek */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
char *pNextId; /* Pointer into the body of aDoclist */
char *aDoclist; /* List of docids for full-text queries */
int nDoclist; /* Size of buffer at aDoclist */
u8 bDesc; /* True to sort in descending order */
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
sqlite3_int64 nDoc; /* Documents in table */
** before returning.
*/
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
if( *pRc==SQLITE_OK ){
int i; /* Iterator variable */
int rc; /* Return code */
char *zSql; /* SQL statement passed to declare_vtab() */
char *zCols; /* List of user defined columns */
const char *zLanguageid;
zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
sqlite3_vtab_config(p->db, SQLITE_VTAB_INNOCUOUS);
/* Create a list of user columns for the virtual table */
zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
for(i=1; zCols && i<p->nColumn; i++){
zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
}
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
const char *zLanguageid = p->zLanguageid;
char *zContentCols; /* Columns of %_content table */
/* Create a list of user columns for the content table */
zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
for(i=0; zContentCols && i<p->nColumn; i++){
char *z = p->azColumn[i];
zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
}
if( zLanguageid && zContentCols ){
zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
}
if( zContentCols==0 ) rc = SQLITE_NOMEM;
/* Create the content table */
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_content'(%s)",
p->zDb, p->zName, zContentCols
);
sqlite3_free(zContentCols);
}
if( !zFunc ){
zFunction = "";
}else{
zFree = zFunction = fts3QuoteId(zFunc);
}
fts3Appendf(pRc, &zRet, "docid");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
}
sqlite3_free(zFree);
}else{
fts3Appendf(pRc, &zRet, "rowid");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
}
**
** 1. Direct lookup by rowid or docid.
** 2. Full-text search using a MATCH operator on a non-docid column.
** 3. Linear scan of %_content table.
*/
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
int iLangidCons = -1; /* Index of langid=x constraint, if present */
int iDocidGe = -1; /* Index of docid>=x constraint, if present */
int iDocidLe = -1; /* Index of docid<=x constraint, if present */
int iIdx;
if( p->bLock ){
return SQLITE_ERROR;
}
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
** context" error.
*/
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH
&& pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
){
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
pInfo->estimatedCost = 2.0;
iCons = i;
}
/* Equality constraint on the langid column */
if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& pCons->iColumn==p->nColumn + 2
){
iLangidCons = i;
}
if( bDocid ){
switch( pCons->op ){
case SQLITE_INDEX_CONSTRAINT_GE:
case SQLITE_INDEX_CONSTRAINT_GT:
iDocidGe = i;
break;
case SQLITE_INDEX_CONSTRAINT_LE:
}
/* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */
if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo);
iIdx = 1;
if( iCons>=0 ){
pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
pInfo->aConstraintUsage[iCons].omit = 1;
}
if( iLangidCons>=0 ){
pInfo->idxNum |= FTS3_HAVE_LANGID;
pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
}
if( iDocidGe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
}
if( iDocidLe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
}
/*
** Add seg-reader objects to the Fts3MultiSegReader object passed as the
** 9th argument.
**
** This function returns SQLITE_OK if successful, or an SQLite error code
** otherwise.
*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
int isScan, /* True to scan from zTerm to EOF */
Fts3MultiSegReader *pCsr /* Cursor object to populate */
){
int rc = SQLITE_OK; /* Error code */
sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
int rc2; /* Result of sqlite3_reset() */
/* If iLevel is less than 0 and this is not a scan, include a seg-reader
** for the pending-terms. If this is a scan, then this call must be being
** made by an fts4aux module, not an FTS table. In this case calling
** Fts3SegReaderPending might segfault, as the data structures used by
** fts4aux are not completely populated. So it's easiest to filter these
** calls out here. */
if( iLevel<0 && p->aIndex && p->iPrevLangid==iLangid ){
Fts3SegReader *pSeg = 0;
rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg);
if( rc==SQLITE_OK && pSeg ){
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
}
}
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
if( rc==SQLITE_OK ){
rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
Fts3SegReader *pSeg = 0;
/* Read the values returned by the SELECT into local variables. */
sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
int nRoot = sqlite3_column_bytes(pStmt, 4);
return rc;
}
/*
** Set up a cursor object for iterating through a full-text index or a
** single level therein.
*/
/*
** Public entry point for setting up a multi-segment reader. After
** checking its arguments with assert(), it zeroes *pCsr and delegates
** to the static worker fts3SegReaderCursor(), returning whatever the
** worker returns.
*/
SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language-id to search */
  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
){
  int rc;                         /* Result of the worker call */

  /* iIndex must name an existing index. iLevel is either one of the two
  ** (negative) special values or a real level below the maximum. */
  assert( iIndex>=0 && iIndex<p->nIndex );
  assert( iLevel==FTS3_SEGCURSOR_ALL
      ||  iLevel==FTS3_SEGCURSOR_PENDING
      ||  iLevel>=0
  );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );

  /* A request may be a prefix search or a scan, never both. */
  assert( isPrefix==0 || isScan==0 );

  /* Start from an all-zero cursor, then let the worker populate it. */
  memset(pCsr, 0, sizeof(*pCsr));
  rc = fts3SegReaderCursor(
      p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  );
  return rc;
}
/*
** In addition to its current configuration, have the Fts3MultiSegReader
** passed as the 5th argument also scan the doclist for term zTerm/nTerm.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
/*
** Extend the already-configured reader pCsr so that it also covers the
** doclist for the exact term zTerm/nTerm. This calls fts3SegReaderCursor()
** on index 0, across all levels (FTS3_SEGCURSOR_ALL), with both the
** prefix and scan flags off. Unlike sqlite3Fts3SegReaderCursor(), pCsr is
** not zeroed first.
**
** Returns the result of fts3SegReaderCursor().
*/
static int fts3SegReaderCursorAddZero(
  Fts3Table *p,                   /* FTS virtual table handle */
  int iLangid,                    /* Language id to search */
  const char *zTerm,              /* Term to scan doclist of */
  int nTerm,                      /* Number of bytes in zTerm */
  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
){
  const int iTermsIndex = 0;      /* Index number passed to the worker */
  const int bPrefix = 0;          /* Not a prefix search */
  const int bScan = 0;            /* Not a scan */

  return fts3SegReaderCursor(
      p, iLangid, iTermsIndex, FTS3_SEGCURSOR_ALL,
      zTerm, nTerm, bPrefix, bScan, pCsr
  );
}
/*
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
** if isPrefix is true, to scan the doclist for all terms for which
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
** an SQLite error code.
**
pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
if( pSegcsr ){
int i;
int bFound = 0; /* True once an index has been found */
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
if( isPrefix ){
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
);
pSegcsr->bLookup = 1;
}
}
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm+1 ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
);
if( rc==SQLITE_OK ){
rc = fts3SegReaderCursorAddZero(
p, pCsr->iLangid, zTerm, nTerm, pSegcsr
);
}
}
}
}
if( bFound==0 ){
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
);
pSegcsr->bLookup = !isPrefix;
}
}
*ppSegcsr = pSegcsr;
return rc;
}
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
int rc = SQLITE_OK;
char *zSql; /* SQL statement used to access %_content */
int eSearch;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
int iIdx;
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
if( p->bLock ){
return SQLITE_ERROR;
}
eSearch = (idxNum & 0x0000FFFF);
assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( p->pSegments==0 );
/* Collect arguments into local variables */
iIdx = 0;
if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
assert( iIdx==nVal );
/* In case the cursor has been used before, clear it now. */
fts3ClearCursor(pCsr);
/* Set the lower and upper bounds on docids to return */
pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
pCsr->eSearch = (i16)eSearch;
if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(pCons);
if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
pCsr->iLangid = 0;
if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,
&p->base.zErrMsg
);
if( rc!=SQLITE_OK ){
return rc;
}
rc = fts3EvalStart(pCsr);
sqlite3Fts3SegmentsClose(p);
if( rc!=SQLITE_OK ) return rc;
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
**
** If:
**
** (iCol < p->nColumn) -> The value of the iCol'th user column.
** (iCol == p->nColumn) -> Magic column with the same name as the table.
** (iCol == p->nColumn+1) -> Docid column
** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
int rc = SQLITE_OK; /* Return Code */
Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
sqlite3_result_pointer(pCtx, pCsr, "fts3cursor", 0);
break;
case 1:
/* The docid column */
sqlite3_result_int64(pCtx, pCsr->iPrevId);
break;
case 2:
if( pCsr->pExpr ){
sqlite3_result_int64(pCtx, pCsr->iLangid);
break;
}else if( p->zLanguageid==0 ){
sqlite3_result_int(pCtx, 0);
break;
}else{
iCol = p->nColumn;
/* no break */ deliberate_fall_through
}
default:
sqlite3_vtab base; /* Base class used by SQLite core */
Fts3Table *pFts3Tab;
};
/*
** Cursor object for the virtual table whose methods are named fts3aux*
** (presumably the "fts4aux" module - confirm against the module
** registration, which is not visible here).
**
** Field order matters: when a cursor is reused, the filter method
** memset()s everything from "csr" through the end of the structure, so
** only "base" survives a reset.
*/
struct Fts3auxCursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
Fts3MultiSegReader csr; /* Must be right after "base": first field zeroed on reset */
Fts3SegFilter filter; /* Passed to sqlite3Fts3SegReaderStart(); filter.zTerm is sqlite3_free()d on reuse */
char *zStop; /* sqlite3_mprintf() copy of the term<=? value; sqlite3_free()d on reuse */
int nStop; /* Byte-length of string zStop */
int iLangid; /* Language id to query */
int isEof; /* True if cursor is at EOF */
sqlite3_int64 iRowid; /* Current rowid */
int iCol; /* Current value of 'col' column */
int nStat; /* Size of aStat[] array */
struct Fts3auxColstats {
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
} *aStat; /* Indexed by iCol when reading columns; sqlite3_free()d on reuse */
};
** xBestIndex - Analyze a WHERE and ORDER BY clause.
*/
static int fts3auxBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
int i;
int iEq = -1;
int iGe = -1;
int iLe = -1;
int iLangid = -1;
int iNext = 1; /* Next free argvIndex value */
UNUSED_PARAMETER(pVTab);
/* This vtab always delivers results in "ORDER BY term ASC" order. */
if( pInfo->nOrderBy==1
&& pInfo->aOrderBy[0].iColumn==0
&& pInfo->aOrderBy[0].desc==0
){
pInfo->orderByConsumed = 1;
int iCol = pInfo->aConstraint[i].iColumn;
if( iCol==0 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
}
if( iCol==4 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
}
}
}
if( iEq>=0 ){
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
pInfo->estimatedCost = 5;
}else{
pInfo->idxNum = 0;
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
if( iLe>=0 ){
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
}
if( iLangid>=0 ){
pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
pInfo->estimatedCost--;
}
return SQLITE_OK;
}
/*
** xOpen - Open a cursor.
*/
static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
int isScan = 0;
int iLangVal = 0; /* Language id to query */
int iEq = -1; /* Index of term=? value in apVal */
int iGe = -1; /* Index of term>=? value in apVal */
int iLe = -1; /* Index of term<=? value in apVal */
int iLangid = -1; /* Index of languageid=? value in apVal */
int iNext = 0;
UNUSED_PARAMETER(nVal);
UNUSED_PARAMETER(idxStr);
assert( idxStr==0 );
assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
);
}else{
isScan = 1;
if( idxNum & FTS4AUX_GE_CONSTRAINT ){
iGe = iNext++;
}
if( idxNum & FTS4AUX_LE_CONSTRAINT ){
iLe = iNext++;
}
}
if( iNext<nVal ){
iLangid = iNext++;
}
/* In case this cursor is being reused, close and zero it. */
testcase(pCsr->filter.zTerm);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->aStat);
sqlite3_free(pCsr->zStop);
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm);
}
}
if( iLe>=0 ){
pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
pCsr->nStop = (int)strlen(pCsr->zStop);
}
if( iLangid>=0 ){
iLangVal = sqlite3_value_int(apVal[iLangid]);
/* If the user specified a negative value for the languageid, use zero
** instead. This works, as the "languageid=?" constraint will also
** be tested by the VDBE layer. The test will always be false (since
** this module will not return a row with a negative languageid), and
** so the overall query will return zero rows. */
if( iLangVal<0 ) iLangVal = 0;
}
pCsr->iLangid = iLangVal;
rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
}
if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
return rc;
case 2: /* documents */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
break;
case 3: /* occurrences */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
break;
default: /* languageid */
assert( iCol==4 );
sqlite3_result_int(pCtx, p->iLangid);
break;
}
return SQLITE_OK;
}
/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3auxRowidMethod(
** isNot:
** This variable is used by function getNextNode(). When getNextNode() is
** called, it sets ParseContext.isNot to true if the 'next node' is a
** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
** zero.
*/
/*
** State for one parse of a MATCH query string. fts3ExprParseUnbalanced()
** zeroes an instance with memset() and then fills in pTokenizer, iLangid,
** azCol, nCol, iDefaultCol and bFts4 from its own arguments.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
int iLangid; /* Language id passed to sqlite3Fts3OpenTokenizer() */
const char **azCol; /* Array of column names for fts3 table */
int bFts4; /* True to allow FTS4-only syntax */
int nCol; /* Number of entries in azCol[] */
int iDefaultCol; /* Default column to query */
int isNot; /* True if getNextNode() sees a unary - */
sqlite3_context *pCtx; /* Write error message here */
int nNest; /* Number of nested brackets */
};
/*
** return NULL.
*/
/*
** Allocate nByte bytes with sqlite3_malloc64() and clear them to zero.
** Returns the new buffer, or NULL if the allocation fails.
*/
SQLITE_PRIVATE void *sqlite3Fts3MallocZero(sqlite3_int64 nByte){
  void *pNew = sqlite3_malloc64(nByte);
  if( pNew==0 ){
    return 0;                     /* Allocation failed: nothing to clear */
  }
  memset(pNew, 0, nByte);
  return pNew;
}
/*
** Open a tokenizer cursor over the n-byte buffer z using pTokenizer.
**
** On success the new cursor's pTokenizer back-pointer is set. If the
** module has iVersion>=1, its xLanguageid() method is called with
** iLangid. If xLanguageid() fails, the cursor is closed again and
** *ppCsr is set to NULL.
**
** *ppCsr is always written. Returns SQLITE_OK, or the error code from
** xOpen()/xLanguageid().
*/
SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(
  sqlite3_tokenizer *pTokenizer,
  int iLangid,
  const char *z,
  int n,
  sqlite3_tokenizer_cursor **ppCsr
){
  sqlite3_tokenizer_module const *pMod = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCsr = 0;
  int rc = pMod->xOpen(pTokenizer, z, n, &pCsr);

  assert( rc==SQLITE_OK || pCsr==0 );
  if( rc!=SQLITE_OK ){
    /* xOpen() failed: hand back whatever it left in pCsr (asserted NULL). */
    *ppCsr = pCsr;
    return rc;
  }

  pCsr->pTokenizer = pTokenizer;
  if( pMod->iVersion>=1 ){
    /* xLanguageid() is only called for modules with iVersion>=1. */
    rc = pMod->xLanguageid(pCsr, iLangid);
    if( rc!=SQLITE_OK ){
      pMod->xClose(pCsr);
      pCsr = 0;
    }
  }
  *ppCsr = pCsr;
  return rc;
}
Fts3Expr **ppExpr, /* OUT: expression */
int *pnConsumed /* OUT: Number of bytes consumed */
){
sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
int rc;
sqlite3_tokenizer_cursor *pCursor;
Fts3Expr *pRet = 0;
*pnConsumed = n;
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
sqlite3_int64 nByte; /* total space to allocate */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
if( rc==SQLITE_OK ){
/* Check that this tokenization did not gobble up any " characters. Or,
** if enable_parenthesis is true, that it did not gobble up any
** open or close parenthesis characters either. If it did, call
** structure, followed by the array of Fts3PhraseToken
** structures. This pass only populates the Fts3PhraseToken array.
**
** Buffer zTemp: Contains copies of all tokens.
**
** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
** structures.
*/
rc = sqlite3Fts3OpenTokenizer(
pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
if( rc==SQLITE_OK ){
int ii;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
Fts3PhraseToken *pToken;
p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
**
** 1. It does not do expression rebalancing.
** 2. It does not check that the expression does not exceed the
** maximum allowable depth.
** 3. Even if it fails, *ppExpr may still be set to point to an
** expression tree. It should be deleted using sqlite3Fts3ExprFree()
** in this case.
*/
static int fts3ExprParseUnbalanced(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
int iDefaultCol, /* Default column to query */
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr /* OUT: Parsed query structure */
){
int nParsed;
int rc;
ParseContext sParse;
memset(&sParse, 0, sizeof(ParseContext));
sParse.pTokenizer = pTokenizer;
sParse.iLangid = iLangid;
sParse.azCol = (const char **)azCol;
sParse.nCol = nCol;
sParse.iDefaultCol = iDefaultCol;
sParse.bFts4 = bFts4;
if( z==0 ){
*ppExpr = 0;
return SQLITE_OK;
}
if( n<0 ){
n = (int)strlen(z);
** Column names must be nul-terminated strings.
**
** The iDefaultCol parameter should be passed the index of the table column
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
SQLITE_PRIVATE int sqlite3Fts3ExprParse(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
int iDefaultCol, /* Default column to query */
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr, /* OUT: Parsed query structure */
char **pzErr /* OUT: Error message (sqlite3_malloc) */
){
int rc = fts3ExprParseUnbalanced(
pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
);
/* Rebalance the expression. And check that its depth does not exceed
** SQLITE_FTS3_MAX_EXPR_DEPTH. */
if( rc==SQLITE_OK && *ppExpr ){
rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
if( rc==SQLITE_OK ){
rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
}
}
** separate component values into the single 64-bit integer value that
** can be used to query the %_segdir table.
**
** Specifically, each language-id/index combination is allocated 1024
** 64-bit integer level values ("absolute levels"). The main terms index
** for language-id 0 is allocated values 0-1023. The first prefix index
** (if any) for language-id 0 is allocated values 1024-2047. And so on.
** Language 1 indexes are allocated immediately following language 0.
**
** So, for a system with nPrefix prefix indexes configured, the block of
** absolute levels that corresponds to language-id iLangid and index
** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
*/
static sqlite3_int64 getAbsoluteLevel(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index in p->aIndex[] */
  int iLevel                      /* Level of segments */
){
  sqlite3_int64 iBlock;           /* Ordinal of the iLangid/iIndex block */

  assert_fts3_nc( iLangid>=0 );
  assert( p->nIndex>0 );
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Every language id owns p->nIndex consecutive blocks (one per index) and
  ** each block spans FTS3_SEGDIR_MAXLEVEL absolute levels. The cast is
  ** applied to the first operand so the arithmetic is done in 64 bits. */
  iBlock = (sqlite3_int64)iLangid * p->nIndex + iIndex;
  return iBlock * FTS3_SEGDIR_MAXLEVEL + iLevel;
}
/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement,
** return an SQLite error code.
**
** There is only ever one instance of this SQL statement compiled for
** The statement returns the following columns from the %_segdir table:
**
** 0: idx
** 1: start_block
** 2: leaves_end_block
** 3: end_block
** 4: root
*/
SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(
  Fts3Table *p,                   /* FTS3 table */
  int iLangid,                    /* Language being queried */
  int iIndex,                     /* Index for p->aIndex[] */
  int iLevel,                     /* Level to select (relative level) */
  sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
){
  sqlite3_stmt *pSelect = 0;      /* Statement handed back through *ppStmt */
  int rc;                         /* Return code */

  assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  if( iLevel>=0 ){
    /* One specific relative level was requested:
    ** "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pSelect, 0);
    if( rc==SQLITE_OK ){
      sqlite3_int64 iAbs = getAbsoluteLevel(p, iLangid, iIndex, iLevel);
      sqlite3_bind_int64(pSelect, 1, iAbs);
    }
  }else{
    /* Negative iLevel: select the whole block of absolute levels that
    ** belongs to iLangid/iIndex:
    ** "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pSelect, 0);
    if( rc==SQLITE_OK ){
      sqlite3_int64 iFirst = getAbsoluteLevel(p, iLangid, iIndex, 0);
      sqlite3_int64 iLast = getAbsoluteLevel(
          p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1
      );
      sqlite3_bind_int64(pSelect, 1, iFirst);
      sqlite3_bind_int64(pSelect, 2, iLast);
    }
  }

  /* The statement pointer is stored whether or not an error occurred. */
  *ppStmt = pSelect;
  return rc;
}
/*
** Append a single varint to a PendingList buffer. SQLITE_OK is returned
** if successful, or an SQLite error code otherwise.
/*
** Tokenize the nul-terminated string zText and add all tokens to the
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
Fts3Table *p, /* Table into which text will be inserted */
int iLangid, /* Language id to use */
const char *zText, /* Text of document to be inserted */
int iCol, /* Column into which text is being inserted */
u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
){
int rc;
int iStart = 0;
int iEnd = 0;
int iPos = 0;
int nWord = 0;
assert( pTokenizer && pModule );
/* If the user has inserted a NULL value, this function may be called with
** zText==0. In this case, add zero token entries to the hash table and
** return early. */
if( zText==0 ){
*pnWord = 0;
return SQLITE_OK;
}
rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
if( rc!=SQLITE_OK ){
return rc;
}
xNext = pModule->xNext;
while( SQLITE_OK==rc
&& SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
){
int i;
if( iPos>=nWord ) nWord = iPos+1;
}
/*
** Announce that the term/position-list pairs added by subsequent calls to
** fts3PendingTermsAdd() belong to the row with docid iDocid and language
** id iLangid. Argument bDelete is 1 if the row is being deleted, or 0
** otherwise.
**
** Before the new values are recorded, the pending terms are flushed with
** sqlite3Fts3PendingTermsFlush() if any of the following is true:
**
**   * iDocid is smaller than the previously recorded docid,
**   * iDocid is equal to the previously recorded docid and the previous
**     operation was not a delete,
**   * iLangid differs from the previously recorded language id, or
**   * p->nPendingData is larger than p->nMaxPendingData.
**
** SQLITE_OK is returned if successful. If the flush fails its error code
** is returned and the recorded docid, language id and delete flag are left
** unchanged.
*/
static int fts3PendingTermsDocid(
  Fts3Table *p,                   /* Full-text table handle */
  int bDelete,                    /* True if this op is a delete */
  int iLangid,                    /* Language id of row being written */
  sqlite_int64 iDocid             /* Docid of row being written */
){
  int rc = SQLITE_OK;             /* Return code */
  int bFlush;                     /* True if a flush is required first */

  assert( iLangid>=0 );
  assert( bDelete==1 || bDelete==0 );

  /* TODO(shess) Explore whether partially flushing the buffer on
  ** forced-flush would provide better performance.  I suspect that if
  ** we ordered the doclists by size and flushed the largest until the
  ** buffer was half empty, that would let the less frequent terms
  ** generate longer doclists.
  */
  bFlush = ( iDocid<p->iPrevDocid
          || (iDocid==p->iPrevDocid && p->bPrevDelete==0)
          || p->iPrevLangid!=iLangid
          || p->nPendingData>p->nMaxPendingData );
  if( bFlush ){
    rc = sqlite3Fts3PendingTermsFlush(p);
  }

  if( rc==SQLITE_OK ){
    p->iPrevDocid = iDocid;
    p->iPrevLangid = iLangid;
    p->bPrevDelete = bDelete;
  }
  return rc;
}
/*
** Discard the contents of the pending-terms hash tables.
*/
SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){
int i;
for(i=0; i<p->nIndex; i++){
/*
** Add the terms of a new row to the pending-terms data, one indexed column
** at a time, by calling fts3PendingTermsAdd() with language id iLangid.
**
** The value of column iCol is read from apVal[iCol+2]. Columns for which
** p->abNotindexed[iCol] is non-zero are skipped entirely.
**
** For each column processed, &aSz[iCol] is passed to fts3PendingTermsAdd()
** as its token counter and the size in bytes of the column value is added
** to aSz[p->nColumn].
**
** SQLITE_OK is returned if successful. Otherwise the first error code
** returned by fts3PendingTermsAdd() is returned immediately.
*/
static int fts3InsertTerms(
  Fts3Table *p,
  int iLangid,
  sqlite3_value **apVal,
  u32 *aSz
){
  int iCol;                       /* Current column of the FTS table */

  for(iCol=0; iCol<p->nColumn; iCol++){
    sqlite3_value *pVal;          /* Value for column iCol */
    const char *zText;            /* Text of pVal */
    int rc;                       /* Result of adding the terms */

    if( p->abNotindexed[iCol] ) continue;

    pVal = apVal[iCol+2];
    zText = (const char *)sqlite3_value_text(pVal);
    rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
    if( rc!=SQLITE_OK ) return rc;

    /* The byte count is read after the text has been extracted. */
    aSz[p->nColumn] += sqlite3_value_bytes(pVal);
  }
  return SQLITE_OK;
}
/*
}
if( p->bHasStat ){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
}
return rc;
}
/*
** Return the language id of the row that statement pSelect currently
** points to.
**
** If a language-id column is configured for the table (p->zLanguageid is
** not NULL), the value is read as an integer from column (p->nColumn+1)
** of pSelect. Otherwise 0 is returned.
*/
static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
  return p->zLanguageid ? sqlite3_column_int(pSelect, p->nColumn+1) : 0;
}
/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms(
int *pRC, /* Result code */
Fts3Table *p, /* The FTS table to delete from */
){
int rc;
sqlite3_stmt *pSelect;
assert( *pbFound==0 );
if( *pRC ) return;
rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pSelect) ){
int i;
int iLangid = langidFromSelect(p, pSelect);
i64 iDocid = sqlite3_column_int64(pSelect, 0);
rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid);
for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
int iCol = i-1;
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_column_text(pSelect, i);
rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
}
}
if( rc!=SQLITE_OK ){
sqlite3_reset(pSelect);
*pRC = rc;
return;
}
*pbFound = 1;
}
**
** However, if there are already FTS3_MERGE_COUNT indexes at the requested
** level, they are merged into a single level (iLevel+1) segment and the
** allocated index is 0.
**
** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
** returned. Otherwise, an SQLite error code is returned.
*/
static int fts3AllocateSegdirIdx(
  Fts3Table *p,
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index for p->aIndex */
  int iLevel,
  int *piIdx
){
  sqlite3_stmt *pQuery;           /* Query for next idx at level iLevel */
  int iFree = 0;                  /* Next idx reported by pQuery */
  int rc;                         /* Return Code */

  assert( iLangid>=0 );
  assert( p->nIndex>=1 );

  /* Ask the %_segdir table for the next available index at the absolute
  ** level that corresponds to iLangid/iIndex/iLevel. If the query returns
  ** no row, iFree keeps its initial value of 0. */
  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pQuery, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int64(
      pQuery, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
  );
  if( sqlite3_step(pQuery)==SQLITE_ROW ){
    iFree = sqlite3_column_int(pQuery, 0);
  }
  rc = sqlite3_reset(pQuery);
  if( rc!=SQLITE_OK ) return rc;

  /* Fewer than MergeCount(p) segments at this level: hand out the next
  ** index as is. */
  if( iFree<MergeCount(p) ){
    *piIdx = iFree;
    return SQLITE_OK;
  }

  /* The level is full. Merge all of its segments into a single segment at
  ** level iLevel+1 and hand out the (newly freed) index 0. *piIdx is set
  ** even if the merge reports an error. */
  fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
  rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
  *piIdx = 0;
  return rc;
}
/*
/*
** Set *pnMax to the largest value found in the 'level' column of the
** %_segdir table within the block of absolute levels that belongs to
** language id iLangid and index iIndex.
**
** If the query does not return a row, *pnMax is left unmodified.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(
  Fts3Table *p,
  int iLangid,
  int iIndex,
  sqlite3_int64 *pnMax
){
  sqlite3_stmt *pMax;             /* The max(level) query */
  int rc;                         /* Return code */

  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pMax to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** The two bound values are the first and last absolute level of the
  ** iLangid/iIndex block.
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pMax, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pMax, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
    sqlite3_bind_int64(pMax, 2,
        getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
    );
    if( sqlite3_step(pMax)==SQLITE_ROW ){
      *pnMax = sqlite3_column_int64(pMax, 0);
    }
    rc = sqlite3_reset(pMax);
  }
  return rc;
}
/*
** iAbsLevel is an absolute level that may be assumed to exist within
** the database. This function checks if it is the largest level number
** each of the SegReader objects in the array passed as the third
** argument, and
**
** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
** entries regardless of level if (iLevel<0).
**
** SQLITE_OK is returned if successful, otherwise an SQLite error code.
*/
static int fts3DeleteSegdir(
Fts3Table *p, /* Virtual table handle */
int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel, /* Level of %_segdir entries to delete */
Fts3SegReader **apSegment, /* Array of SegReader objects */
int nReader /* Size of array apSegment */
){
int rc = SQLITE_OK; /* Return Code */
int i; /* Iterator variable */
sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */
for(i=0; rc==SQLITE_OK && i<nReader; i++){
rc = fts3DeleteSegment(p, apSegment[i]);
}
if( rc!=SQLITE_OK ){
return rc;
}
assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
if( iLevel==FTS3_SEGCURSOR_ALL ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int64(pDelete, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
}
}else{
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(
pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
);
}
}
if( rc==SQLITE_OK ){
sqlite3_step(pDelete);
rc = sqlite3_reset(pDelete);
}
return rc;
** single segment with a level equal to the numerically largest level
** currently present in the database.
**
** If this function is called with iLevel<0, but there is only one
** segment in the database, SQLITE_DONE is returned immediately.
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
*/
static int fts3SegmentMerge(
Fts3Table *p,
int iLangid, /* Language id to merge */
int iIndex, /* Index in p->aIndex[] to merge */
int iLevel /* Level to merge */
){
int rc; /* Return code */
int iIdx = 0; /* Index of new segment */
sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
i64 iMaxLevel = 0; /* Max level number for this index/langid */
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
|| iLevel>=0
);
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
if( rc!=SQLITE_OK ) goto finished;
}
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the numerically
** greatest segment level currently present in the database for this
** index. The idx of the new segment is always 0. */
if( csr.nSegment==1 && 0==fts3SegReaderIsPending(csr.apSegment[0]) ){
rc = SQLITE_DONE;
}
iNewLevel = iMaxLevel;
bIgnoreEmpty = 1;
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
assert( FTS3_SEGCURSOR_PENDING==-1 );
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
assert_fts3_nc( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert_fts3_nc(
iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL)
);
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
while( SQLITE_OK==rc ){
rc = sqlite3Fts3SegReaderStep(p, &csr);
if( rc!=SQLITE_ROW ) break;
rc = fts3SegWriterAdd(p, &pWriter, 1,
csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
}
if( rc!=SQLITE_OK ) goto finished;
assert_fts3_nc( pWriter || bIgnoreEmpty );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(
p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
);
if( rc!=SQLITE_OK ) goto finished;
}
if( pWriter ){
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
}
}
/*
** Flush the contents of pendingTerms to level 0 segments.
*/
SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
int i;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
/* Determine the auto-incr-merge setting if unknown. If enabled,
** estimate the number of leaf blocks of content to be written
*/
if( rc==SQLITE_OK && p->bHasStat
&& p->nAutoincrmerge==0xff && p->nLeafAdd>0
){
sqlite3_stmt *pStmt = 0;
sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
sqlite3_step(pStmt);
*pRC = sqlite3_reset(pStmt);
sqlite3_bind_null(pStmt, 2);
sqlite3_free(a);
}
/*
** Flush the pending terms, then merge the segments of every
** iIndex/iLangid combination by calling fts3SegmentMerge() with
** FTS3_SEGCURSOR_ALL for each language id returned by the
** SQL_SELECT_ALL_LANGID statement and each index 0..p->nIndex-1.
**
** An SQLITE_DONE result from fts3SegmentMerge() is not an error; it is
** only remembered. If bReturnDone is true, no error occurred and at least
** one merge returned SQLITE_DONE, then SQLITE_DONE is returned. Otherwise
** SQLITE_OK or an SQLite error code is returned.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
  sqlite3_stmt *pLangids = 0;     /* Iterates through all language ids */
  int bAnyDone = 0;               /* True once a merge returned SQLITE_DONE */
  int rc;                         /* Return code */

  rc = sqlite3Fts3PendingTermsFlush(p);
  if( rc==SQLITE_OK ){
    rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pLangids, 0);
  }

  if( rc==SQLITE_OK ){
    int rcReset;                  /* Result of resetting pLangids */

    sqlite3_bind_int(pLangids, 1, p->iPrevLangid);
    sqlite3_bind_int(pLangids, 2, p->nIndex);
    while( sqlite3_step(pLangids)==SQLITE_ROW ){
      int iLangid = sqlite3_column_int(pLangids, 0);
      int iIndex = 0;

      /* Once an error has been seen no further merges are attempted. */
      while( rc==SQLITE_OK && iIndex<p->nIndex ){
        rc = fts3SegmentMerge(p, iLangid, iIndex, FTS3_SEGCURSOR_ALL);
        if( rc==SQLITE_DONE ){
          bAnyDone = 1;
          rc = SQLITE_OK;
        }
        iIndex++;
      }
    }

    /* An error from the reset is reported only if nothing failed earlier. */
    rcReset = sqlite3_reset(pLangids);
    if( rc==SQLITE_OK ) rc = rcReset;
  }

  sqlite3Fts3SegmentsClose(p);
  if( rc==SQLITE_OK && bReturnDone && bAnyDone ){
    return SQLITE_DONE;
  }
  return rc;
}
/*
** This function is called when the user executes the following statement:
rc = SQLITE_NOMEM;
}else{
memset(aSz, 0, nByte);
aSzIns = &aSz[p->nColumn+1];
aSzDel = &aSzIns[p->nColumn+1];
}
}
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
int iCol;
int iLangid = langidFromSelect(p, pStmt);
rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0));
memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
if( p->abNotindexed[iCol]==0 ){
const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
}
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSz);
}
if( rc!=SQLITE_OK ){
sqlite3_finalize(pStmt);
pStmt = 0;
}else{
return rc;
}
/*
** Return a 64-bit checksum for the FTS index entry described by the
** arguments.
**
** The checksum starts out as the docid. The language id, index number,
** column number, position and finally each byte of the term are then
** folded in, in that order. Folding in value V replaces checksum C with
** (C + (C<<3) + V), computed in unsigned 64-bit arithmetic.
**
** Term bytes are read through a plain "char" pointer, so the contribution
** of bytes outside the 7-bit range follows the signedness of char on the
** platform.
*/
static u64 fts3ChecksumEntry(
  const char *zTerm,              /* Pointer to buffer containing term */
  int nTerm,                      /* Size of zTerm in bytes */
  int iLangid,                    /* Language id for current row */
  int iIndex,                     /* Index (0..Fts3Table.nIndex-1) */
  i64 iDocid,                     /* Docid for current row. */
  int iCol,                       /* Column number */
  int iPos                        /* Position */
){
  u64 cksum = (u64)iDocid;        /* Running checksum */
  int iByte = 0;                  /* Offset of next term byte to fold in */

  cksum = (cksum<<3) + cksum + iLangid;
  cksum = (cksum<<3) + cksum + iIndex;
  cksum = (cksum<<3) + cksum + iCol;
  cksum = (cksum<<3) + cksum + iPos;

  while( iByte<nTerm ){
    cksum = (cksum<<3) + cksum + zTerm[iByte];
    iByte++;
  }
  return cksum;
}
/*
** Return a checksum of all entries in the FTS index that correspond to
** language id iLangid. The checksum is calculated by XORing the checksums
** of each individual entry (see fts3ChecksumEntry()) together.
**
** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
** return value is undefined in this case.
*/
static u64 fts3ChecksumIndex(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id to return cksum for */
int iIndex, /* Index to cksum (0..p->nIndex-1) */
int *pRc /* OUT: Return code */
){
Fts3SegFilter filter;
Fts3MultiSegReader csr;
int rc;
u64 cksum = 0;
if( *pRc ) return 0;
memset(&filter, 0, sizeof(filter));
memset(&csr, 0, sizeof(csr));
filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
filter.flags |= FTS3_SEGMENT_SCAN;
rc = sqlite3Fts3SegReaderCursor(
p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
}
if( rc==SQLITE_OK ){
while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
char *pCsr = csr.aDoclist;
char *pEnd = &pCsr[csr.nDoclist];
pCsr += sqlite3Fts3GetVarintU(pCsr, &iVal);
if( p->bDescIdx ){
iDocid = (i64)((u64)iDocid - iVal);
}else{
iDocid = (i64)((u64)iDocid + iVal);
}
}
}else{
iPos += (iVal - 2);
cksum = cksum ^ fts3ChecksumEntry(
csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
(int)iCol, (int)iPos
);
}
}
}
}
}
sqlite3Fts3SegReaderFinish(&csr);
*pRc = rc;
** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
** to false before returning.
**
** If an error occurs (e.g. an OOM or IO error), return an SQLite error
** code. The final value of *pbOk is undefined in this case.
*/
SQLITE_PRIVATE int sqlite3Fts3IntegrityCheck(Fts3Table *p, int *pbOk){
int rc = SQLITE_OK; /* Return code */
u64 cksum1 = 0; /* Checksum based on FTS index contents */
u64 cksum2 = 0; /* Checksum based on %_content contents */
sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */
/* This block calculates the checksum according to the FTS index. */
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid);
sqlite3_bind_int(pAllLangid, 2, p->nIndex);
while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
int iLangid = sqlite3_column_int(pAllLangid, 0);
int i;
for(i=0; i<p->nIndex; i++){
cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
}
}
rc2 = sqlite3_reset(pAllLangid);
if( rc==SQLITE_OK ) rc = rc2;
}
/* This block calculates the checksum according to the %_content table */
if( rc==SQLITE_OK ){
sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
sqlite3_stmt *pStmt = 0;
char *zSql;
zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
sqlite3_free(zSql);
}
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iDocid = sqlite3_column_int64(pStmt, 0);
int iLang = langidFromSelect(p, pStmt);
int iCol;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
sqlite3_tokenizer_cursor *pT = 0;
rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, -1, &pT);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
sqlite3_tokenizer_module const *pModule = pT->pModule;
assert( pCsr->isRequireSeek==0 );
iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
if( p->abNotindexed[i]==0 ){
const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
sqlite3_tokenizer_cursor *pTC = 0;
rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken = 0; /* Number of bytes in token */
int iDum1 = 0, iDum2 = 0; /* Dummy variables */
int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
Fts3PhraseToken *pPT = pDef->pToken;
if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
}
/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables. The schema of the virtual table being:
**
** CREATE TABLE <table name>(
** <user columns>,
** <table name> HIDDEN,
** docid HIDDEN,
** <langid> HIDDEN
** );
**
**
*/
SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
sqlite3_vtab *pVtab, /* FTS3 vtab object */
int nArg, /* Size of argument array */
sqlite3_value **apVal, /* Array of arguments */
sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
){
}
/* If this is a DELETE or UPDATE operation, remove the old record. */
if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
}
/* If this is an INSERT or UPDATE operation, insert the new record. */
if( nArg>1 && rc==SQLITE_OK ){
int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
if( bInsertDone==0 ){
rc = fts3InsertData(p, apVal, pRowid);
if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
rc = FTS_CORRUPT_VTAB;
}
}
if( rc==SQLITE_OK ){
rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid);
}
if( rc==SQLITE_OK ){
assert( p->iPrevDocid==*pRowid );
rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSzIns);
}
nChng++;
}
if( p->bFts4 ){
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
}
**
** ....X.....X....
**
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document
** actually contains terms that follow the final highlighted term.
*/
static int fts3SnippetShift(
Fts3Table *pTab, /* FTS3 table snippet comes from */
int iLangid, /* Language id to use in tokenizing */
int nSnippet, /* Number of tokens desired for snippet */
const char *zDoc, /* Document text to extract snippet from */
int nDoc, /* Size of buffer zDoc in bytes */
int *piPos, /* IN/OUT: First token of snippet */
u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
){
u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
if( hlmask ){
int nLeft; /* Tokens to the left of first highlight */
int nShift; /* Number of tokens to shift snippet by */
int iCurrent = 0; /* Token counter */
int rc; /* Return Code */
sqlite3_tokenizer_module *pMod;
sqlite3_tokenizer_cursor *pC;
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
** or more tokens in zDoc/nDoc.
*/
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
return rc;
}
while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
}
pMod->xClose(pC);
if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
if( zDoc==0 ){
if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
return SQLITE_OK;
}
nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
if( rc!=SQLITE_OK ){
return rc;
}
while( rc==SQLITE_OK ){
const char *ZDUMMY; /* Dummy argument used with tokenizer */
int DUMMY1 = -1; /* Dummy argument used with tokenizer */
int iBegin = 0; /* Offset in zDoc of start of token */
int iFin = 0; /* Offset in zDoc of end of token */
int isHighlight = 0; /* True for highlighted terms */
** Then break out of the loop. */
rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
}
break;
}
if( iCurrent<iPos ){ continue; }
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(
pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
);
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
** required. They are required if (a) this is not the first fragment,
** or (b) this fragment does not begin at position 0 of its column.
*/
if( rc==SQLITE_OK ){
if( iPos>0 || iFragment>0 ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
if( zDoc==0 ){
if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
continue;
}
rc = SQLITE_NOMEM;
goto offsets_out;
}
/* Initialize a tokenizer iterator to iterate through column iCol. */
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
zDoc, nDoc, &pC
);
if( rc!=SQLITE_OK ) goto offsets_out;
rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
while( rc==SQLITE_OK ){
int i; /* Used to loop through terms */
int iMinPos = 0x7FFFFFFF; /* Position of next token */
TermOffset *pTerm = 0; /* TermOffset associated with next token */
** then the RBU database should contain:
**
** CREATE TABLE data_x1(a, b, rbu_rowid, rbu_control);
**
** All non-hidden columns (i.e. all columns matched by "SELECT *") of the
** target table must be present in the input table. For virtual tables,
** hidden columns are optional - they are updated by RBU if present in
** the input table, or not otherwise. For example, to write to an fts4
** table with a hidden languageid column such as:
**
** CREATE VIRTUAL TABLE ft1 USING fts4(a, b, languageid='langid');
**
** Either of the following input table schemas may be used:
**
** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control);
** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control);
**
** For each row to INSERT into the target database as part of the RBU
** update, the corresponding data_% table should contain a single record
** with the "rbu_control" column set to contain integer value 0. The
** other columns should be set to the values that make up the new record
** to insert.
**
** If the target database table has an INTEGER PRIMARY KEY, it is not
** possible to insert a NULL value into the IPK column. Attempting to
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
*/
/*
** Bit widths of the fields that fts5_dri() packs into a single 64-bit
** rowid. From least to most significant the fields are: the page number
** (FTS5_DATA_PAGE_B bits), the doclist-index tree height
** (FTS5_DATA_HEIGHT_B bits), the doclist-index flag (FTS5_DATA_DLI_B bits)
** and, above those, the segment id.
*/
#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
/*
** Build a rowid from its four components. Each argument is cast to i64
** before being shifted into position, and the shifted fields are added
** together.
*/
#define fts5_dri(segid, dlidx, height, pgno) ( \
((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
((i64)(height) << (FTS5_DATA_PAGE_B)) + \
((i64)(pgno)) \
)
/*
** Convenience wrappers around fts5_dri():
**
**   FTS5_SEGMENT_ROWID:   dlidx flag and height both 0.
**   FTS5_DLIDX_ROWID:     dlidx flag set to 1, with the given height.
**   FTS5_TOMBSTONE_ROWID: as FTS5_SEGMENT_ROWID, but with (1<<16) added to
**                         the segment id, which is one more than the
**                         largest 16-bit segment id.
*/
#define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
#define FTS5_TOMBSTONE_ROWID(segid,ipg) fts5_dri(segid+(1<<16), 0, 0, ipg)
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled when
** SQLITE_DEBUG is defined.
**
** NOTE(review): presumably this exists so that a debugger breakpoint can
** be set at the point where corruption is reported - confirm against the
** definition of the FTS5_CORRUPT macro.
**
** The parameter list is spelled (void) so that the definition is a proper
** prototype; an empty list () leaves the parameters unspecified before C23.
*/
static int sqlite3Fts5Corrupt(void){
  return SQLITE_CORRUPT_VTAB;
}
#endif
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
/* Error state. */
int rc; /* Current error code */
int flushRc;
/* State used by the fts5DataXXX() functions. */
sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
sqlite3_stmt *pIdxSelect;
sqlite3_stmt *pIdxNextSelect;
int nRead; /* Total number of blocks read */
sqlite3_stmt *pDeleteFromIdx;
sqlite3_stmt *pDataVersion;
i64 iStructVersion; /* data_version when pStruct read */
Fts5Structure *pStruct; /* Current db structure (or NULL) */
};
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** nOriginCntr:
** This value is set to non-zero for structure records created for
** contentlessdelete=1 tables only. In that case it represents the
** origin value to apply to the next top-level segment created.
*/
/*
** Description of a single segment: its id and the range of leaf page
** numbers it occupies, plus bookkeeping that is only used by
** contentlessdelete=1 tables.
*/
struct Fts5StructureSegment {
int iSegid; /* Segment id */
int pgnoFirst; /* First leaf page number in segment */
int pgnoLast; /* Last leaf page number in segment */
/* contentlessdelete=1 tables only: */
u64 iOrigin1; /* NOTE(review): presumably first origin value covered - confirm */
u64 iOrigin2; /* NOTE(review): presumably last origin value covered - confirm */
int nPgTombstone; /* Number of tombstone hash table pages */
u64 nEntryTombstone; /* Number of tombstone entries that "count" */
u64 nEntry; /* Number of rows in this segment */
};
/* Size (in bytes) of an Fts5Structure object holding up to N levels. This
** is the offset of the aLevel[] member within Fts5Structure plus the space
** required for N Fts5StructureLevel entries. */
#define SZ_FTS5STRUCTURE(N) \
(offsetof(Fts5Structure,aLevel) + (N)*sizeof(Fts5StructureLevel))
/*
** An object of type Fts5SegWriter is used to write to segments.
**
** Fts5PageWriter holds the state of a single page while it is being
** assembled: the page number, the buffers for the leaf data and for the
** page-index, and the previous term written to the page. An Fts5SegWriter
** embeds one Fts5PageWriter as its "writer" member.
*/
struct Fts5PageWriter {
int pgno; /* Page number for this page */
int iPrevPgidx; /* Previous value written into pgidx */
Fts5Buffer buf; /* Buffer containing leaf data */
Fts5Buffer pgidx; /* Buffer containing page-index */
Fts5Buffer term; /* Buffer containing previous term on page */
};
/*
** State for one page written through an Fts5DlidxWriter: the page number,
** the buffer holding the page data, and the previous rowid written to the
** page (only meaningful while bPrevValid is true). An Fts5SegWriter holds
** an array of these objects in its aDlidx[] member.
*/
struct Fts5DlidxWriter {
int pgno; /* Page number for this page */
int bPrevValid; /* True if iPrev is valid */
i64 iPrev; /* Previous rowid value written to page */
Fts5Buffer buf; /* Buffer containing page data */
};
struct Fts5SegWriter {
int iSegid; /* Segid to write to */
Fts5PageWriter writer; /* PageWriter object */
i64 iPrevRowid; /* Previous rowid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
/* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
int nDlidx; /* Allocated size of aDlidx[] array */
Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
/* Values to insert into the %_idx table */
Fts5Buffer btterm; /* Next term to insert into %_idx table */
int iBtPage; /* Page number corresponding to btterm */
Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */
/* Next method */
void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
/* The page and offset from which the current term was read. The offset
** is the offset of the first rowid in the current doclist. */
int iTermLeafPgno;
int iTermLeafOffset;
int iPgidxOff; /* Next offset in pgidx */
int iEndofDoclist;
/* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
int iRowidOffset; /* Current entry in aRowidOffset[] */
int nRowidOffset; /* Allocated size of aRowidOffset[] array */
int *aRowidOffset; /* Array of offset to rowid fields */
Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
/* Variables populated based on current entry. */
iRowid, pIdx->pConfig->zName
);
return SQLITE_CORRUPT_VTAB;
}
#define FTS5_CORRUPT_ROWID(pIdx, iRowid) fts5IndexCorruptRowid(pIdx, iRowid)
/*
** Record a corruption error discovered while reading via segment iterator
** pIter. Sets Fts5Index.rc to FTS5_CORRUPT, stores an error message that
** names the leaf page, segment id and table, and returns
** SQLITE_CORRUPT_VTAB.
*/
static int fts5IndexCorruptIter(Fts5Index *pIdx, Fts5SegIter *pIter){
  Fts5Config *pConfig;

  pIdx->rc = FTS5_CORRUPT;
  pConfig = pIdx->pConfig;
  sqlite3Fts5ConfigErrmsg(
      pConfig,
      "fts5: corruption on page %d, segment %d, table \"%s\"",
      pIter->iLeafPgno,
      pIter->pSeg->iSegid,
      pConfig->zName
  );
  return SQLITE_CORRUPT_VTAB;
}
#define FTS5_CORRUPT_ITER(pIdx, pIter) fts5IndexCorruptIter(pIdx, pIter)
static int fts5IndexCorruptIdx(Fts5Index *pIdx){
pIdx->rc = FTS5_CORRUPT;
sqlite3Fts5ConfigErrmsg(pIdx->pConfig,
"fts5: corruption in table \"%s\"", pIdx->pConfig->zName
);
int iOff; /* Current offset into pData */
int bEof; /* At EOF already */
int iFirstOff; /* Used by reverse iterators */
/* Output variables */
int iLeafPgno; /* Page number of current leaf page */
i64 iRowid; /* First rowid on leaf iLeafPgno */
};
/*
** Iterator over a doclist-index. aLvl[0] is the level whose bEof flag is
** reported as the EOF state of the whole iterator; aLvl[i+1] is consulted
** to locate the next page for aLvl[i] once aLvl[i] runs out of entries.
** The object is allocated with SZ_FTS5DLIDXITER(nLvl) bytes.
*/
struct Fts5DlidxIter {
int nLvl; /* Number of valid entries in aLvl[] */
int iSegid; /* Segment id, used to build FTS5_DLIDX_ROWID() values */
Fts5DlidxLvl aLvl[FLEXARRAY]; /* One entry per doclist-index level */
};
/* Size (in bytes) of an Fts5DlidxIter object with up to N levels */
#define SZ_FTS5DLIDXITER(N) \
(offsetof(Fts5DlidxIter,aLvl)+(N)*sizeof(Fts5DlidxLvl))
static void fts5PutU16(u8 *aOut, u16 iVal){
aOut[0] = (iVal>>8);
aOut[1] = (iVal&0xFF);
if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
}
sqlite3_bind_int64(p->pDeleter, 1, iFirst);
sqlite3_bind_int64(p->pDeleter, 2, iLast);
sqlite3_step(p->pDeleter);
p->rc = sqlite3_reset(p->pDeleter);
}
/*
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
  const int iSegid = pSeg->iSegid;

  /* Delete every %_data row in the rowid range owned by this segment:
  ** from its first possible rowid up to one less than the first rowid of
  ** segment iSegid+1. */
  fts5DataDelete(p,
      FTS5_SEGMENT_ROWID(iSegid, 0),
      FTS5_SEGMENT_ROWID(iSegid+1, 0)-1
  );

  /* If the segment has tombstone hash pages, delete those as well. */
  if( pSeg->nPgTombstone!=0 ){
    fts5DataDelete(p,
        FTS5_TOMBSTONE_ROWID(iSegid, 0),
        FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1)
    );
  }

  /* Prepare, on first use, the statement that clears the segment's
  ** entries from the %_idx table. */
  if( p->pIdxDeleter==0 ){
    char *zSql = sqlite3_mprintf(
        "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
        p->pConfig->zDb, p->pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, zSql);
  }

  /* Skip the %_idx delete if any earlier step left an error in p->rc. */
  if( p->rc!=SQLITE_OK ) return;
  sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
  sqlite3_step(p->pIdxDeleter);
  p->rc = sqlite3_reset(p->pIdxDeleter);
}
/*
** Release a reference to an Fts5Structure object returned by an earlier
** call to fts5StructureRead() or fts5StructureDecode().
*/
static void fts5StructureRelease(Fts5Structure *pStruct){
if( rc==SQLITE_OK ){
pLvl->nSeg = nTotal;
for(iSeg=0; iSeg<nTotal; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
if( i>=nData ){
rc = FTS5_CORRUPT;
break;
}
assert( pSeg!=0 );
i += fts5GetVarint32(&pData[i], pSeg->iSegid);
i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
if( bStructureV2 ){
i += fts5GetVarint(&pData[i], &pSeg->iOrigin1);
i += fts5GetVarint(&pData[i], &pSeg->iOrigin2);
i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone);
i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
i += fts5GetVarint(&pData[i], &pSeg->nEntry);
nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2);
}
if( pTest ){
int i, j;
assert_nc( p->pStruct->nSegment==pTest->nSegment );
assert_nc( p->pStruct->nLevel==pTest->nLevel );
for(i=0; i<pTest->nLevel; i++){
assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
for(j=0; j<pTest->aLevel[i].nSeg; j++){
Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
assert_nc( p1->iSegid==p2->iSegid );
assert_nc( p1->pgnoFirst==p2->pgnoFirst );
assert_nc( p1->pgnoLast==p2->pgnoLast );
}
}
fts5StructureRelease(pTest);
}
}
#endif
if( p->rc!=SQLITE_OK ) return 0;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
int iSeg; /* Used to iterate through segments */
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
assert( pLvl->nMerge<=pLvl->nSeg );
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast);
if( pStruct->nOriginCntr>0 ){
fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry);
}
}
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
assert( iLvl<pIter->nLvl );
if( fts5DlidxLvlNext(pLvl) ){
if( (iLvl+1) < pIter->nLvl ){
fts5DlidxIterNextR(p, pIter, iLvl+1);
if( pLvl[1].bEof==0 ){
fts5DataRelease(pLvl->pData);
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
);
if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
}
}
}
return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
return fts5DlidxIterNextR(p, pIter, 0);
for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
while( fts5DlidxLvlNext(pLvl)==0 );
pLvl->bEof = 0;
if( i>0 ){
Fts5DlidxLvl *pChild = &pLvl[-1];
fts5DataRelease(pChild->pData);
memset(pChild, 0, sizeof(Fts5DlidxLvl));
pChild->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
);
}
}
}
/*
** Move the iterator passed as the only argument to the previous entry.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
int iOff = pLvl->iOff;
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
assert( iLvl<pIter->nLvl );
if( fts5DlidxLvlPrev(pLvl) ){
if( (iLvl+1) < pIter->nLvl ){
fts5DlidxIterPrevR(p, pIter, iLvl+1);
if( pLvl[1].bEof==0 ){
fts5DataRelease(pLvl->pData);
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
);
if( pLvl->pData ){
while( fts5DlidxLvlNext(pLvl)==0 );
pLvl->bEof = 0;
}
}
}
}
return pIter->aLvl[0].bEof;
for(i=0; i<pIter->nLvl; i++){
fts5DataRelease(pIter->aLvl[i].pData);
}
sqlite3_free(pIter);
}
}
static Fts5DlidxIter *fts5DlidxIterInit(
Fts5Index *p, /* Fts5 Backend to iterate within */
int bRev, /* True for ORDER BY ASC */
int iSegid, /* Segment id */
int iLeafPg /* Leaf page number to load dlidx for */
){
Fts5DlidxIter *pIter = 0;
int i;
int bDone = 0;
for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1);
Fts5DlidxIter *pNew;
pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
if( pNew==0 ){
p->rc = SQLITE_NOMEM;
}else{
i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
pIter = pNew;
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p, iRowid);
if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
bDone = 1;
}
pIter->nLvl = i+1;
}
}
if( p->rc==SQLITE_OK ){
pIter->iSegid = iSegid;
if( bRev==0 ){
fts5DlidxIterFirst(pIter);
}else{
fts5DlidxIterLast(p, pIter);
}
}
if( p->rc!=SQLITE_OK ){
fts5DlidxIterFree(pIter);
pIter = 0;
){
Fts5Data *pLeaf;
Fts5StructureSegment *pSeg = pIter->pSeg;
fts5DataRelease(pIter->pLeaf);
pIter->iLeafPgno++;
if( pIter->pNextLeaf ){
pIter->pLeaf = pIter->pNextLeaf;
pIter->pNextLeaf = 0;
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
pIter->pLeaf = fts5LeafRead(p,
FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
);
}else{
pIter->pLeaf = 0;
}
pLeaf = pIter->pLeaf;
if( pLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf;
if( fts5LeafIsTermless(pLeaf) ){
pIter->iEndofDoclist = pLeaf->nn+1;
}else{
pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
pIter->iEndofDoclist
);
}
}
}
/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
return;
}
pIter->term.n = nKeep;
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
assert( pIter->term.n<=pIter->term.nSpace );
iOff += nNew;
pIter->iTermLeafOffset = iOff;
pIter->iTermLeafPgno = pIter->iLeafPgno;
pIter->iLeafOffset = iOff;
if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
}else{
int nExtra;
pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
pIter->iEndofDoclist += nExtra;
}
fts5SegIterLoadRowid(p, pIter);
}
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
do {
fts5SegIterNextPage(p, pIter);
}while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
}
if( p->rc==SQLITE_OK && pIter->pLeaf ){
pIter->iLeafOffset = 4;
assert( pIter->pLeaf!=0 );
assert_nc( pIter->pLeaf->nn>4 );
assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
fts5SegIterLoadTerm(p, pIter, 0);
fts5SegIterLoadNPos(p, pIter);
fts5SegIterAllocTombstone(p, pIter);
}
}
/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
assert( pIter->flags & FTS5_SEGITER_REVERSE );
assert( pIter->flags & FTS5_SEGITER_ONETERM );
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
Fts5Data *pNew;
pIter->iLeafPgno--;
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
pIter->pSeg->iSegid, pIter->iLeafPgno
));
if( pNew ){
/* iTermLeafOffset may be equal to szLeaf if the term is the last
** thing on the page - i.e. the first rowid is on the following page.
** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
assert( pIter->pLeaf==0 );
if( pIter->iTermLeafOffset<pNew->szLeaf ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = pIter->iTermLeafOffset;
while( iOff==0 ){
fts5SegIterNextPage(p, pIter);
pLeaf = pIter->pLeaf;
if( pLeaf==0 ) break;
ASSERT_SZLEAF_OK(pLeaf);
if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
if( pLeaf->nn>pLeaf->szLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
&pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
);
}
}
else if( pLeaf->nn>pLeaf->szLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
&pLeaf->p[pLeaf->szLeaf], iOff
);
pIter->iLeafOffset = iOff;
pIter->iEndofDoclist = iOff;
bNewTerm = 1;
}
assert_nc( iOff<pLeaf->szLeaf );
if( iOff>pLeaf->szLeaf ){
FTS5_CORRUPT_ITER(p, pIter);
return;
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that it iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
Fts5DlidxIter *pDlidx = pIter->pDlidx;
Fts5Data *pLast = 0;
int pgnoLast = 0;
if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = fts5DlidxIterPgno(pDlidx);
pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
}else{
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
/* Currently, Fts5SegIter.iLeafOffset points to the first byte of
** position-list content for the current rowid. Back it up so that it
** points to the start of the position-list size field. */
int iPoslist;
if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
iPoslist = pIter->iTermLeafOffset;
}else{
/* If this condition is true then the largest rowid for the current
** term may not be stored on the current page. So search forward to
** see where said rowid really is. */
if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
int pgno;
Fts5StructureSegment *pSeg = pIter->pSeg;
/* The last rowid in the doclist may not be on the current page. Search
** forward to find the page containing the last rowid. */
for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
Fts5Data *pNew = fts5LeafRead(p, iAbs);
if( pNew ){
int iRowid, bTermless;
iRowid = fts5LeafFirstRowidOff(pNew);
bTermless = fts5LeafIsTermless(pNew);
if( iRowid ){
SWAPVAL(Fts5Data*, pNew, pLast);
pgnoLast = pgno;
}
fts5DataRelease(pNew);
fts5SegIterReverseInitPage(p, pIter);
}
/*
** Iterator pIter currently points to the first rowid of a doclist.
** There is a doclist-index associated with the final term on the current
** page. If the current term is the last term on the page, load the
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
*/
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
int iSeg = pIter->pSeg->iSegid;
int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
assert( pIter->flags & FTS5_SEGITER_ONETERM );
assert( pIter->pDlidx==0 );
/* Check if the current doclist ends on this page. If it does, return
** early without loading the doclist-index (as it belongs to a different
** term. */
if( pIter->iTermLeafPgno==pIter->iLeafPgno
const u8 *pTerm, int nTerm /* Term to search for */
){
u32 iOff;
const u8 *a = pIter->pLeaf->p;
u32 n = (u32)pIter->pLeaf->nn;
u32 nMatch = 0;
u32 nKeep = 0;
u32 nNew = 0;
u32 iTermOff;
u32 iPgidx; /* Current offset in pgidx */
int bEndOfPage = 0;
assert( p->rc==SQLITE_OK );
iPgidx = (u32)pIter->pLeaf->szLeaf;
iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
iOff = iTermOff;
if( iOff>n ){
FTS5_CORRUPT_ITER(p, pIter);
return;
}
while( 1 ){
/* Figure out how many new bytes are in this term */
fts5FastGetVarint32(a, iOff, nNew);
if( i==nNew ){
goto search_success;
}else{
goto search_failed;
}
}else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
goto search_failed;
}
}
if( iPgidx>=n ){
bEndOfPage = 1;
break;
}
iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
iTermOff += nKeep;
iOff = iTermOff;
if( iOff>=n ){
FTS5_CORRUPT_ITER(p, pIter);
return;
}
/* Read the nKeep field of the next term. */
fts5FastGetVarint32(a, iOff, nKeep);
if( bGe==0 ){
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
return;
}else if( bEndOfPage ){
do {
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ) return;
a = pIter->pLeaf->p;
if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
iPgidx = (u32)pIter->pLeaf->szLeaf;
iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
FTS5_CORRUPT_ITER(p, pIter);
return;
}else{
nKeep = 0;
iTermOff = iOff;
n = (u32)pIter->pLeaf->nn;
iOff += fts5GetVarint32(&a[iOff], nNew);
break;
}
FTS5_CORRUPT_ITER(p, pIter);
return;
}
pIter->iLeafOffset = iOff + nNew;
pIter->iTermLeafOffset = pIter->iLeafOffset;
pIter->iTermLeafPgno = pIter->iLeafPgno;
fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
if( iPgidx>=n ){
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
}else{
int nExtra;
iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
pIter->iEndofDoclist = iTermOff + nExtra;
}
pIter->iPgidxOff = iPgidx;
fts5SegIterLoadRowid(p, pIter);
fts5SegIterLoadNPos(p, pIter);
}
/*
** Return the cached statement that looks up, for a given segid, the %_idx
** entry with the largest term that is less than or equal to a bound term.
** The statement is prepared on first use and stored in p->pIdxSelect. If
** preparation fails, the returned pointer is whatever p->pIdxSelect then
** holds; callers check p->rc.
*/
static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
  if( p->pIdxSelect ) return p->pIdxSelect;
  {
    char *zSql = sqlite3_mprintf(
        "SELECT pgno FROM '%q'.'%q_idx' WHERE "
        "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
        p->pConfig->zDb, p->pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxSelect, zSql);
  }
  return p->pIdxSelect;
}
/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
assert( pTerm && nTerm );
memset(pIter, 0, sizeof(*pIter));
pIter->pSeg = pSeg;
/* This block sets stack variable iPg to the leaf page number that may
** contain term (pTerm/nTerm), if it is present in the segment. */
pIdxSelect = fts5IdxSelectStmt(p);
if( p->rc ) return;
sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
i64 val = sqlite3_column_int(pIdxSelect, 0);
iPg = (int)(val>>1);
bDlidx = (val & 0x0001);
}
p->rc = sqlite3_reset(pIdxSelect);
sqlite3_bind_null(pIdxSelect, 2);
if( iPg<pSeg->pgnoFirst ){
/*
** SQL used by fts5SegIterNextInit() to find the page to open.
*/
static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
  /* The statement selects, for a given segid, the %_idx entry with the
  ** smallest term strictly greater than the bound term. It is prepared on
  ** first use and cached in p->pIdxNextSelect. */
  if( p->pIdxNextSelect ) return p->pIdxNextSelect;
  {
    char *zSql = sqlite3_mprintf(
        "SELECT pgno FROM '%q'.'%q_idx' WHERE "
        "segid=? AND term>? ORDER BY term ASC LIMIT 1",
        p->pConfig->zDb, p->pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxNextSelect, zSql);
  }
  return p->pIdxNextSelect;
}
/*
** This is similar to fts5SegIterSeekInit(), except that it initializes
** the segment iterator to point to the first term following the page
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){
int iPg = -1; /* Page of segment to open */
int bDlidx = 0;
sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
pSel = fts5IdxNextStmt(p);
if( pSel ){
assert( p->rc==SQLITE_OK );
sqlite3_bind_int(pSel, 1, pSeg->iSegid);
sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);
if( sqlite3_step(pSel)==SQLITE_ROW ){
i64 val = sqlite3_column_int64(pSel, 0);
iPg = (int)(val>>1);
bDlidx = (val & 0x0001);
}
p->rc = sqlite3_reset(pSel);
sqlite3_bind_null(pSel, 2);
if( p->rc ) return;
pIter->flags |= FTS5_SEGITER_ONETERM;
if( iPg>=0 ){
pIter->iLeafPgno = iPg - 1;
fts5SegIterNextPage(p, pIter);
fts5SegIterSetNext(p, pIter);
}
if( pIter->pLeaf ){
const u8 *a = pIter->pLeaf->p;
int iTermOff = 0;
pIter->iPgidxOff = pIter->pLeaf->szLeaf;
pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
pIter->iLeafOffset = iTermOff;
fts5SegIterLoadTerm(p, pIter, 0);
fts5SegIterLoadNPos(p, pIter);
if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
assert( p->rc!=SQLITE_OK ||
fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0
);
}
}
if( pSeg->pLeaf && pArray ){
/* Figure out which page the rowid might be present on. */
int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
assert( iPg>=0 );
/* If tombstone hash page iPg has not yet been loaded from the
** database, load it now. */
if( pArray->apTombstone[iPg]==0 ){
pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
);
if( pArray->apTombstone[iPg]==0 ) return 0;
}
return fts5IndexTombstoneQuery(
pArray->apTombstone[iPg],
pArray->nTombstone,
pSeg->iRowid
);
}
xChunk(p, pCtx, pChunk, nChunk);
nRem -= nChunk;
fts5DataRelease(pData);
if( nRem<=0 ){
break;
}else if( pSeg->pSeg==0 ){
FTS5_CORRUPT_IDX(p);
return;
}else{
pgno++;
pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
if( pData==0 ) break;
pChunk = &pData->p[4];
nChunk = MIN(nRem, pData->szLeaf - 4);
if( pgno==pgnoSave ){
assert( pSeg->pNextLeaf==0 );
pSeg->pNextLeaf = pData;
pData = 0;
}
}
}
/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and FTS5_MAX_SEGMENT inclusive, and must not be
** used by any currently existing segment. If the structure already holds
** FTS5_MAX_SEGMENT or more segments, Fts5Index.rc is set to SQLITE_FULL
** and 0 is returned.
**
** If an error has already occurred, this function is a no-op. 0 is
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
  ** array is 63 elements, or 252 bytes, in size. Bit ((id-1) % 32) of
  ** word ((id-1) / 32) is set for each segment id already in use. */
  u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
  int iLvl, iSeg;
  int iWord = 0;                  /* Index of first word with a clear bit */
  int iBit = 0;                   /* Index of first clear bit in that word */
  int iSegid;

  /* No-op if an error has already occurred. */
  if( p->rc!=SQLITE_OK ) return 0;

  /* Refuse to allocate if the structure is already full. */
  if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
    p->rc = SQLITE_FULL;
    return 0;
  }

  /* Build the bitmap of segment ids in use. Ids outside the range
  ** 1..FTS5_MAX_SEGMENT are ignored. */
  memset(aUsed, 0, sizeof(aUsed));
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
      int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
      if( iId>0 && iId<=FTS5_MAX_SEGMENT ){
        aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
      }
    }
  }

  /* Locate the lowest clear bit and convert it back to a 1-based id. */
  while( aUsed[iWord]==0xFFFFFFFF ) iWord++;
  while( aUsed[iWord] & ((u32)1 << iBit) ) iBit++;
  iSegid = iBit + 1 + iWord*32;

#ifdef SQLITE_DEBUG
  /* The id just chosen must not belong to any existing segment... */
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
      assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
    }
  }
  assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );

  /* ...and there must be no rows for it in the %_idx table. */
  {
    sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
    if( p->rc==SQLITE_OK ){
      u8 aBlob[2] = {0xff, 0xff};
      sqlite3_bind_int(pIdxSelect, 1, iSegid);
      sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
      assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
      p->rc = sqlite3_reset(pIdxSelect);
      sqlite3_bind_null(pIdxSelect, 2);
    }
  }
#endif

  return iSegid;
}
/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
assert( p->pHash || p->nPendingData==0 );
if( p->pHash ){
sqlite3Fts5HashClear(p->pHash);
p->nPendingData = 0;
int bFlush /* If true, write dlidx to disk */
){
int i;
assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
for(i=0; i<pWriter->nDlidx; i++){
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
if( pDlidx->buf.n==0 ) break;
if( bFlush ){
assert( pDlidx->pgno!=0 );
fts5DataWrite(p,
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
pDlidx->buf.p, pDlidx->buf.n
);
}
sqlite3Fts5BufferZero(&pDlidx->buf);
pDlidx->bPrevValid = 0;
}
}
/*
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
  assert( pWriter->iBtPage || pWriter->nEmpty==0 );

  /* Nothing to do unless a %_idx entry is pending (iBtPage!=0). */
  if( pWriter->iBtPage!=0 ){
    /* Flush the doclist-index first; its return value becomes the low
    ** bit of the pgno column, with the page number in the bits above. */
    int bDlidx = fts5WriteFlushDlidx(p, pWriter);

    if( p->rc==SQLITE_OK ){
      sqlite3_stmt *pStmt = p->pIdxWriter;
      i64 iVal = bDlidx + ((i64)pWriter->iBtPage<<1);
      const char *zTerm = "";

      /* Bind an empty string rather than the buffer pointer when there
      ** is no pending term. */
      if( pWriter->btterm.n>0 ){
        zTerm = (const char*)pWriter->btterm.p;
      }

      /* The following was already done in fts5WriteInit(): */
      /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
      sqlite3_bind_blob(pStmt, 2, zTerm, pWriter->btterm.n, SQLITE_STATIC);
      sqlite3_bind_int64(pStmt, 3, iVal);
      sqlite3_step(pStmt);
      p->rc = sqlite3_reset(pStmt);

      /* The blob was bound SQLITE_STATIC; unbind it so the statement does
      ** not keep a pointer into the term buffer. */
      sqlite3_bind_null(pStmt, 2);
    }
    pWriter->iBtPage = 0;
  }
}
/*
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
if( pDlidx->buf.n>=p->pConfig->pgsz ){
/* The current doclist-index page is full. Write it to disk and push
** a copy of iRowid (which will become the first rowid on the next
** doclist-index leaf page) up into the next level of the b-tree
** hierarchy. If the node being flushed is currently the root node,
** also push its first rowid upwards. */
pDlidx->buf.p[0] = 0x01; /* Not the root node */
fts5DataWrite(p,
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
pDlidx->buf.p, pDlidx->buf.n
);
fts5WriteDlidxGrow(p, pWriter, i+2);
pDlidx = &pWriter->aDlidx[i];
if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
/* This was the root node. Push its first rowid up to the new root. */
pDlidx[1].pgno = pDlidx->pgno;
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
pDlidx->bPrevValid = 1;
pDlidx->iPrev = iRowid;
}
}
/*
** Write the leaf page currently held by pWriter to the %_data table and
** reset the writer so that it is ready to accumulate the next leaf.
** Errors are reported via Fts5Index.rc.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 aEmptyHdr[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pLeaf = &pWriter->writer;
  Fts5Buffer *pBuf = &pLeaf->buf;
  Fts5Buffer *pPgidx = &pLeaf->pgidx;

  assert( (pPgidx->n==0)==(pWriter->bFirstTermInPage) );

  /* Store the size of the leaf content in the szLeaf header field (bytes
  ** 2 and 3), which must still be zero at this point. */
  assert( 0==fts5GetU16(&pBuf->p[2]) );
  fts5PutU16(&pBuf->p[2], (u16)pBuf->n);

  if( pWriter->bFirstTermInPage==0 ){
    /* At least one term was written: the page-index follows the content. */
    fts5BufferAppendBlob(&p->rc, pBuf, pPgidx->n, pPgidx->p);
  }else{
    /* No term was written to this page. */
    assert( pPgidx->n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the page out to disk */
  fts5DataWrite(p,
      FTS5_SEGMENT_ROWID(pWriter->iSegid, pLeaf->pgno), pBuf->p, pBuf->n
  );

  /* Start the next page: empty buffers plus a zeroed 4-byte header. */
  fts5BufferZero(pBuf);
  fts5BufferZero(pPgidx);
  fts5BufferAppendBlob(&p->rc, pBuf, 4, aEmptyHdr);
  pLeaf->iPrevPgidx = 0;
  pLeaf->pgno++;

  /* One more leaf written; the new leaf holds no terms or rowids yet. */
  pWriter->nLeafWritten++;
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
Fts5Index *p,
Fts5SegWriter *pWriter,
int nTerm, const u8 *pTerm
){
int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->writer;
Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
int nMin = MIN(pPage->term.n, nTerm);
assert( p->rc==SQLITE_OK );
assert( pPage->buf.n>=4 );
assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
/* If the current leaf page is full, flush it to disk. */
if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
if( pPage->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
if( p->rc!=SQLITE_OK ) return;
}
fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
}
/* TODO1: Updating pgidx here. */
pPgidx->n += sqlite3Fts5PutVarint(
&pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
);
pPage->iPrevPgidx = pPage->buf.n;
#if 0
fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
pPgidx->n += 2;
#endif
if( pWriter->bFirstTermInPage ){
nPrefix = 0;
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
** the segment b-tree. In this case it is necessary to add a term to
** the b-tree hierarchy that is (a) larger than the largest term
** already written to the segment and (b) smaller than or equal to
** this term. In other words, a prefix of (pTerm/nTerm) that is one
** Append a rowid and position-list size field to the writers output.
*/
static void fts5WriteAppendRowid(
Fts5Index *p,
Fts5SegWriter *pWriter,
i64 iRowid
){
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->writer;
if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
fts5WriteFlushLeaf(p, pWriter);
}
/* If this is to be the first rowid written to the page, set the
** rowid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */
if( pWriter->bFirstRowidInPage ){
fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
fts5WriteDlidxAppend(p, pWriter, iRowid);
}
Fts5SegWriter *pWriter,
const u8 *aData,
int nData
){
Fts5PageWriter *pPage = &pWriter->writer;
const u8 *a = aData;
int n = nData;
assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK );
while( p->rc==SQLITE_OK
&& (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
){
int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
int nCopy = 0;
while( nCopy<nReq ){
i64 dummy;
nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
}
fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
a += nCopy;
n -= nCopy;
fts5WriteFlushLeaf(p, pWriter);
}
if( pLeaf->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
}
*pnLeaf = pLeaf->pgno-1;
if( pLeaf->pgno>1 ){
fts5WriteFlushBtree(p, pWriter);
}
}
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
fts5BufferFree(&pLeaf->pgidx);
fts5BufferFree(&pWriter->btterm);
for(i=0; i<pWriter->nDlidx; i++){
sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
}
sqlite3_free(pWriter->aDlidx);
}
/*
** Initialize segment writer pWriter so that it writes to segment iSegid,
** starting at leaf page 1. The %_idx INSERT statement is prepared on
** first use and the segment id is bound to it. Errors are reported via
** Fts5Index.rc.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  Fts5Config *pConfig = p->pConfig;
  Fts5PageWriter *pLeaf = &pWriter->writer;
  const int nBuffer = pConfig->pgsz + FTS5_DATA_PADDING;

  /* Start from an all-zero writer, then fill in the non-zero fields. */
  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;
  fts5WriteDlidxGrow(p, pWriter, 1);
  pLeaf->pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pLeaf->buf.n==0 );
  assert( pLeaf->pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pLeaf->pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pLeaf->buf, nBuffer);

  /* Prepare the %_idx INSERT statement if this has not been done yet. */
  if( p->pIdxWriter==0 ){
    char *zSql = sqlite3_mprintf(
        "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxWriter, zSql);
  }

  if( p->rc!=SQLITE_OK ) return;

  /* Initialize the 4-byte leaf-page header to 0x00. */
  memset(pLeaf->buf.p, 0, 4);
  pLeaf->buf.n = 4;

  /* Bind the current output segment id to the index-writer. This is an
  ** optimization over binding the same value over and over as rows are
  ** inserted into %_idx by the current writer. */
  sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
int i;
}else if( pSeg->pLeaf==0 ){
/* All keys from this input segment have been transfered to the output.
** Set both the first and last page-numbers to 0 to indicate that the
** segment is now empty. */
pSeg->pSeg->pgnoLast = 0;
pSeg->pSeg->pgnoFirst = 0;
}else{
int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
i64 iLeafRowid;
Fts5Data *pData;
int iId = pSeg->pSeg->iSegid;
u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
pData = fts5LeafRead(p, iLeafRowid);
if( pData ){
if( iOff>pData->szLeaf ){
/* This can occur if the pages that the segments occupy overlap - if
** a single page has been assigned to more than one segment. In
** this case a prior iteration of this loop may have corrupted the
** segment currently being trimmed. */
fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
if( p->rc==SQLITE_OK ){
/* Set the szLeaf field */
fts5PutU16(&buf.p[2], (u16)buf.n);
}
/* Set up the new page-index array */
fts5BufferAppendVarint(&p->rc, &buf, 4);
if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
&& pSeg->iEndofDoclist<pData->szLeaf
&& pSeg->iPgidxOff<=pData->nn
){
int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
fts5BufferAppendBlob(&p->rc, &buf,
pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
);
}
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
}
fts5DataRelease(pData);
}
}
assert( pLvl->nMerge<=pLvl->nSeg );
memset(&writer, 0, sizeof(Fts5SegWriter));
memset(&term, 0, sizeof(Fts5Buffer));
if( pLvl->nMerge ){
pLvlOut = &pStruct->aLevel[iLvl+1];
assert( pLvlOut->nSeg>0 );
nInput = pLvl->nMerge;
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
fts5WriteInit(p, &writer, pSeg->iSegid);
writer.writer.pgno = pSeg->pgnoLast+1;
writer.iBtPage = 0;
}else{
int iSegid = fts5AllocateSegid(p, pStruct);
/* Extend the Fts5Structure object as required to ensure the output
** segment exists. */
if( iLvl==pStruct->nLevel-1 ){
fts5StructureAddLevel(&p->rc, ppStruct);
pStruct = *ppStruct;
}
fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
if( p->rc ) return;
pLvl = &pStruct->aLevel[iLvl];
pLvlOut = &pStruct->aLevel[iLvl+1];
fts5WriteInit(p, &writer, iSegid);
/* Add the new segment to the output level */
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
pLvlOut->nSeg++;
pSeg->pgnoFirst = 1;
pSeg->iSegid = iSegid;
pStruct->nSegment++;
/* Read input from all segments in the input level */
nInput = pLvl->nSeg;
/* Set the range of origins that will go into the output segment. */
if( pStruct->nOriginCntr>0 ){
pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
}
if( (ret + i) > nMax ) break;
ret += i;
}
}
return ret;
}
/*
** Run the following statement against the %_idx table of the index:
**
**   DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
**
** A secure-delete operation calls this after removing the last term
** from a segment leaf page, so that the matching %_idx entry goes away
** as well. The intent is that once every instance of a token has been
** deleted in secure-delete mode, the token text cannot be recovered
** from anywhere in the database.
**
** Nothing is done when iPgno==1. The DELETE statement is prepared the
** first time it is needed and cached in p->pDeleteFromIdx for later
** calls. The statement is only bound and stepped if p->rc is SQLITE_OK
** at that point; afterwards p->rc holds the value returned by
** sqlite3_reset(). The return value of sqlite3_step() itself is not
** inspected.
*/
static void fts5SecureDeleteIdxEntry(
  Fts5Index *p,                   /* FTS5 backend object */
  int iSegid,                     /* Id of segment to delete entry for */
  int iPgno                       /* Page number within segment */
){
  /* Page 1 is never handled here. */
  if( iPgno==1 ) return;

  assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );

  /* Lazily prepare the cached DELETE statement. */
  if( p->pDeleteFromIdx==0 ){
    fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
        "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
        p->pConfig->zDb, p->pConfig->zName
    ));
  }

  /* Bail out if an error is pending (including a failed prepare). */
  if( p->rc!=SQLITE_OK ) return;

  sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
  sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
  sqlite3_step(p->pDeleteFromIdx);
  p->rc = sqlite3_reset(p->pDeleteFromIdx);
}
/*
** This is called when a secure-delete operation removes a position-list
** that overflows onto segment page iPgno of segment pSeg. This function
int iPgno,
int *pbLastInDoclist
){
const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
int pgno;
Fts5Data *pLeaf = 0;
assert( iPgno!=1 );
*pbLastInDoclist = 1;
for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
int iNext = 0;
u8 *aPg = 0;
pLeaf = fts5DataRead(p, iRowid);
if( pLeaf==0 ) break;
aPg = pLeaf->p;
iNext = fts5GetU16(&aPg[0]);
if( iNext!=0 ){
*pbLastInDoclist = 0;
/*
** Completely remove the entry that pSeg currently points to from
** the database.
*/
static void fts5DoSecureDelete(
Fts5Index *p,
Fts5SegIter *pSeg
){
const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
int iSegid = pSeg->pSeg->iSegid;
u8 *aPg = pSeg->pLeaf->p;
int nPg = pSeg->pLeaf->nn;
int iPgIdx = pSeg->pLeaf->szLeaf;
u64 iDelta = 0;
int iNextOff = 0;
int iOff = 0;
int nIdx = 0;
u8 *aIdx = 0;
int bLastInDoclist = 0;
iNextOff += nSuffix2;
}
}
}else if( iStart==4 ){
int iPgno;
assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
/* The entry being removed may be the only position list in
** its doclist. */
for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
int bEmpty = (pPg && pPg->nn==4);
fts5DataRelease(pPg);
if( bEmpty==0 ) break;
}
if( iPgno==pSeg->iTermLeafPgno ){
i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
Fts5Data *pTerm = fts5DataRead(p, iId);
if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
int nTermIdx = pTerm->nn - pTerm->szLeaf;
int iTermIdx = 0;
int iTermOff = 0;
while( 1 ){
u32 iVal = 0;
int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
if( (iTermIdx+nByte)>=nTermIdx ) break;
iTermIdx += nByte;
}
nTermIdx = iTermIdx;
memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
fts5PutU16(&pTerm->p[2], iTermOff);
fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
if( nTermIdx==0 ){
fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
}
}
fts5DataRelease(pTerm);
}
}
/* Assuming no error has occurred, this block does final edits to the
** leaf page before writing it back to disk. Input variables are:
**
** nPg: Total initial size of leaf page.
iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
iKeyIn += iVal;
if( iKeyIn!=iDelKeyOff ){
int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
iPrevKeyOut = iKeyOut;
}
}
if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
}
assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg);
}
sqlite3_free(aIdx);
}
/*
** This is called as part of flushing a delete to disk in 'secure-delete'
** mode. It edits the segments within the database described by argument
** pStruct to remove the entries for term zTerm, rowid iRowid.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
Fts5Hash *pHash = p->pHash;
Fts5Structure *pStruct;
int iSegid;
int pgnoLast = 0; /* Last leaf page number in segment */
/* Obtain a reference to the index structure and allocate a new segment-id
** for the new level-0 segment. */
pStruct = fts5StructureRead(p);
fts5StructureInvalidate(p);
if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
iSegid = fts5AllocateSegid(p, pStruct);
if( iSegid ){
const int pgsz = p->pConfig->pgsz;
int eDetail = p->pConfig->eDetail;
int bSecureDelete = p->pConfig->bSecureDelete;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
Fts5SegWriter writer;
fts5WriteInit(p, &writer, iSegid);
pBuf = &writer.writer.buf;
pPgidx = &writer.writer.pgidx;
/* fts5WriteInit() should have initialized the buffers to (most likely)
** the maximum space required. */
assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
/* Begin scanning through hash table entries. This loop runs once for each
** term/doclist currently stored within the hash table. */
if( p->rc==SQLITE_OK ){
p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
}
while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
const char *zTerm; /* Buffer containing term */
int nTerm; /* Size of zTerm in bytes */
const u8 *pDoclist; /* Pointer to doclist for this term */
int nDoclist; /* Size of doclist in bytes */
/* Get the term and doclist for this entry. */
sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);
if( bSecureDelete==0 ){
fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
if( p->rc!=SQLITE_OK ) break;
assert( writer.bFirstRowidInPage==0 );
}
if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
/* The entire doclist will fit on the current leaf. */
fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
}else{
int bTermWritten = !bSecureDelete;
i64 iRowid = 0;
i64 iPrev = 0;
int iOff = 0;
/* The entire doclist will not fit on this leaf. The following
** loop iterates through the poslists that make up the current
if( eDetail==FTS5_DETAIL_NONE ){
if( iOff<nDoclist && pDoclist[iOff]==0 ){
pBuf->p[pBuf->n++] = 0;
iOff++;
if( iOff<nDoclist && pDoclist[iOff]==0 ){
pBuf->p[pBuf->n++] = 0;
iOff++;
}
}
if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
}
}else{
int bDel = 0;
int nPos = 0;
int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel);
if( bDel && bSecureDelete ){
fts5BufferAppendVarint(&p->rc, pBuf, nPos*2);
iOff += nCopy;
nCopy = nPos;
}else{
nCopy += nPos;
}
if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
/* The entire poslist will fit on the current leaf. So copy
** it in one go. */
fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
}else{
/* The entire poslist will not fit on this leaf. So it needs
** to be broken into sections. The only qualification being
** that each varint must be stored contiguously. */
const u8 *pPoslist = &pDoclist[iOff];
int iPos = 0;
while( p->rc==SQLITE_OK ){
int nSpace = pgsz - pBuf->n - pPgidx->n;
int n = 0;
if( (nCopy - iPos)<=nSpace ){
n = nCopy - iPos;
}else{
n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
}
assert( n>0 );
fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
iPos += n;
if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
}
if( iPos>=nCopy ) break;
}
}
iOff += nCopy;
}
}
}
assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
if( pgnoLast>0 ){
/* Update the Fts5Structure. It is written back to the database by the
** fts5StructureRelease() call below. */
if( pStruct->nLevel==0 ){
fts5StructureAddLevel(&p->rc, &pStruct);
}
fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
if( p->rc==SQLITE_OK ){
pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
pSeg->iSegid = iSegid;
pSeg->pgnoFirst = 1;
pSeg->pgnoLast = pgnoLast;
if( pStruct->nOriginCntr>0 ){
pSeg->iOrigin1 = pStruct->nOriginCntr;
pSeg->iOrigin2 = pStruct->nOriginCntr;
pSeg->nEntry = p->nPendingRow;
pStruct->nOriginCntr++;
}
pStruct->nSegment++;
}
if( rc==SQLITE_OK ){
p->pConfig = pConfig;
p->nWorkUnit = FTS5_WORK_UNIT;
p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
if( p->zDataTbl && bCreate ){
rc = sqlite3Fts5CreateTable(
pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5CreateTable(pConfig, "idx",
"segid, term, pgno, PRIMARY KEY(segid, term)",
1, pzErr
);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5IndexReinit(p);
}
}
}
assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
}
/* Loop through the current pages of the hash table. */
for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
Fts5Data *pData = 0; /* Page ii of the current hash table */
Fts5Data *pFree = 0; /* Free this at the end of the loop */
if( iPg1==ii ){
pData = pData1;
}else{
pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
}
if( pData ){
int szKeyIn = TOMBSTONE_KEYSIZE(pData);
int nSlotIn = (pData->nn - 8) / szKeyIn;
int iIn;
for(iIn=0; iIn<nSlotIn; iIn++){
u64 iVal = 0;
/* Read the value from slot iIn of the input page into iVal. */
Fts5Data *pPg = 0;
int iPg = -1;
int szKey = 0;
int nHash = 0;
Fts5Data **apHash = 0;
p->nContentlessDelete++;
if( pSeg->nPgTombstone>0 ){
iPg = iRowid % pSeg->nPgTombstone;
pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
if( pPg==0 ){
assert( p->rc!=SQLITE_OK );
return;
}
if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
fts5DataRelease(pPg);
return;
}
}
/* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
if( iRowid>0xFFFFFFFF ) szKey = 8;
/* Rebuild the hash table */
fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );
/* If all has succeeded, write the new rowid into one of the new hash
** table pages, then write them all out to disk. */
if( nHash ){
int ii = 0;
fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
for(ii=0; ii<nHash; ii++){
i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
}
pSeg->nPgTombstone = nHash;
fts5StructureWrite(p, p->pStruct);
}
fts5DataRelease(pPg);
fts5IndexFreeArray(apHash, nHash);
}
return ret;
}
#ifdef SQLITE_DEBUG
/*
** This function is purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** Instead, it tests that the same set of pgno/rowid combinations are
** visited regardless of whether the doclist-index identified by parameters
** iSegid/iLeaf is iterated in forwards or reverse order.
*/
static void fts5TestDlidxReverse(
Fts5Index *p,
int iSegid, /* Segment id to load from */
int iLeaf /* Load doclist-index for this leaf */
){
Fts5DlidxIter *pDlidx = 0;
u64 cksum1 = 13;
u64 cksum2 = 13;
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(p, pDlidx)
){
i64 iRowid = fts5DlidxIterRowid(pDlidx);
int pgno = fts5DlidxIterPgno(pDlidx);
assert( pgno>iLeaf );
cksum1 += iRowid + ((i64)pgno<<32);
}
fts5DlidxIterFree(pDlidx);
pDlidx = 0;
for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterPrev(p, pDlidx)
){
i64 iRowid = fts5DlidxIterRowid(pDlidx);
int pgno = fts5DlidxIterPgno(pDlidx);
assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
cksum2 += iRowid + ((i64)pgno<<32);
}
fts5DlidxIterFree(pDlidx);
pDlidx = 0;
Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
int iFirst,
int iNoRowid,
int iLast
){
int i;
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
if( pLeaf ){
if( !fts5LeafIsTermless(pLeaf)
|| (i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf))
){
FTS5_CORRUPT_ROWID(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
}
}
fts5DataRelease(pLeaf);
}
}
static void fts5IntegrityCheckPgidx(Fts5Index *p, i64 iRowid, Fts5Data *pLeaf){
i64 iTermOff = 0;
int ii;
Fts5Buffer buf1 = {0,0,0};
Fts5Buffer buf2 = {0,0,0};
ii = pLeaf->szLeaf;
while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
int res;
i64 iOff;
Fts5Config *pConfig = p->pConfig;
int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
sqlite3_stmt *pStmt = 0;
int rc2;
int iIdxPrevLeaf = pSeg->pgnoFirst-1;
int iDlidxPrevLeaf = pSeg->pgnoLast;
if( pSeg->pgnoFirst==0 ) return;
fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
"SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
"ORDER BY 1, 2",
pConfig->zDb, pConfig->zName, pSeg->iSegid
));
/* Iterate through the b-tree hierarchy. */
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iRow; /* Rowid for this leaf */
Fts5Data *pLeaf; /* Data for this leaf */
const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
int iIdxLeaf = sqlite3_column_int(pStmt, 2);
int bIdxDlidx = sqlite3_column_int(pStmt, 3);
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
pLeaf = fts5LeafRead(p, iRow);
if( pLeaf==0 ) break;
/* Check that the leaf contains at least one term, and that it is equal
** to or larger than the split-key in zIdxTerm. Also check that if there
** is also a rowid pointer within the leaf page header, it points to a
** location before the term. */
if( pLeaf->nn<=pLeaf->szLeaf ){
if( nIdxTerm==0
iRowidOff = fts5LeafFirstRowidOff(pLeaf);
if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
FTS5_CORRUPT_ROWID(p, iRow);
}else{
iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
if( res==0 ) res = nTerm - nIdxTerm;
if( res<0 ) FTS5_CORRUPT_ROWID(p, iRow);
}
fts5IntegrityCheckPgidx(p, iRow, pLeaf);
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
fts5IndexIntegrityCheckEmpty(
p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
);
if( p->rc ) break;
/* If there is a doclist-index, check that it looks right. */
if( bIdxDlidx ){
Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
int iPrevLeaf = iIdxLeaf;
int iSegid = pSeg->iSegid;
int iPg = 0;
i64 iKey;
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(p, pDlidx)
){
/* Check any rowid-less pages that occur before the current leaf. */
for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
if( fts5LeafFirstRowidOff(pLeaf)!=0 ) FTS5_CORRUPT_ROWID(p, iKey);
fts5DataRelease(pLeaf);
}
}
iPrevLeaf = fts5DlidxIterPgno(pDlidx);
/* Check that the leaf page indicated by the iterator really does
** contain the rowid suggested by the same. */
iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
i64 iRowid;
int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
ASSERT_SZLEAF_OK(pLeaf);
if( iRowidOff>=pLeaf->szLeaf ){
FTS5_CORRUPT_ROWID(p, iKey);
}else if( bSecureDelete==0 || iRowidOff>0 ){
i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
FTS5_CORRUPT_ROWID(p, iKey);
}
}
fts5DataRelease(pLeaf);
}
}
iDlidxPrevLeaf = iPg;
fts5DlidxIterFree(pDlidx);
fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
}else{
iDlidxPrevLeaf = pSeg->pgnoLast;
/* TODO: Check there is no doclist index */
}
iIdxPrevLeaf = iIdxLeaf;
}
rc2 = sqlite3_finalize(pStmt);
if( p->rc==SQLITE_OK ) p->rc = rc2;
*/
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Split a segment-data rowid from the %_data table into its component
** fields. This is the inverse of macro FTS5_SEGMENT_ROWID().
**
** Fields are extracted starting from the least-significant bits of
** iRowid, in this order:
**
**   page number    (FTS5_DATA_PAGE_B bits)
**   height         (FTS5_DATA_HEIGHT_B bits)
**   dlidx flag     (low bit of a FTS5_DATA_DLI_B wide field)
**   segment id     (FTS5_DATA_ID_B bits)
**   tombstone flag (low bit of whatever remains)
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *pbTombstone,               /* OUT: Tombstone hash flag */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno   = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid  = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits of iRowid not yet decoded */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRest & 0x0001);
  iRest >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRest & mSegid);
  iRest >>= FTS5_DATA_ID_B;

  *pbTombstone = (int)(iRest & 0x0001);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a human-readable description of %_data rowid iKey to buffer
** pBuf. The rowid is decoded with fts5DecodeRowid(). The text appended
** is one of:
**
**   "{averages} "     segment id is 0 and iKey==FTS5_AVERAGES_ROWID
**   "{structure}"     segment id is 0, any other iKey
**   "{[dlidx ][tombstone ]segid=N h=N pgno=N}"   otherwise
**
** pRc is passed through unchanged to sqlite3Fts5BufferAppendPrintf().
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int segid;                      /* Segment id component of iKey */
  int height;                     /* Height component of iKey */
  int pgno;                       /* Page number component of iKey */
  int isDlidx;                    /* True if the dlidx flag is set */
  int isTomb;                     /* True if the tombstone flag is set */

  fts5DecodeRowid(iKey, &isTomb, &segid, &isDlidx, &height, &pgno);

  if( segid!=0 ){
    /* A page that belongs to a segment. */
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
        isDlidx ? "dlidx " : "",
        isTomb ? "tombstone " : "",
        segid, height, pgno
    );
    return;
  }

  /* Segment id 0: either the averages record or the structure record. */
  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
static void fts5DebugStructure(
int *pRc, /* IN/OUT: error code */
Fts5Buffer *pBuf,
Fts5Structure *p
int iLvl, iSeg; /* Iterate through levels, segments */
for(iLvl=0; iLvl<p->nLevel; iLvl++){
Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
" {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
);
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
);
if( pSeg->iOrigin1>0 ){
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
pSeg->iOrigin1, pSeg->iOrigin2
);
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
}
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The implementation of user-defined scalar function fts5_decode().
*/
static void fts5DecodeFunction(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args (always 2) */
sqlite3_value **apVal /* Function arguments */
){
i64 iRowid; /* Rowid for record being decoded */
int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
int bTomb;
const u8 *aBlob; int n; /* Record to decode */
u8 *a = 0;
Fts5Buffer s; /* Build up text to return here */
int rc = SQLITE_OK; /* Return code */
sqlite3_int64 nSpace = 0;
int eDetailNone = (sqlite3_user_data(pCtx)!=0);
assert( nArg==2 );
UNUSED_PARAM(nArg);
/* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
** buffer overreads even if the record is corrupt. */
n = sqlite3_value_bytes(apVal[1]);
aBlob = sqlite3_value_blob(apVal[1]);
nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING;
a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
if( a==0 ) goto decode_out;
if( n>0 ) memcpy(a, aBlob, n);
fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);
fts5DebugRowid(&rc, &s, iRowid);
if( bDlidx ){
Fts5Data dlidx;
Fts5DlidxLvl lvl;
dlidx.p = a;
dlidx.nn = n;
memset(&lvl, 0, sizeof(Fts5DlidxLvl));
u32 *aSlot = (u32*)&aBlob[8];
if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]);
}else{
u64 *aSlot = (u64*)&aBlob[8];
if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]);
}
if( iVal!=0 ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
}
}
}else if( iSegid==0 ){
if( iRowid==FTS5_AVERAGES_ROWID ){
fts5DecodeAverages(&rc, &s, a, n);
}else{
fts5DecodeStructure(&rc, &s, a, n);
}
}else if( eDetailNone ){
Fts5Buffer term; /* Current term read from page */
int szLeaf;
int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
int iTermOff;
int nKeep = 0;
int iOff;
memset(&term, 0, sizeof(Fts5Buffer));
/* Decode any entries that occur before the first term. */
if( szLeaf<n ){
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
}else{
iTermOff = szLeaf;
}
fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
iOff = iTermOff;
while( iOff<szLeaf && rc==SQLITE_OK ){
int nAppend;
/* Read the term data for the next term*/
iOff += fts5GetVarint32(&a[iOff], nAppend);
term.n = nKeep;
fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
fts5BufferAppendTerm(&rc, &s, &term);
iOff += nAppend;
/* Figure out where the doclist for this term ends */
if( iPgidxOff<n ){
int nIncr;
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
iTermOff += nIncr;
}else{
iTermOff = szLeaf;
}
if( iTermOff>szLeaf ){
rc = FTS5_CORRUPT;
}else{
fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
}
iOff = iTermOff;
if( iOff<szLeaf ){
iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}
fts5BufferFree(&term);
}else{
Fts5Buffer term; /* Current term read from page */
int szLeaf; /* Offset of pgidx in a[] */
int iPgidxOff;
int iPgidxPrev = 0; /* Previous value read from pgidx */
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
int nDoclist;
memset(&term, 0, sizeof(Fts5Buffer));
if( n<4 ){
sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
goto decode_out;
}else{
iRowidOff = fts5GetU16(&a[0]);
iPgidxOff = szLeaf = fts5GetU16(&a[2]);
if( iPgidxOff<n ){
fts5GetVarint32(&a[iPgidxOff], iTermOff);
}else if( iPgidxOff>n ){
rc = FTS5_CORRUPT;
goto decode_out;
}
}
/* Decode the position list tail at the start of the page */
if( iRowidOff!=0 ){
iOff = iRowidOff;
}else if( iTermOff!=0 ){
iOff = iTermOff;
/* Decode any more doclist data that appears on the page before the
** first term. */
nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
if( nDoclist+iOff>n ){
rc = FTS5_CORRUPT;
goto decode_out;
}
fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
while( iPgidxOff<n && rc==SQLITE_OK ){
int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
int nByte; /* Bytes of data */
int iEnd;
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
iPgidxPrev += nByte;
iOff = iPgidxPrev;
if( iPgidxOff<n ){
fts5GetVarint32(&a[iPgidxOff], nByte);
iEnd = iPgidxPrev + nByte;
}else{
iEnd = szLeaf;
}
if( iEnd>szLeaf ){
rc = FTS5_CORRUPT;
break;
}
if( bFirst==0 ){
iOff += fts5GetVarint32(&a[iOff], nByte);
int nArg, /* Number of args (always 2) */
sqlite3_value **apVal /* Function arguments */
){
const char *zArg;
if( nArg==0 ){
sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
}else{
zArg = (const char*)sqlite3_value_text(apVal[0]);
if( 0==sqlite3_stricmp(zArg, "segment") ){
i64 iRowid;
int segid, pgno;
if( nArg!=3 ){
sqlite3_result_error(pCtx,
"should be: fts5_rowid('segment', segid, pgno))", -1
);
}else{
segid = sqlite3_value_int(apVal[1]);
pgno = sqlite3_value_int(apVal[2]);
iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
sqlite3_result_int64(pCtx, iRowid);
}
}else{
sqlite3_result_error(pCtx,
"first arg to fts5_rowid() must be 'segment'" , -1
);
}
}
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
void *pAux,
int argc, const char *const*argv,
sqlite3_vtab **ppVtab,
char **pzErr
){
Fts5StructVtab *pNew = 0;
int rc = SQLITE_OK;
rc = sqlite3_declare_vtab(db,
"CREATE TABLE xyz("
"level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
"npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
);
if( rc==SQLITE_OK ){
pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
}
*ppVtab = (sqlite3_vtab*)pNew;
return rc;
}
switch( i ){
case 0: /* level */
sqlite3_result_int(ctx, pCsr->iLevel);
break;
case 1: /* segment */
sqlite3_result_int(ctx, pCsr->iSeg);
break;
case 2: /* merge */
sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
break;
case 3: /* segid */
sqlite3_result_int(ctx, pSeg->iSegid);
break;
case 4: /* leaf1 */
sqlite3_result_int(ctx, pSeg->pgnoFirst);
break;
case 5: /* leaf2 */
sqlite3_result_int(ctx, pSeg->pgnoLast);
break;
case 6: /* origin1 */
sqlite3_result_int64(ctx, pSeg->iOrigin1);
break;
t/50_foreign_key_info.t view on Meta::CPAN
albumartist INTEGER NOT NULL REFERENCES artist(artistid)
ON DELETE RESTRICT
ON UPDATE CASCADE DEFERRABLE,
albumname TEXT,
albumcover BINARY,
albumeditor INTEGER NOT NULL REFERENCES editor(editorid),
PRIMARY KEY(albumartist, albumname)
);
CREATE TABLE song(
songid INTEGER PRIMARY KEY AUTOINCREMENT,
songartist INTEGER,
songalbum TEXT,
songname TEXT,
FOREIGN KEY(songartist, songalbum) REFERENCES album(albumartist, albumname)
);
__EOSQL__
my $dbh = connect_ok( RaiseError => 1, PrintError => 0, AutoCommit => 1 );
my $sth;
my $fk_data;
t/rt_26775_distinct.t view on Meta::CPAN
$sth->finish( );
}
# ######
# Then we test the bug.
#
# We test with both 'DISTINCT(t.name) [..]' and 'DISTINCT t.name [..]'
#
my $query_with_parens = trim(q{
SELECT DISTINCT(t.name), t.tagid
FROM objtagmap m,tags t
WHERE (m.objid = 1)
AND (t.tagid = m.tagid)
});
my $query_without_parens = trim(q{
SELECT DISTINCT t.name, t.tagid
FROM objtagmap m,tags t
WHERE (m.objid = 1)
AND (t.tagid = m.tagid)
});
foreach my $query (($query_with_parens, $query_without_parens)) {
# just to print readable test descriptions.
my $abbrev = substr $query, 0, 25;
my $sth = $dbh->prepare($query);
ok( ref $sth, "prepare $abbrev" );
my $ret = $sth->execute( );
ok( $ret, "execute $abbrev" );
while (my $hres = $sth->fetchrow_hashref) {
# Here we should get two hash keys: 'name' and 'tagid'.
ok( exists $hres->{name}, 'exists $hres->{name}' );
ok( exists $hres->{tagid}, 'exists $hres->{tagid}' );
if (! exists $hres->{name}) {
$Data::Dumper::Varname = '';
eval 'require Data::Dumper;';
if (! $@) {
$Data::Dumper::Varname = 'fetchrow_hashref';
print {*STDERR} "#[RT #26775] The keys we got was: ",
Data::Dumper::Dumper($hres), "\n";
}
}
}
t/rt_26775_distinct.t view on Meta::CPAN
CREATE TABLE object (
id INTEGER PRIMARY KEY NOT NULL,
parent INTEGER NOT NULL DEFAULT 1,
name VARCHAR(255) NOT NULL,
type CHAR(16) NOT NULL default 'directory'
);
CREATE TABLE objtagmap (
id INTEGER PRIMARY KEY NOT NULL,
objid INTEGER NOT NULL,
tagid INTEGER NOT NULL
);
CREATE TABLE tags (
tagid INTEGER PRIMARY KEY NOT NULL,
name char(32) NOT NULL
);
INSERT INTO object (id, parent, name, type) VALUES
(1, 1, 'All about the the distinct hash key problem, and how to survive
deadly weapons', 'article');
INSERT INTO tags(tagid, name) VALUES (1,'bugs');
INSERT INTO objtagmap(id, objid, tagid) VALUES(1, 1, 1);
( run in 0.747 second using v1.01-cache-2.11-cpan-5735350b133 )