DBD-SQLeet
view release on metacpan or search on metacpan
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pread", (sqlite3_syscall_ptr)pread, 0 },
#else
{ "pread", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
#if defined(USE_PREAD64)
{ "pread64", (sqlite3_syscall_ptr)pread64, 0 },
#else
{ "pread64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent)
{ "write", (sqlite3_syscall_ptr)write, 0 },
#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
#else
{ "pwrite", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\
aSyscall[12].pCurrent)
#if defined(USE_PREAD64)
{ "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },
#else
{ "pwrite64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\
aSyscall[13].pCurrent)
{ "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
{ "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },
#else
{ "fallocate", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
{ "unlink", (sqlite3_syscall_ptr)unlink, 0 },
#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent)
{ "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
{ "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
{ "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
#if defined(HAVE_FCHOWN)
{ "fchown", (sqlite3_syscall_ptr)fchown, 0 },
#else
{ "fchown", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
#if defined(HAVE_FCHOWN)
{ "geteuid", (sqlite3_syscall_ptr)geteuid, 0 },
#else
{ "geteuid", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "mmap", (sqlite3_syscall_ptr)mmap, 0 },
#else
{ "mmap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "munmap", (sqlite3_syscall_ptr)munmap, 0 },
#else
{ "munmap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent)
#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
{ "mremap", (sqlite3_syscall_ptr)mremap, 0 },
#else
{ "mremap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 },
#else
{ "getpagesize", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent)
#if defined(HAVE_READLINK)
{ "readlink", (sqlite3_syscall_ptr)readlink, 0 },
#else
{ "readlink", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent)
#if defined(HAVE_LSTAT)
{ "lstat", (sqlite3_syscall_ptr)lstat, 0 },
#else
{ "lstat", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)
#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
# ifdef __ANDROID__
{ "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 },
# else
{ "ioctl", (sqlite3_syscall_ptr)ioctl, 0 },
# endif
#else
{ "ioctl", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent)
}; /* End of the overrideable system calls */
/*
** On some systems, calls to fchown() will trigger a message in a security
** log if they come from non-root processes. So avoid calling fchown() if
** we are not running as root.
*/
static int robustFchown(int fd, uid_t uid, gid_t gid){
#if defined(HAVE_FCHOWN)
return osGeteuid() ? 0 : osFchown(fd,uid,gid);
#else
return 0;
#endif
}
/*
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes. Return SQLITE_OK opon successfully updating the
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
** system call named zName.
*/
static int unixSetSystemCall(
sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */
const char *zName, /* Name of system call to override */
sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */
){
unsigned int i;
int rc = SQLITE_NOTFOUND;
UNUSED_PARAMETER(pNotUsed);
if( zName==0 ){
/* If no zName is given, restore all system calls to their default
** settings and return NULL
*/
rc = SQLITE_OK;
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
if( aSyscall[i].pDefault ){
aSyscall[i].pCurrent = aSyscall[i].pDefault;
}
}
}else{
/* If zName is specified, operate on only the one system call
** specified.
*/
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
if( strcmp(zName, aSyscall[i].zName)==0 ){
if( aSyscall[i].pDefault==0 ){
aSyscall[i].pDefault = aSyscall[i].pCurrent;
}
rc = SQLITE_OK;
if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
aSyscall[i].pCurrent = pNewFunc;
break;
}
}
}
return rc;
}
/*
** Return the value of a system call. Return NULL if zName is not a
** recognized system call name. NULL is also returned if the system call
** is currently undefined.
*/
static sqlite3_syscall_ptr unixGetSystemCall(
sqlite3_vfs *pNotUsed,
const char *zName
){
unsigned int i;
const char *zBasePath = pDbFd->zPath;
#endif
/* Call fstat() to figure out the permissions on the database file. If
** a new *-shm file is created, an attempt will be made to create it
** with the same permissions.
*/
if( osFstat(pDbFd->h, &sStat) ){
rc = SQLITE_IOERR_FSTAT;
goto shm_open_err;
}
#ifdef SQLITE_SHM_DIRECTORY
nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
nShmFilename = 6 + (int)strlen(zBasePath);
#endif
pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename );
if( pShmNode==0 ){
rc = SQLITE_NOMEM_BKPT;
goto shm_open_err;
}
memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
zShm = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
sqlite3_snprintf(nShmFilename, zShm,
SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
(u32)sStat.st_ino, (u32)sStat.st_dev);
#else
sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath);
sqlite3FileSuffix3(pDbFd->zPath, zShm);
#endif
pShmNode->hShm = -1;
pDbFd->pInode->pShmNode = pShmNode;
pShmNode->pInode = pDbFd->pInode;
if( sqlite3GlobalConfig.bCoreMutex ){
pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
if( pShmNode->pShmMutex==0 ){
rc = SQLITE_NOMEM_BKPT;
goto shm_open_err;
}
}
if( pInode->bProcessLock==0 ){
if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT,(sStat.st_mode&0777));
}
if( pShmNode->hShm<0 ){
pShmNode->hShm = robust_open(zShm, O_RDONLY, (sStat.st_mode&0777));
if( pShmNode->hShm<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm);
goto shm_open_err;
}
pShmNode->isReadonly = 1;
}
/* If this process is running as root, make sure that the SHM file
** is owned by the same user that owns the original database. Otherwise,
** the original owner will not be able to connect.
*/
robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid);
rc = unixLockSharedMemory(pDbFd, pShmNode);
if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err;
}
}
/* Make the new connection a child of the unixShmNode */
p->pShmNode = pShmNode;
#ifdef SQLITE_DEBUG
p->id = pShmNode->nextShmId++;
#endif
pShmNode->nRef++;
pDbFd->pShm = p;
unixLeaveMutex();
/* The reference count on pShmNode has already been incremented under
** the cover of the unixEnterMutex() mutex and the pointer from the
** new (struct unixShm) object to the pShmNode has been set. All that is
** left to do is to link the new object into the linked list starting
** at pShmNode->pFirst. This must be done while holding the
** pShmNode->pShmMutex.
*/
sqlite3_mutex_enter(pShmNode->pShmMutex);
p->pNext = pShmNode->pFirst;
pShmNode->pFirst = p;
sqlite3_mutex_leave(pShmNode->pShmMutex);
return rc;
/* Jump here on any error */
shm_open_err:
unixShmPurge(pDbFd); /* This call frees pShmNode if required */
sqlite3_free(p);
unixLeaveMutex();
return rc;
}
/*
** This function is called to obtain a pointer to region iRegion of the
** shared-memory associated with the database file fd. Shared-memory regions
** are numbered starting from zero. Each shared-memory region is szRegion
** bytes in size.
**
** If an error occurs, an error code is returned and *pp is set to NULL.
**
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
** region has not been allocated (by any client, including one running in a
** separate process), then *pp is set to NULL and SQLITE_OK returned. If
** bExtend is non-zero and the requested shared-memory region has not yet
** been allocated, it is allocated by this function.
**
** If the shared-memory region has already been allocated or is allocated by
** this call as described above, then it is mapped into this processes
** address space (if it is not already), *pp is set to point to the mapped
** memory and SQLITE_OK returned.
*/
static int unixShmMap(
sqlite3_file *fd, /* Handle open on database file */
int iRegion, /* Region to retrieve */
int szRegion, /* Size of regions */
int bExtend, /* True to extend file if necessary */
** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
** argument to this function.
**
** Such a file descriptor may exist if a database connection was closed
** but the associated file descriptor could not be closed because some
** other file descriptor open on the same file is holding a file-lock.
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
UnixUnusedFd *pUnused = 0;
/* Do not search for an unused file descriptor on vxworks. Not because
** vxworks would not benefit from the change (it might, we're not sure),
** but because no way to test it is currently available. It is better
** not to risk breaking vxworks support for the sake of such an obscure
** feature. */
#if !OS_VXWORKS
struct stat sStat; /* Results of stat() call */
unixEnterMutex();
/* A stat() call may fail for various reasons. If this happens, it is
** almost certain that an open() call on the same path will also fail.
** For this reason, if an error occurs in the stat() call here, it is
** ignored and -1 is returned. The caller will try to open a new file
** descriptor on the same path, fail, and return an error to SQLite.
**
** Even if a subsequent open() call does succeed, the consequences of
** not searching for a reusable file descriptor are not dire. */
if( inodeList!=0 && 0==osStat(zPath, &sStat) ){
unixInodeInfo *pInode;
pInode = inodeList;
while( pInode && (pInode->fileId.dev!=sStat.st_dev
|| pInode->fileId.ino!=(u64)sStat.st_ino) ){
pInode = pInode->pNext;
}
if( pInode ){
UnixUnusedFd **pp;
assert( sqlite3_mutex_notheld(pInode->pLockMutex) );
sqlite3_mutex_enter(pInode->pLockMutex);
for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
pUnused = *pp;
if( pUnused ){
*pp = pUnused->pNext;
}
sqlite3_mutex_leave(pInode->pLockMutex);
}
}
unixLeaveMutex();
#endif /* if !OS_VXWORKS */
return pUnused;
}
/*
** Find the mode, uid and gid of file zFile.
*/
static int getFileMode(
const char *zFile, /* File name */
mode_t *pMode, /* OUT: Permissions of zFile */
uid_t *pUid, /* OUT: uid of zFile. */
gid_t *pGid /* OUT: gid of zFile. */
){
struct stat sStat; /* Output of stat() on database file */
int rc = SQLITE_OK;
if( 0==osStat(zFile, &sStat) ){
*pMode = sStat.st_mode & 0777;
*pUid = sStat.st_uid;
*pGid = sStat.st_gid;
}else{
rc = SQLITE_IOERR_FSTAT;
}
return rc;
}
/*
** This function is called by unixOpen() to determine the unix permissions
** to create new files with. If no error occurs, then SQLITE_OK is returned
** and a value suitable for passing as the third argument to open(2) is
** written to *pMode. If an IO error occurs, an SQLite error code is
** returned and the value of *pMode is not modified.
**
** In most cases, this routine sets *pMode to 0, which will become
** an indication to robust_open() to create the file using
** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
** But if the file being opened is a WAL or regular journal file, then
** this function queries the file-system for the permissions on the
** corresponding database file and sets *pMode to this value. Whenever
** possible, WAL and journal files are created using the same permissions
** as the associated database file.
**
** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
** original filename is unavailable. But 8_3_NAMES is only used for
** FAT filesystems and permissions do not matter there, so just use
** the default permissions.
*/
static int findCreateFileMode(
const char *zPath, /* Path of file (possibly) being created */
int flags, /* Flags passed as 4th argument to xOpen() */
mode_t *pMode, /* OUT: Permissions to open file with */
uid_t *pUid, /* OUT: uid to set on the file */
gid_t *pGid /* OUT: gid to set on the file */
){
int rc = SQLITE_OK; /* Return Code */
*pMode = 0;
*pUid = 0;
*pGid = 0;
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
char zDb[MAX_PATHNAME+1]; /* Database file path */
int nDb; /* Number of valid bytes in zDb */
/* zPath is a path to a WAL or journal file. The following block derives
** the path to the associated database file from zPath. This block handles
** the following naming conventions:
**
** "<path to db>-journal"
** "<path to db>-wal"
** "<path to db>-journalNN"
** "<path to db>-walNN"
**
** where NN is a decimal number. The NN naming schemes are
** used by the test_multiplex.c module.
*/
nDb = sqlite3Strlen30(zPath) - 1;
while( zPath[nDb]!='-' ){
/* In normal operation, the journal file name will always contain
** a '-' character. However in 8+3 filename mode, or if a corrupt
** rollback journal specifies a master journal with a goofy name, then
** the '-' might be missing. */
if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK;
nDb--;
}
memcpy(zDb, zPath, nDb);
zDb[nDb] = '\0';
rc = getFileMode(zDb, pMode, pUid, pGid);
}else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
*pMode = 0600;
}else if( flags & SQLITE_OPEN_URI ){
/* If this is a main database file and the file was opened using a URI
** filename, check for the "modeof" parameter. If present, interpret
** its value as a filename and try to copy the mode, uid and gid from
** that file. */
const char *z = sqlite3_uri_parameter(zPath, "modeof");
if( z ){
rc = getFileMode(z, pMode, pUid, pGid);
}
}
return rc;
}
/*
** Open the file zPath.
**
** Previously, the SQLite OS layer used three functions in place of this
** one:
**
** sqlite3OsOpenReadWrite();
** sqlite3OsOpenReadOnly();
** sqlite3OsOpenExclusive();
**
** These calls correspond to the following combinations of flags:
**
** ReadWrite() -> (READWRITE | CREATE)
** ReadOnly() -> (READONLY)
** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
**
** The old OpenExclusive() accepted a boolean argument - "delFlag". If
** true, the file was configured to be automatically deleted when the
** file handle closed. To achieve the same effect using this new
** interface, add the DELETEONCLOSE flag to those specified above for
** OpenExclusive().
*/
static int unixOpen(
sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */
const char *zPath, /* Pathname of file to be opened */
sqlite3_file *pFile, /* The file descriptor to be filled in */
int flags, /* Input flags to control the opening */
int *pOutFlags /* Output flags returned to SQLite core */
){
unixFile *p = (unixFile *)pFile;
int fd = -1; /* File descriptor returned by open() */
int openFlags = 0; /* Flags to pass to open() */
int eType = flags&0xFFFFFF00; /* Type of file to open */
int noLock; /* True to omit locking primitives */
int rc = SQLITE_OK; /* Function Return Code */
int ctrlFlags = 0; /* UNIXFILE_* flags */
int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
int isCreate = (flags & SQLITE_OPEN_CREATE);
int isReadonly = (flags & SQLITE_OPEN_READONLY);
int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
#if SQLITE_ENABLE_LOCKING_STYLE
int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
#endif
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
struct statfs fsInfo;
#endif
/* If creating a master or main-file journal, this function will open
** a file-descriptor on the directory too. The first time unixSync()
|| eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
|| eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
);
/* Detect a pid change and reset the PRNG. There is a race condition
** here such that two or more threads all trying to open databases at
** the same instant might all reset the PRNG. But multiple resets
** are harmless.
*/
if( randomnessPid!=osGetpid(0) ){
randomnessPid = osGetpid(0);
sqlite3_randomness(0,0);
}
memset(p, 0, sizeof(unixFile));
if( eType==SQLITE_OPEN_MAIN_DB ){
UnixUnusedFd *pUnused;
pUnused = findReusableFd(zName, flags);
if( pUnused ){
fd = pUnused->fd;
}else{
pUnused = sqlite3_malloc64(sizeof(*pUnused));
if( !pUnused ){
return SQLITE_NOMEM_BKPT;
}
}
p->pPreallocatedUnused = pUnused;
/* Database filenames are double-zero terminated if they are not
** URIs with parameters. Hence, they can always be passed into
** sqlite3_uri_parameter(). */
assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
}else if( !zName ){
/* If zName is NULL, the upper layer is requesting a temp file. */
assert(isDelete && !isNewJrnl);
rc = unixGetTempname(pVfs->mxPathname, zTmpname);
if( rc!=SQLITE_OK ){
return rc;
}
zName = zTmpname;
/* Generated temporary filenames are always double-zero terminated
** for use by sqlite3_uri_parameter(). */
assert( zName[strlen(zName)+1]==0 );
}
/* Determine the value of the flags parameter passed to POSIX function
** open(). These must be calculated even if open() is not called, as
** they may be stored as part of the file handle and used by the
** 'conch file' locking functions later on. */
if( isReadonly ) openFlags |= O_RDONLY;
if( isReadWrite ) openFlags |= O_RDWR;
if( isCreate ) openFlags |= O_CREAT;
if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
openFlags |= (O_LARGEFILE|O_BINARY);
if( fd<0 ){
mode_t openMode; /* Permissions to create file with */
uid_t uid; /* Userid for the file */
gid_t gid; /* Groupid for the file */
rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
if( rc!=SQLITE_OK ){
assert( !p->pPreallocatedUnused );
assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
return rc;
}
fd = robust_open(zName, openFlags, openMode);
OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));
assert( !isExclusive || (openFlags & O_CREAT)!=0 );
if( fd<0 ){
if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){
/* If unable to create a journal because the directory is not
** writable, change the error code to indicate that. */
rc = SQLITE_READONLY_DIRECTORY;
}else if( errno!=EISDIR && isReadWrite ){
/* Failed to open the file for read/write access. Try read-only. */
flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
openFlags &= ~(O_RDWR|O_CREAT);
flags |= SQLITE_OPEN_READONLY;
openFlags |= O_RDONLY;
isReadonly = 1;
fd = robust_open(zName, openFlags, openMode);
}
}
if( fd<0 ){
int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
if( rc==SQLITE_OK ) rc = rc2;
goto open_finished;
}
/* If this process is running as root and if creating a new rollback
** journal or WAL file, set the ownership of the journal or WAL to be
** the same as the original database.
*/
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
robustFchown(fd, uid, gid);
}
}
assert( fd>=0 );
if( pOutFlags ){
*pOutFlags = flags;
}
if( p->pPreallocatedUnused ){
p->pPreallocatedUnused->fd = fd;
p->pPreallocatedUnused->flags = flags;
}
if( isDelete ){
#if OS_VXWORKS
zPath = zName;
#elif defined(SQLITE_UNLINK_AFTER_CLOSE)
zPath = sqlite3_mprintf("%s", zName);
if( zPath==0 ){
robust_close(p, fd, __LINE__);
return SQLITE_NOMEM_BKPT;
}
#else
osUnlink(zName);
#endif
}
#if SQLITE_ENABLE_LOCKING_STYLE
else{
p->openFlags = openFlags;
}
#endif
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
if( fstatfs(fd, &fsInfo) == -1 ){
storeLastErrno(p, errno);
robust_close(p, fd, __LINE__);
return SQLITE_IOERR_ACCESS;
}
if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
}
if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) {
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
}
#endif
/* Set up appropriate ctrlFlags */
if( isDelete ) ctrlFlags |= UNIXFILE_DELETE;
if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY;
noLock = eType!=SQLITE_OPEN_MAIN_DB;
if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK;
if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC;
if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
#if SQLITE_ENABLE_LOCKING_STYLE
#if SQLITE_PREFER_PROXY_LOCKING
isAutoProxy = 1;
#endif
if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){
char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
int useProxy = 0;
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts3 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset. *piStartOffset should be set to the index of the first
** byte of the token in the input buffer. *piEndOffset should be set
** to the index of the first byte just past the end of the token in
** the input buffer.
**
** The buffer *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
/***********************************************************************
** Methods below this point are only available if iVersion>=1.
*/
/*
** Configure the language id of a tokenizer cursor.
*/
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
int fts3_global_term_cnt(int iTerm, int iCol);
int fts3_term_cnt(int iTerm, int iCol);
#endif /* _FTS3_TOKENIZER_H_ */
/************** End of fts3_tokenizer.h **************************************/
/************** Continuing where we left off in fts3Int.h ********************/
/************** Include fts3_hash.h in the middle of fts3Int.h ***************/
/************** Begin file fts3_hash.h ***************************************/
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS3_HASH_H_
#define _FTS3_HASH_H_
/* Forward declarations of structures. */
typedef struct Fts3Hash Fts3Hash;
typedef struct Fts3HashElem Fts3HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct Fts3Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
Fts3HashElem *first; /* The first element of the array */
int htsize; /* Number of buckets in the hash table */
struct _fts3ht { /* the hash table */
# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
#endif
typedef struct Fts3Table Fts3Table;
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
typedef struct Fts3PhraseToken Fts3PhraseToken;
typedef struct Fts3Doclist Fts3Doclist;
typedef struct Fts3SegFilter Fts3SegFilter;
typedef struct Fts3DeferredToken Fts3DeferredToken;
typedef struct Fts3SegReader Fts3SegReader;
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
typedef struct MatchinfoBuffer MatchinfoBuffer;
/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/
struct Fts3Table {
sqlite3_vtab base; /* Base class used by SQLite core */
sqlite3 *db; /* The database connection */
const char *zDb; /* logical database name */
const char *zName; /* virtual table name */
int nColumn; /* number of named columns in virtual table */
char **azColumn; /* column names. malloced */
u8 *abNotindexed; /* True for 'notindexed' columns */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
int nAutoincrmerge; /* Value configured by 'automerge' */
u32 nLeafAdd; /* Number of leaf blocks added this trans */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
sqlite3_stmt *aStmt[40];
sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
u8 bFts4; /* True for FTS4, false for FTS3 */
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 bDescIdx; /* True if doclists are in reverse order */
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
/*
** The following array of hash tables is used to buffer pending index
** updates during transactions. All pending updates buffered at any one
** time must share a common language-id (see the FTS4 langid= feature).
** The current language id is stored in variable iPrevLangid.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
** terms that appear in the document set. Each subsequent index in aIndex[]
** is an index of prefixes of a specific length.
**
** Variable nPendingData contains an estimate the memory consumed by the
** pending data structures, including hash table overhead, but not including
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
** recently inserted record.
*/
int nIndex; /* Size of aIndex[] */
struct Fts3Index {
int nPrefix; /* Prefix length (0 for main terms index) */
Fts3Hash hPending; /* Pending terms table for this index */
} *aIndex;
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
int iPrevLangid; /* Langid of recently inserted document */
int bPrevDelete; /* True if last operation was a delete */
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
** values do not contribute to FTS functionality; they are used for
** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
#endif
#ifdef SQLITE_TEST
/* True to disable the incremental doclist optimization. This is controled
** by special insert command 'test-no-incr-doclist'. */
int bNoIncrDoclist;
#endif
};
/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/
struct Fts3Cursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
i16 eSearch; /* Search strategy (see below) */
u8 isEof; /* True if at End Of Results */
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
u8 bSeekStmt; /* True if pStmt is a seek */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
char *pNextId; /* Pointer into the body of aDoclist */
char *aDoclist; /* List of docids for full-text queries */
int nDoclist; /* Size of buffer at aDoclist */
u8 bDesc; /* True to sort in descending order */
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
sqlite3_int64 nDoc; /* Documents in table */
i64 iMinDocid; /* Minimum docid to return */
i64 iMaxDocid; /* Maximum docid to return */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
};
#define FTS3_EVAL_FILTER 0
#define FTS3_EVAL_NEXT 1
#define FTS3_EVAL_MATCHINFO 2
/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actualy, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched. For example, in
**
** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
**
** Because the LHS of the MATCH operator is 2nd column "b",
** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
** indicating that all columns should be searched,
** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
*/
#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
/*
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
** above. The upper 16-bits contain a combination of the following
** bits, used to describe extra constraints on full-text searches.
*/
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
struct Fts3Doclist {
char *aAll; /* Array containing doclist (or NULL) */
int nAll; /* Size of a[] in bytes */
char *pNextDocid; /* Pointer to next docid */
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
int bFreeList; /* True if pList should be sqlite3_free()d */
char *pList; /* Pointer to position list following iDocid */
int nList; /* Length of position list */
};
const char *zFormat, /* Format string for SQL */
... /* Arguments to the format string */
){
va_list ap;
char *zSql;
if( *pRc ) return;
va_start(ap, zFormat);
zSql = sqlite3_vmprintf(zFormat, ap);
va_end(ap);
if( zSql==0 ){
*pRc = SQLITE_NOMEM;
}else{
*pRc = sqlite3_exec(db, zSql, 0, 0, 0);
sqlite3_free(zSql);
}
}
/*
** The xDestroy() virtual table method.
*/
static int fts3DestroyMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table *)pVtab;
int rc = SQLITE_OK; /* Return code */
const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */
sqlite3 *db = p->db; /* Database handle */
/* Drop the shadow tables */
if( p->zContentTbl==0 ){
fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
}
fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
/* If everything has worked, invoke fts3DisconnectMethod() to free the
** memory associated with the Fts3Table structure and return SQLITE_OK.
** Otherwise, return an SQLite error code.
*/
return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
}
/*
** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
** passed as the first argument. This is done as part of the xConnect()
** and xCreate() methods.
**
** If *pRc is non-zero when this function is called, it is a no-op.
** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
** before returning.
*/
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
if( *pRc==SQLITE_OK ){
int i; /* Iterator variable */
int rc; /* Return code */
char *zSql; /* SQL statement passed to declare_vtab() */
char *zCols; /* List of user defined columns */
const char *zLanguageid;
zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
/* Create a list of user columns for the virtual table */
zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
for(i=1; zCols && i<p->nColumn; i++){
zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
}
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
zSql = sqlite3_mprintf(
"CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
zCols, p->zName, zLanguageid
);
if( !zCols || !zSql ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_declare_vtab(p->db, zSql);
}
sqlite3_free(zSql);
sqlite3_free(zCols);
*pRc = rc;
}
}
/*
** Create the %_stat table if it does not already exist.
*/
SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
fts3DbExec(pRc, p->db,
"CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
"(id INTEGER PRIMARY KEY, value BLOB);",
p->zDb, p->zName
);
if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
}
/*
** Create the backing store tables (%_content, %_segments and %_segdir)
** required by the FTS3 table passed as the only argument. This is done
** as part of the vtab xCreate() method.
**
** If the p->bHasDocsize boolean is true (indicating that this is an
** FTS4 table, not an FTS3 table) then also create the %_docsize and
** %_stat tables required by FTS4.
*/
static int fts3CreateTables(Fts3Table *p){
int rc = SQLITE_OK; /* Return code */
int i; /* Iterator variable */
sqlite3 *db = p->db; /* The database connection */
if( p->zContentTbl==0 ){
const char *zLanguageid = p->zLanguageid;
char *zContentCols; /* Columns of %_content table */
/* Create a list of user columns for the content table */
zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
for(i=0; zContentCols && i<p->nColumn; i++){
char *z = p->azColumn[i];
zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
}
if( zLanguageid && zContentCols ){
zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
}
if( zContentCols==0 ) rc = SQLITE_NOMEM;
/* Create the content table */
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_content'(%s)",
p->zDb, p->zName, zContentCols
);
sqlite3_free(zContentCols);
}
/* Create other tables */
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
p->zDb, p->zName
);
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_segdir'("
"level INTEGER,"
"idx INTEGER,"
"start_block INTEGER,"
"leaves_end_block INTEGER,"
"end_block INTEGER,"
"root BLOB,"
"PRIMARY KEY(level, idx)"
");",
p->zDb, p->zName
);
if( p->bHasDocsize ){
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
p->zDb, p->zName
);
}
assert( p->bHasStat==p->bFts4 );
if( p->bHasStat ){
sqlite3Fts3CreateStatTable(&rc, p);
}
return rc;
}
/*
** Store the current database page-size in bytes in p->nPgsz.
**
** If *pRc is non-zero when this function is called, it is a no-op.
** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
** before returning.
*/
static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
if( *pRc==SQLITE_OK ){
int rc; /* Return code */
char *zSql; /* SQL text "PRAGMA %Q.page_size" */
sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */
zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
if( rc==SQLITE_OK ){
*/
static char *fts3QuoteId(char const *zInput){
int nRet;
char *zRet;
nRet = 2 + (int)strlen(zInput)*2 + 1;
zRet = sqlite3_malloc(nRet);
if( zRet ){
int i;
char *z = zRet;
*(z++) = '"';
for(i=0; zInput[i]; i++){
if( zInput[i]=='"' ) *(z++) = '"';
*(z++) = zInput[i];
}
*(z++) = '"';
*(z++) = '\0';
}
return zRet;
}
/*
** Return a list of comma separated SQL expressions and a FROM clause that
** could be used in a SELECT statement such as the following:
**
** SELECT <list of expressions> FROM %_content AS x ...
**
** to return the docid, followed by each column of text data in order
** from left to write. If parameter zFunc is not NULL, then instead of
** being returned directly each column of text data is passed to an SQL
** function named zFunc first. For example, if zFunc is "unzip" and the
** table has the three user-defined columns "a", "b", and "c", the following
** string is returned:
**
** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
**
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
** is the responsibility of the caller to eventually free it.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
** no error occurs, *pRc is left unmodified.
*/
static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
char *zRet = 0;
char *zFree = 0;
char *zFunction;
int i;
if( p->zContentTbl==0 ){
if( !zFunc ){
zFunction = "";
}else{
zFree = zFunction = fts3QuoteId(zFunc);
}
fts3Appendf(pRc, &zRet, "docid");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
}
sqlite3_free(zFree);
}else{
fts3Appendf(pRc, &zRet, "rowid");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
}
}
fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
p->zDb,
(p->zContentTbl ? p->zContentTbl : p->zName),
(p->zContentTbl ? "" : "_content")
);
return zRet;
}
/*
** Return a list of N comma separated question marks, where N is the number
** of columns in the %_content table (one for the docid plus one for each
** user-defined text column).
**
** If argument zFunc is not NULL, then all but the first question mark
** is preceded by zFunc and an open bracket, and followed by a closed
** bracket. For example, if zFunc is "zip" and the FTS3 table has three
** user-defined text columns, the following string is returned:
**
** "?, zip(?), zip(?), zip(?)"
**
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
** is the responsibility of the caller to eventually free it.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
** no error occurs, *pRc is left unmodified.
*/
static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
char *zRet = 0;
char *zFree = 0;
char *zFunction;
int i;
if( !zFunc ){
zFunction = "";
}else{
zFree = zFunction = fts3QuoteId(zFunc);
}
fts3Appendf(pRc, &zRet, "?");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", ?");
}
sqlite3_free(zFree);
return zRet;
}
static int fts3ConnectMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Pointer to tokenizer hash table */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
}
static int fts3CreateMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Pointer to tokenizer hash table */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
}
/*
** Set the pIdxInfo->estimatedRows variable to nRow. Unless this
** extension is currently being used by a version of SQLite too old to
** support estimatedRows. In that case this function is a no-op.
*/
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
#if SQLITE_VERSION_NUMBER>=3008002
if( sqlite3_libversion_number()>=3008002 ){
pIdxInfo->estimatedRows = nRow;
}
#endif
}
/*
** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
** extension is currently being used by a version of SQLite too old to
** support index-info flags. In that case this function is a no-op.
*/
static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){
#if SQLITE_VERSION_NUMBER>=3008012
if( sqlite3_libversion_number()>=3008012 ){
pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
}
#endif
}
/*
** Implementation of the xBestIndex method for FTS3 tables. There
** are three possible strategies, in order of preference:
**
** 1. Direct lookup by rowid or docid.
** 2. Full-text search using a MATCH operator on a non-docid column.
** 3. Linear scan of %_content table.
*/
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
int iLangidCons = -1; /* Index of langid=x constraint, if present */
int iDocidGe = -1; /* Index of docid>=x constraint, if present */
int iDocidLe = -1; /* Index of docid<=x constraint, if present */
int iIdx;
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
** strategy is possible.
*/
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
pInfo->estimatedCost = 5000000;
for(i=0; i<pInfo->nConstraint; i++){
int bDocid; /* True if this constraint is on docid */
struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
if( pCons->usable==0 ){
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
/* There exists an unusable MATCH constraint. This means that if
** the planner does elect to use the results of this call as part
** of the overall query plan the user will see an "unable to use
** function MATCH in the requested context" error. To discourage
** this, return a very high cost here. */
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
pInfo->estimatedCost = 1e50;
fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
return SQLITE_OK;
}
continue;
}
bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){
pInfo->idxNum = FTS3_DOCID_SEARCH;
pInfo->estimatedCost = 1.0;
iCons = i;
}
/* A MATCH constraint. Use a full-text search.
**
** If there is more than one MATCH constraint available, use the first
** one encountered. If there is both a MATCH constraint and a direct
** rowid/docid lookup, prefer the MATCH strategy. This is done even
** though the rowid/docid lookup is faster than a MATCH query, selecting
** it would lead to an "unable to use function MATCH in the requested
** context" error.
*/
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH
&& pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
){
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
pInfo->estimatedCost = 2.0;
iCons = i;
}
/* Equality constraint on the langid column */
if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& pCons->iColumn==p->nColumn + 2
){
iLangidCons = i;
}
if( bDocid ){
switch( pCons->op ){
case SQLITE_INDEX_CONSTRAINT_GE:
case SQLITE_INDEX_CONSTRAINT_GT:
iDocidGe = i;
break;
case SQLITE_INDEX_CONSTRAINT_LE:
case SQLITE_INDEX_CONSTRAINT_LT:
iDocidLe = i;
break;
}
}
}
/* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */
if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo);
iIdx = 1;
if( iCons>=0 ){
pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
pInfo->aConstraintUsage[iCons].omit = 1;
}
if( iLangidCons>=0 ){
pInfo->idxNum |= FTS3_HAVE_LANGID;
pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
}
if( iDocidGe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
}
if( iDocidLe>=0 ){
pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
}
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
** docid) order. Both ascending and descending are possible.
*/
if( pInfo->nOrderBy==1 ){
struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
if( pOrder->desc ){
pInfo->idxStr = "DESC";
}else{
pInfo->idxStr = "ASC";
}
pInfo->orderByConsumed = 1;
}
}
assert( p->pSegments==0 );
return SQLITE_OK;
}
/*
** Implementation of xOpen method.
*/
static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
sqlite3_vtab_cursor *pCsr; /* Allocated cursor */
UNUSED_PARAMETER(pVTab);
/* Allocate a buffer large enough for an Fts3Cursor structure. If the
** allocation succeeds, zero it and return SQLITE_OK. Otherwise,
** if the allocation fails, return SQLITE_NOMEM.
*/
*ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(Fts3Cursor));
return SQLITE_OK;
}
/*
** Finalize the statement handle at pCsr->pStmt.
**
** Or, if that statement handle is one created by fts3CursorSeekStmt(),
** and the Fts3Table.pSeekStmt slot is currently NULL, save the statement
** pointer there instead of finalizing it.
*/
static void fts3CursorFinalizeStmt(Fts3Cursor *pCsr){
if( pCsr->bSeekStmt ){
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
if( p->pSeekStmt==0 ){
pTS->anOutput[iOut] = nMerge;
break;
}else{
char *aNew;
int nNew;
int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge,
pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
);
if( rc!=SQLITE_OK ){
if( aMerge!=aDoclist ) sqlite3_free(aMerge);
return rc;
}
if( aMerge!=aDoclist ) sqlite3_free(aMerge);
sqlite3_free(pTS->aaOutput[iOut]);
pTS->aaOutput[iOut] = 0;
aMerge = aNew;
nMerge = nNew;
if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
pTS->aaOutput[iOut] = aMerge;
pTS->anOutput[iOut] = nMerge;
}
}
}
}
return SQLITE_OK;
}
/*
** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
*/
static int fts3SegReaderCursorAppend(
Fts3MultiSegReader *pCsr,
Fts3SegReader *pNew
){
if( (pCsr->nSegment%16)==0 ){
Fts3SegReader **apNew;
int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
if( !apNew ){
sqlite3Fts3SegReaderFree(pNew);
return SQLITE_NOMEM;
}
pCsr->apSegment = apNew;
}
pCsr->apSegment[pCsr->nSegment++] = pNew;
return SQLITE_OK;
}
/*
** Add seg-reader objects to the Fts3MultiSegReader object passed as the
** 8th argument.
**
** This function returns SQLITE_OK if successful, or an SQLite error code
** otherwise.
*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
int isScan, /* True to scan from zTerm to EOF */
Fts3MultiSegReader *pCsr /* Cursor object to populate */
){
int rc = SQLITE_OK; /* Error code */
sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
int rc2; /* Result of sqlite3_reset() */
/* If iLevel is less than 0 and this is not a scan, include a seg-reader
** for the pending-terms. If this is a scan, then this call must be being
** made by an fts4aux module, not an FTS table. In this case calling
** Fts3SegReaderPending might segfault, as the data structures used by
** fts4aux are not completely populated. So it's easiest to filter these
** calls out here. */
if( iLevel<0 && p->aIndex ){
Fts3SegReader *pSeg = 0;
rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg);
if( rc==SQLITE_OK && pSeg ){
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
}
}
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
if( rc==SQLITE_OK ){
rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
Fts3SegReader *pSeg = 0;
/* Read the values returned by the SELECT into local variables. */
sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
int nRoot = sqlite3_column_bytes(pStmt, 4);
char const *zRoot = sqlite3_column_blob(pStmt, 4);
/* If zTerm is not NULL, and this segment is not stored entirely on its
** root node, the range of leaves scanned can be reduced. Do this. */
if( iStartBlock && zTerm ){
sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
if( rc!=SQLITE_OK ) goto finished;
if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
}
rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1,
(isPrefix==0 && isScan==0),
iStartBlock, iLeavesEndBlock,
iEndBlock, zRoot, nRoot, &pSeg
);
if( rc!=SQLITE_OK ) goto finished;
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
}
}
finished:
rc2 = sqlite3_reset(pStmt);
if( rc==SQLITE_DONE ) rc = rc2;
return rc;
}
/*
** Set up a cursor object for iterating through a full-text index or a
** single level therein.
*/
SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language-id to search */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
int isScan, /* True to scan from zTerm to EOF */
Fts3MultiSegReader *pCsr /* Cursor object to populate */
){
assert( iIndex>=0 && iIndex<p->nIndex );
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
|| iLevel>=0
);
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
assert( isPrefix==0 || isScan==0 );
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
return fts3SegReaderCursor(
p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
}
/*
** In addition to its current configuration, have the Fts3MultiSegReader
** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3SegReaderCursorAddZero(
Fts3Table *p, /* FTS virtual table handle */
int iLangid,
const char *zTerm, /* Term to scan doclist of */
int nTerm, /* Number of bytes in zTerm */
Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
){
return fts3SegReaderCursor(p,
iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
);
}
/*
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
** if isPrefix is true, to scan the doclist for all terms for which
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
** an SQLite error code.
**
** It is the responsibility of the caller to free this object by eventually
** passing it to fts3SegReaderCursorFree()
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
** Output parameter *ppSegcsr is set to 0 if an error occurs.
*/
static int fts3TermSegReaderCursor(
Fts3Cursor *pCsr, /* Virtual table cursor handle */
const char *zTerm, /* Term to query for */
int nTerm, /* Size of zTerm in bytes */
int isPrefix, /* True for a prefix search */
Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
){
Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
int rc = SQLITE_NOMEM; /* Return code */
pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
if( pSegcsr ){
int i;
int bFound = 0; /* True once an index has been found */
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
if( isPrefix ){
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
);
pSegcsr->bLookup = 1;
}
}
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm+1 ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
);
if( rc==SQLITE_OK ){
rc = fts3SegReaderCursorAddZero(
p, pCsr->iLangid, zTerm, nTerm, pSegcsr
);
}
}
}
}
if( bFound==0 ){
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
);
pSegcsr->bLookup = !isPrefix;
}
}
*ppSegcsr = pSegcsr;
return rc;
}
/*
** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
*/
static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
sqlite3Fts3SegReaderFinish(pSegcsr);
sqlite3_free(pSegcsr);
}
/*
** This function retrieves the doclist for the specified term (or term
** prefix) from the database.
*/
static int fts3TermSelect(
Fts3Table *p, /* Virtual table handle */
Fts3PhraseToken *pTok, /* Token to query for */
int iColumn, /* Column to query (or -ve for all columns) */
int *pnOut, /* OUT: Size of buffer at *ppOut */
char **ppOut /* OUT: Malloced result buffer */
){
int rc; /* Return code */
Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
TermSelect tsc; /* Object for pair-wise doclist merging */
Fts3SegFilter filter; /* Segment term filter configuration */
pSegcsr = pTok->pSegcsr;
memset(&tsc, 0, sizeof(TermSelect));
filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
| (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
| (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
filter.iCol = iColumn;
filter.zTerm = pTok->z;
filter.nTerm = pTok->n;
rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
while( SQLITE_OK==rc
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))
){
rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
}
if( rc==SQLITE_OK ){
rc = fts3TermSelectFinishMerge(p, &tsc);
}
if( rc==SQLITE_OK ){
*ppOut = tsc.aaOutput[0];
*pnOut = tsc.anOutput[0];
}else{
int i;
return rc;
}
/*
** The following are copied from sqliteInt.h.
**
** Constants for the largest and smallest possible 64-bit signed integers.
** These macros are designed to work correctly on both 32-bit and 64-bit
** compilers.
*/
#ifndef SQLITE_AMALGAMATION
# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
#endif
/*
** If the numeric type of argument pVal is "integer", then return it
** converted to a 64-bit signed integer. Otherwise, return a copy of
** the second parameter, iDefault.
*/
static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
if( pVal ){
int eType = sqlite3_value_numeric_type(pVal);
if( eType==SQLITE_INTEGER ){
return sqlite3_value_int64(pVal);
}
}
return iDefault;
}
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
** information.
**
** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
** the %_content table.
**
** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
** in the %_content table.
**
** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The
** column on the left-hand side of the MATCH operator is column
** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand
** side of the MATCH operator.
*/
static int fts3FilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* Unused */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
int rc = SQLITE_OK;
char *zSql; /* SQL statement used to access %_content */
int eSearch;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
int iIdx;
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
eSearch = (idxNum & 0x0000FFFF);
assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( p->pSegments==0 );
/* Collect arguments into local variables */
iIdx = 0;
if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
assert( iIdx==nVal );
/* In case the cursor has been used before, clear it now. */
fts3ClearCursor(pCsr);
/* Set the lower and upper bounds on docids to return */
pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
if( idxStr ){
pCsr->bDesc = (idxStr[0]=='D');
}else{
pCsr->bDesc = p->bDescIdx;
}
pCsr->eSearch = (i16)eSearch;
if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(pCons);
if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
pCsr->iLangid = 0;
if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,
&p->base.zErrMsg
);
if( rc!=SQLITE_OK ){
return rc;
}
rc = fts3EvalStart(pCsr);
sqlite3Fts3SegmentsClose(p);
if( rc!=SQLITE_OK ) return rc;
pCsr->pNextId = pCsr->aDoclist;
pCsr->iPrevId = 0;
}
/* Compile a SELECT statement for this cursor. For a full-table-scan, the
** statement loops through all rows of the %_content table. For a
** full-text query or docid lookup, the statement retrieves a single
** row by docid.
*/
if( eSearch==FTS3_FULLSCAN_SEARCH ){
if( pDocidGe || pDocidLe ){
zSql = sqlite3_mprintf(
"SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s",
p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid,
(pCsr->bDesc ? "DESC" : "ASC")
);
}else{
zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
);
}
if( zSql ){
rc = sqlite3_prepare_v3(p->db,zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0);
sqlite3_free(zSql);
}else{
rc = SQLITE_NOMEM;
}
}else if( eSearch==FTS3_DOCID_SEARCH ){
rc = fts3CursorSeekStmt(pCsr);
if( rc==SQLITE_OK ){
rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
}
}
if( rc!=SQLITE_OK ) return rc;
return fts3NextMethod(pCursor);
}
/*
** This is the xEof method of the virtual table. SQLite calls this
** routine to find out if it has reached the end of a result set.
*/
static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
Fts3Cursor *pCsr = (Fts3Cursor*)pCursor;
if( pCsr->isEof ){
fts3ClearCursor(pCsr);
pCsr->isEof = 1;
}
return pCsr->isEof;
}
/*
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. fts3
** exposes %_content.docid as the rowid for the virtual table. The
** rowid should be written to *pRowid.
*/
static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
*pRowid = pCsr->iPrevId;
return SQLITE_OK;
}
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
**
** If:
**
** (iCol < p->nColumn) -> The value of the iCol'th user column.
** (iCol == p->nColumn) -> Magic column with the same name as the table.
** (iCol == p->nColumn+1) -> Docid column
** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
int rc = SQLITE_OK; /* Return Code */
Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
/* The column value supplied by SQLite must be in range. */
assert( iCol>=0 && iCol<=p->nColumn+2 );
switch( iCol-p->nColumn ){
case 0:
/* The special 'table-name' column */
sqlite3_result_pointer(pCtx, pCsr, "fts3cursor", 0);
break;
case 1:
/* The docid column */
sqlite3_result_int64(pCtx, pCsr->iPrevId);
break;
case 2:
if( pCsr->pExpr ){
sqlite3_result_int64(pCtx, pCsr->iLangid);
break;
}else if( p->zLanguageid==0 ){
sqlite3_result_int(pCtx, 0);
break;
}else{
iCol = p->nColumn;
/* fall-through */
}
default:
/* A user column. Or, if this is a full-table scan, possibly the
** language-id column. Seek the cursor. */
rc = fts3CursorSeek(0, pCsr);
if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)-1>iCol ){
sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
}
break;
}
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
return rc;
}
/*
** This function is the implementation of the xUpdate callback used by
** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
** inserted, updated or deleted.
*/
static int fts3UpdateMethod(
sqlite3_vtab *pVtab, /* Virtual table handle */
int nArg, /* Size of argument array */
sqlite3_value **apVal, /* Array of arguments */
sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
){
return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
}
/*
** Implementation of xSync() method. Flush the contents of the pending-terms
** hash-table to the database.
*/
static int fts3SyncMethod(sqlite3_vtab *pVtab){
/* Following an incremental-merge operation, assuming that the input
** segments are not completely consumed (the usual case), they are updated
** in place to remove the entries that have already been merged. This
** involves updating the leaf block that contains the smallest unmerged
** entry and each block (if any) between the leaf and the root node. So
** if the height of the input segment b-trees is N, and input segments
** are merged eight at a time, updating the input segments at the end
** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
** small - often between 0 and 2. So the overhead of the incremental
** merge is somewhere between 8 and 24 blocks. To avoid this overhead
** dwarfing the actual productive work accomplished, the incremental merge
** is only attempted if it will write at least 64 leaf blocks. Hence
** nMinMerge.
**
** Of course, updating the input segments also involves deleting a bunch
** of blocks from the segments table. But this is not considered overhead
** as it would also be required by a crisis-merge that used the same input
#ifdef SQLITE_DEBUG
SQLITE_PRIVATE int sqlite3Fts3Corrupt(){
return SQLITE_CORRUPT_VTAB;
}
#endif
#if !SQLITE_CORE
/*
** Initialize API pointer table, if required.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
SQLITE_API int sqlite3_fts3_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3Fts3Init(db);
}
#endif
#endif
/************** End of fts3.c ************************************************/
/************** Begin file fts3_aux.c ****************************************/
/*
** 2011 Jan 27
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
/* #include "fts3Int.h" */
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
/* #include <string.h> */
/* #include <assert.h> */
typedef struct Fts3auxTable Fts3auxTable;
typedef struct Fts3auxCursor Fts3auxCursor;
struct Fts3auxTable {
sqlite3_vtab base; /* Base class used by SQLite core */
Fts3Table *pFts3Tab;
};
struct Fts3auxCursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
Fts3MultiSegReader csr; /* Must be right after "base" */
Fts3SegFilter filter;
char *zStop;
int nStop; /* Byte-length of string zStop */
int iLangid; /* Language id to query */
int isEof; /* True if cursor is at EOF */
sqlite3_int64 iRowid; /* Current rowid */
int iCol; /* Current value of 'col' column */
int nStat; /* Size of aStat[] array */
struct Fts3auxColstats {
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
} *aStat;
};
/*
** Schema of the terms table.
*/
#define FTS3_AUX_SCHEMA \
"CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
/*
** This function does all the work for both the xConnect and xCreate methods.
** These tables have no persistent representation of their own, so xConnect
** and xCreate are identical operations.
*/
static int fts3auxConnectMethod(
sqlite3 *db, /* Database connection */
void *pUnused, /* Unused */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
char const *zDb; /* Name of database (e.g. "main") */
char const *zFts3; /* Name of fts3 table */
int nDb; /* Result of strlen(zDb) */
int nFts3; /* Result of strlen(zFts3) */
int nByte; /* Bytes of space to allocate here */
int rc; /* value returned by declare_vtab() */
Fts3auxTable *p; /* Virtual table object to return */
UNUSED_PARAMETER(pUnused);
/* The user should invoke this in one of two forms:
**
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
*/
if( argc!=4 && argc!=5 ) goto bad_args;
zDb = argv[1];
nDb = (int)strlen(zDb);
if( argc==5 ){
if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
zDb = argv[3];
nDb = (int)strlen(zDb);
zFts3 = argv[4];
}else{
goto bad_args;
}
}else{
zFts3 = argv[3];
}
rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
p = (Fts3auxTable *)sqlite3_malloc(nByte);
if( !p ) return SQLITE_NOMEM;
memset(p, 0, nByte);
p->pFts3Tab = (Fts3Table *)&p[1];
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
p->pFts3Tab->db = db;
p->pFts3Tab->nIndex = 1;
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
*ppVtab = (sqlite3_vtab *)p;
return SQLITE_OK;
bad_args:
sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
return SQLITE_ERROR;
}
/*
** This function does the work for both the xDisconnect and xDestroy methods.
** These tables have no persistent representation of their own, so xDisconnect
** and xDestroy are identical operations.
*/
static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
Fts3auxTable *p = (Fts3auxTable *)pVtab;
Fts3Table *pFts3 = p->pFts3Tab;
int i;
/* Free any prepared statements held */
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
sqlite3_finalize(pFts3->aStmt[i]);
}
sqlite3_free(pFts3->zSegmentsTbl);
sqlite3_free(p);
return SQLITE_OK;
}
#define FTS4AUX_EQ_CONSTRAINT 1
#define FTS4AUX_GE_CONSTRAINT 2
#define FTS4AUX_LE_CONSTRAINT 4
/*
** xBestIndex - Analyze a WHERE and ORDER BY clause.
*/
static int fts3auxBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
int i;
int iEq = -1;
int iGe = -1;
int iLe = -1;
int iLangid = -1;
int iNext = 1; /* Next free argvIndex value */
UNUSED_PARAMETER(pVTab);
/* This vtab delivers always results in "ORDER BY term ASC" order. */
if( pInfo->nOrderBy==1
&& pInfo->aOrderBy[0].iColumn==0
&& pInfo->aOrderBy[0].desc==0
){
pInfo->orderByConsumed = 1;
}
/* Search for equality and range constraints on the "term" column.
** And equality constraints on the hidden "languageid" column. */
for(i=0; i<pInfo->nConstraint; i++){
if( pInfo->aConstraint[i].usable ){
int op = pInfo->aConstraint[i].op;
int iCol = pInfo->aConstraint[i].iColumn;
if( iCol==0 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
}
if( iCol==4 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
}
}
}
if( iEq>=0 ){
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
pInfo->estimatedCost = 5;
}else{
pInfo->idxNum = 0;
pInfo->estimatedCost = 20000;
if( iGe>=0 ){
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
if( iLe>=0 ){
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
}
if( iLangid>=0 ){
pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
pInfo->estimatedCost--;
}
return SQLITE_OK;
}
/*
** xOpen - Open a cursor.
*/
static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
Fts3auxCursor *pCsr; /* Pointer to cursor object to return */
UNUSED_PARAMETER(pVTab);
pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
if( !pCsr ) return SQLITE_NOMEM;
memset(pCsr, 0, sizeof(Fts3auxCursor));
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
return SQLITE_OK;
}
/*
** xClose - Close a cursor.
*/
static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
sqlite3Fts3SegmentsClose(pFts3);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->zStop);
sqlite3_free(pCsr->aStat);
sqlite3_free(pCsr);
return SQLITE_OK;
}
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
if( nSize>pCsr->nStat ){
struct Fts3auxColstats *aNew;
aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat,
sizeof(struct Fts3auxColstats) * nSize
);
if( aNew==0 ) return SQLITE_NOMEM;
memset(&aNew[pCsr->nStat], 0,
sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
);
pCsr->aStat = aNew;
pCsr->nStat = nSize;
}
return SQLITE_OK;
}
/*
** xNext - Advance the cursor to the next row, if any.
*/
static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
** The only difference between state 1 and state 2 is that if the
** integer encountered in state 1 is not 0 or 1, then we need to
** increment the column 0 "nDoc" count for this term.
*/
case 1:
assert( iCol==0 );
if( v>1 ){
pCsr->aStat[1].nDoc++;
}
eState = 2;
/* fall through */
case 2:
if( v==0 ){ /* 0x00. Next integer will be a docid. */
eState = 0;
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
eState = 3;
}else{ /* 2 or greater. A position. */
pCsr->aStat[iCol+1].nOcc++;
pCsr->aStat[0].nOcc++;
}
break;
/* State 3. The integer just read is a column number. */
default: assert( eState==3 );
iCol = (int)v;
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
pCsr->aStat[iCol+1].nDoc++;
eState = 2;
break;
}
}
pCsr->iCol = 0;
rc = SQLITE_OK;
}else{
pCsr->isEof = 1;
}
return rc;
}
/*
** xFilter - Initialize a cursor to point at the start of its data.
*/
static int fts3auxFilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* Unused */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
int isScan = 0;
int iLangVal = 0; /* Language id to query */
int iEq = -1; /* Index of term=? value in apVal */
int iGe = -1; /* Index of term>=? value in apVal */
int iLe = -1; /* Index of term<=? value in apVal */
int iLangid = -1; /* Index of languageid=? value in apVal */
int iNext = 0;
UNUSED_PARAMETER(nVal);
UNUSED_PARAMETER(idxStr);
assert( idxStr==0 );
assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
);
if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
iEq = iNext++;
}else{
isScan = 1;
if( idxNum & FTS4AUX_GE_CONSTRAINT ){
iGe = iNext++;
}
if( idxNum & FTS4AUX_LE_CONSTRAINT ){
iLe = iNext++;
}
}
if( iNext<nVal ){
iLangid = iNext++;
}
/* In case this cursor is being reused, close and zero it. */
testcase(pCsr->filter.zTerm);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->aStat);
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
if( iEq>=0 || iGe>=0 ){
const unsigned char *zStr = sqlite3_value_text(apVal[0]);
assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
if( zStr ){
pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);
if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
}
}
if( iLe>=0 ){
pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
pCsr->nStop = sqlite3_value_bytes(apVal[iLe]);
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
if( iLangid>=0 ){
iLangVal = sqlite3_value_int(apVal[iLangid]);
/* If the user specified a negative value for the languageid, use zero
** instead. This works, as the "languageid=?" constraint will also
** be tested by the VDBE layer. The test will always be false (since
** this module will not return a row with a negative languageid), and
** so the overall query will return zero rows. */
if( iLangVal<0 ) iLangVal = 0;
}
pCsr->iLangid = iLangVal;
rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
}
if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
return rc;
}
/*
** xEof - Return true if the cursor is at EOF, or false otherwise.
*/
static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
return pCsr->isEof;
}
/*
** xColumn - Return a column value.
*/
static int fts3auxColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
assert( p->isEof==0 );
switch( iCol ){
case 0: /* term */
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
break;
case 1: /* col */
if( p->iCol ){
sqlite3_result_int(pCtx, p->iCol-1);
}else{
sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
}
break;
case 2: /* documents */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
break;
case 3: /* occurrences */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
break;
default: /* languageid */
assert( iCol==4 );
sqlite3_result_int(pCtx, p->iLangid);
break;
}
return SQLITE_OK;
}
/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3auxRowidMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite_int64 *pRowid /* OUT: Rowid value */
){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
*pRowid = pCsr->iRowid;
return SQLITE_OK;
}
/*
** Register the fts3aux module with database connection db. Return SQLITE_OK
** if successful or an error code if sqlite3_create_module() fails.
*/
SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){
static const sqlite3_module fts3aux_module = {
0, /* iVersion */
fts3auxConnectMethod, /* xCreate */
fts3auxConnectMethod, /* xConnect */
fts3auxBestIndexMethod, /* xBestIndex */
fts3auxDisconnectMethod, /* xDisconnect */
fts3auxDisconnectMethod, /* xDestroy */
fts3auxOpenMethod, /* xOpen */
fts3auxCloseMethod, /* xClose */
fts3auxFilterMethod, /* xFilter */
fts3auxNextMethod, /* xNext */
fts3auxEofMethod, /* xEof */
fts3auxColumnMethod, /* xColumn */
fts3auxRowidMethod, /* xRowid */
0, /* xUpdate */
0, /* xBegin */
0, /* xSync */
0, /* xCommit */
0, /* xRollback */
0, /* xFindFunction */
0, /* xRename */
0, /* xSavepoint */
0, /* xRelease */
0, /* xRollbackTo */
0 /* xShadowName */
};
int rc; /* Return code */
rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
/************** End of fts3_aux.c ********************************************/
/************** Begin file fts3_expr.c ***************************************/
/*
** than an implicit AND. When using the new, both implicity and explicit
** AND operators have a higher precedence than OR.
**
** If compiled with SQLITE_TEST defined, then this module exports the
** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
** to zero causes the module to use the old syntax. If it is set to
** non-zero the new syntax is activated. This is so both syntaxes can
** be tested using a single build of testfixture.
**
** The following describes the syntax supported by the fts3 MATCH
** operator in a similar format to that used by the lemon parser
** generator. This module does not use actually lemon, it uses a
** custom parser.
**
** query ::= andexpr (OR andexpr)*.
**
** andexpr ::= notexpr (AND? notexpr)*.
**
** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
** notexpr ::= LP query RP.
**
** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
**
** distance_opt ::= .
** distance_opt ::= / INTEGER.
**
** phrase ::= TOKEN.
** phrase ::= COLUMN:TOKEN.
** phrase ::= "TOKEN TOKEN TOKEN...".
*/
#ifdef SQLITE_TEST
SQLITE_API int sqlite3_fts3_enable_parentheses = 0;
#else
# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
# define sqlite3_fts3_enable_parentheses 1
# else
# define sqlite3_fts3_enable_parentheses 0
# endif
#endif
/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
/* #include <string.h> */
/* #include <assert.h> */
/*
** isNot:
** This variable is used by function getNextNode(). When getNextNode() is
** called, it sets ParseContext.isNot to true if the 'next node' is a
** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
** zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
int iLangid; /* Language id used with tokenizer */
const char **azCol; /* Array of column names for fts3 table */
int bFts4; /* True to allow FTS4-only syntax */
int nCol; /* Number of entries in azCol[] */
int iDefaultCol; /* Default column to query */
int isNot; /* True if getNextNode() sees a unary - */
sqlite3_context *pCtx; /* Write error message here */
int nNest; /* Number of nested brackets */
};
/*
** This function is equivalent to the standard isspace() function.
**
** The standard isspace() can be awkward to use safely, because although it
** is defined to accept an argument of type int, its behavior when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int fts3isspace(char c){
return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}
/*
** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
** zero the memory before returning a pointer to it. If unsuccessful,
** return NULL.
*/
static void *fts3MallocZero(int nByte){
void *pRet = sqlite3_malloc(nByte);
if( pRet ) memset(pRet, 0, nByte);
return pRet;
}
SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(
sqlite3_tokenizer *pTokenizer,
int iLangid,
const char *z,
int n,
sqlite3_tokenizer_cursor **ppCsr
){
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
sqlite3_tokenizer_cursor *pCsr = 0;
int rc;
rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
assert( rc==SQLITE_OK || pCsr==0 );
if( rc==SQLITE_OK ){
pCsr->pTokenizer = pTokenizer;
if( pModule->iVersion>=1 ){
rc = pModule->xLanguageid(pCsr, iLangid);
if( rc!=SQLITE_OK ){
pModule->xClose(pCsr);
pCsr = 0;
}
}
}
*ppCsr = pCsr;
return rc;
}
/*
** Function getNextNode(), which is called by fts3ExprParse(), may itself
** call fts3ExprParse(). So this forward declaration is required.
*/
static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
** responsibility of the caller to eventually deallocate the allocated
** Fts3Expr structure (if any) by passing it to sqlite3_free().
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
** fails.
*/
static int getNextToken(
ParseContext *pParse, /* fts3 query parse context */
int iCol, /* Value for Fts3Phrase.iColumn */
const char *z, int n, /* Input string */
Fts3Expr **ppExpr, /* OUT: expression */
int *pnConsumed /* OUT: Number of bytes consumed */
){
sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
int rc;
sqlite3_tokenizer_cursor *pCursor;
Fts3Expr *pRet = 0;
int i = 0;
/* Set variable i to the maximum number of bytes of input to tokenize. */
for(i=0; i<n; i++){
if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
if( z[i]=='"' ) break;
}
*pnConsumed = i;
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
int nByte; /* total space to allocate */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
if( rc==SQLITE_OK ){
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
pRet = (Fts3Expr *)fts3MallocZero(nByte);
if( !pRet ){
rc = SQLITE_NOMEM;
}else{
pRet->eType = FTSQUERY_PHRASE;
pRet->pPhrase = (Fts3Phrase *)&pRet[1];
pRet->pPhrase->nToken = 1;
pRet->pPhrase->iColumn = iCol;
pRet->pPhrase->aToken[0].n = nToken;
pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
if( iEnd<n && z[iEnd]=='*' ){
pRet->pPhrase->aToken[0].isPrefix = 1;
iEnd++;
}
while( 1 ){
if( !sqlite3_fts3_enable_parentheses
&& iStart>0 && z[iStart-1]=='-'
){
pParse->isNot = 1;
iStart--;
}else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
pRet->pPhrase->aToken[0].bFirst = 1;
iStart--;
}else{
break;
}
}
}
*pnConsumed = iEnd;
}else if( i && rc==SQLITE_DONE ){
rc = SQLITE_OK;
}
pModule->xClose(pCursor);
}
*ppExpr = pRet;
return rc;
}
/*
** Enlarge a memory allocation. If an out-of-memory allocation occurs,
** then free the old allocation.
*/
static void *fts3ReallocOrFree(void *pOrig, int nNew){
void *pRet = sqlite3_realloc(pOrig, nNew);
if( !pRet ){
sqlite3_free(pOrig);
}
return pRet;
}
/*
** Buffer zInput, length nInput, contains the contents of a quoted string
** that appeared as part of an fts3 query expression. Neither quote character
** is included in the buffer. This function attempts to tokenize the entire
** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
** containing the results.
**
** If successful, SQLITE_OK is returned and *ppExpr set to point at the
** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
** to 0.
*/
static int getNextString(
ParseContext *pParse, /* fts3 query parse context */
const char *zInput, int nInput, /* Input string */
Fts3Expr **ppExpr /* OUT: expression */
){
sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
int rc;
Fts3Expr *p = 0;
sqlite3_tokenizer_cursor *pCursor = 0;
char *zTemp = 0;
int nTemp = 0;
const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
int nToken = 0;
/* The final Fts3Expr data structure, including the Fts3Phrase,
** Fts3PhraseToken structures token buffers are all stored as a single
** allocation so that the expression can be freed with a single call to
** sqlite3_free(). Setting this up requires a two pass approach.
**
** The first pass, in the block below, uses a tokenizer cursor to iterate
** through the tokens in the expression. This pass uses fts3ReallocOrFree()
** to assemble data in two dynamic buffers:
**
** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
** structure, followed by the array of Fts3PhraseToken
** structures. This pass only populates the Fts3PhraseToken array.
**
** Buffer zTemp: Contains copies of all tokens.
**
** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
** structures.
*/
rc = sqlite3Fts3OpenTokenizer(
pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
if( rc==SQLITE_OK ){
int ii;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
Fts3PhraseToken *pToken;
p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
if( !p ) goto no_mem;
zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
if( !zTemp ) goto no_mem;
assert( nToken==ii );
pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
memset(pToken, 0, sizeof(Fts3PhraseToken));
memcpy(&zTemp[nTemp], zByte, nByte);
nTemp += nByte;
pToken->n = nByte;
pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
nToken = ii+1;
}
}
pModule->xClose(pCursor);
pCursor = 0;
}
if( rc==SQLITE_DONE ){
int jj;
char *zBuf = 0;
p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
if( !p ) goto no_mem;
memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
p->eType = FTSQUERY_PHRASE;
p->pPhrase = (Fts3Phrase *)&p[1];
p->pPhrase->iColumn = pParse->iDefaultCol;
p->pPhrase->nToken = nToken;
zBuf = (char *)&p->pPhrase->aToken[nToken];
if( zTemp ){
memcpy(zBuf, zTemp, nTemp);
sqlite3_free(zTemp);
}else{
assert( nTemp==0 );
}
for(jj=0; jj<p->pPhrase->nToken; jj++){
p->pPhrase->aToken[jj].z = zBuf;
zBuf += p->pPhrase->aToken[jj].n;
}
rc = SQLITE_OK;
}
for(i=0; i<nMaxDepth; i++){
sqlite3Fts3ExprFree(apLeaf[i]);
}
while( (pDel=pFree)!=0 ){
pFree = pDel->pParent;
sqlite3_free(pDel);
}
}
assert( pFree==0 );
sqlite3_free( apLeaf );
}
}else if( eType==FTSQUERY_NOT ){
Fts3Expr *pLeft = pRoot->pLeft;
Fts3Expr *pRight = pRoot->pRight;
pRoot->pLeft = 0;
pRoot->pRight = 0;
pLeft->pParent = 0;
pRight->pParent = 0;
rc = fts3ExprBalance(&pLeft, nMaxDepth-1);
if( rc==SQLITE_OK ){
rc = fts3ExprBalance(&pRight, nMaxDepth-1);
}
if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(pRight);
sqlite3Fts3ExprFree(pLeft);
}else{
assert( pLeft && pRight );
pRoot->pLeft = pLeft;
pLeft->pParent = pRoot;
pRoot->pRight = pRight;
pRight->pParent = pRoot;
}
}
}
if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(pRoot);
pRoot = 0;
}
*pp = pRoot;
return rc;
}
/*
** This function is similar to sqlite3Fts3ExprParse(), with the following
** differences:
**
** 1. It does not do expression rebalancing.
** 2. It does not check that the expression does not exceed the
** maximum allowable depth.
** 3. Even if it fails, *ppExpr may still be set to point to an
** expression tree. It should be deleted using sqlite3Fts3ExprFree()
** in this case.
*/
static int fts3ExprParseUnbalanced(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
int iDefaultCol, /* Default column to query */
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr /* OUT: Parsed query structure */
){
int nParsed;
int rc;
ParseContext sParse;
memset(&sParse, 0, sizeof(ParseContext));
sParse.pTokenizer = pTokenizer;
sParse.iLangid = iLangid;
sParse.azCol = (const char **)azCol;
sParse.nCol = nCol;
sParse.iDefaultCol = iDefaultCol;
sParse.bFts4 = bFts4;
if( z==0 ){
*ppExpr = 0;
return SQLITE_OK;
}
if( n<0 ){
n = (int)strlen(z);
}
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
assert( rc==SQLITE_OK || *ppExpr==0 );
/* Check for mismatched parenthesis */
if( rc==SQLITE_OK && sParse.nNest ){
rc = SQLITE_ERROR;
}
return rc;
}
/*
** Parameters z and n contain a pointer to and length of a buffer containing
** an fts3 query expression, respectively. This function attempts to parse the
** query expression and create a tree of Fts3Expr structures representing the
** parsed expression. If successful, *ppExpr is set to point to the head
** of the parsed expression tree and SQLITE_OK is returned. If an error
** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
** error) is returned and *ppExpr is set to 0.
**
** If parameter n is a negative number, then z is assumed to point to a
** nul-terminated string and the length is determined using strlen().
**
** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
** use to normalize query tokens while parsing the expression. The azCol[]
** array, which is assumed to contain nCol entries, should contain the names
** of each column in the target fts3 table, in order from left to right.
** Column names must be nul-terminated strings.
**
** The iDefaultCol parameter should be passed the index of the table column
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
SQLITE_PRIVATE int sqlite3Fts3ExprParse(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
int iDefaultCol, /* Default column to query */
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr, /* OUT: Parsed query structure */
char **pzErr /* OUT: Error message (sqlite3_malloc) */
){
int rc = fts3ExprParseUnbalanced(
pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
);
/* Rebalance the expression. And check that its depth does not exceed
** SQLITE_FTS3_MAX_EXPR_DEPTH. */
if( rc==SQLITE_OK && *ppExpr ){
rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
if( rc==SQLITE_OK ){
rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
}
}
if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
if( rc==SQLITE_TOOBIG ){
sqlite3Fts3ErrMsg(pzErr,
"FTS expression tree is too large (maximum depth %d)",
SQLITE_FTS3_MAX_EXPR_DEPTH
);
rc = SQLITE_ERROR;
}else if( rc==SQLITE_ERROR ){
sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z);
}
}
return rc;
}
/*
** Free a single node of an expression tree.
*/
static void fts3FreeExprNode(Fts3Expr *p){
assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
sqlite3_free(p->aMI);
sqlite3_free(p);
}
/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
**
** This function would be simpler if it recursively called itself. But
** that would mean passing a sufficiently large expression to ExprParse()
** could cause a stack overflow.
*/
SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){
Fts3Expr *p;
assert( pDel==0 || pDel->pParent==0 );
for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
}
while( p ){
Fts3Expr *pParent = p->pParent;
fts3FreeExprNode(p);
if( pParent && p==pParent->pLeft && pParent->pRight ){
p = pParent->pRight;
while( p && (p->pLeft || p->pRight) ){
assert( p==p->pParent->pRight || p==p->pParent->pLeft );
p = (p->pLeft ? p->pLeft : p->pRight);
}
if( *pRC ) return;
rc = fts3SqlStmt(p, eStmt, &pStmt, apVal);
if( rc==SQLITE_OK ){
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
}
*pRC = rc;
}
/*
** This function ensures that the caller has obtained an exclusive
** shared-cache table-lock on the %_segdir table. This is required before
** writing data to the fts3 table. If this lock is not acquired first, then
** the caller may end up attempting to take this lock as part of committing
** a transaction, causing SQLite to return SQLITE_LOCKED or
** LOCKED_SHAREDCACHEto a COMMIT command.
**
** It is best to avoid this because if FTS3 returns any error when
** committing a transaction, the whole transaction will be rolled back.
** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE.
** It can still happen if the user locks the underlying tables directly
** instead of accessing them via FTS.
*/
static int fts3Writelock(Fts3Table *p){
int rc = SQLITE_OK;
if( p->nPendingData==0 ){
sqlite3_stmt *pStmt;
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_null(pStmt, 1);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
}
}
return rc;
}
/*
** FTS maintains a separate indexes for each language-id (a 32-bit integer).
** Within each language id, a separate index is maintained to store the
** document terms, and each configured prefix size (configured the FTS
** "prefix=" option). And each index consists of multiple levels ("relative
** levels").
**
** All three of these values (the language id, the specific index and the
** level within the index) are encoded in 64-bit integer values stored
** in the %_segdir table on disk. This function is used to convert three
** separate component values into the single 64-bit integer value that
** can be used to query the %_segdir table.
**
** Specifically, each language-id/index combination is allocated 1024
** 64-bit integer level values ("absolute levels"). The main terms index
** for language-id 0 is allocate values 0-1023. The first prefix index
** (if any) for language-id 0 is allocated values 1024-2047. And so on.
** Language 1 indexes are allocated immediately following language 0.
**
** So, for a system with nPrefix prefix indexes configured, the block of
** absolute levels that corresponds to language-id iLangid and index
** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
*/
static sqlite3_int64 getAbsoluteLevel(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id */
int iIndex, /* Index in p->aIndex[] */
int iLevel /* Level of segments */
){
sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */
assert( iLangid>=0 );
assert( p->nIndex>0 );
assert( iIndex>=0 && iIndex<p->nIndex );
iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
return iBase + iLevel;
}
/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement,
** return an SQLite error code.
**
** There is only ever one instance of this SQL statement compiled for
** each FTS3 table.
**
** The statement returns the following columns from the %_segdir table:
**
** 0: idx
** 1: start_block
** 2: leaves_end_block
** 3: end_block
** 4: root
*/
SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(
Fts3Table *p, /* FTS3 table */
int iLangid, /* Language being queried */
int iIndex, /* Index for p->aIndex[] */
int iLevel, /* Level to select (relative level) */
sqlite3_stmt **ppStmt /* OUT: Compiled statement */
){
int rc;
sqlite3_stmt *pStmt = 0;
assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
if( iLevel<0 ){
/* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int64(pStmt, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
}
}else{
/* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
}
}
*ppStmt = pStmt;
return rc;
}
/*
** Append a single varint to a PendingList buffer. SQLITE_OK is returned
** if successful, or an SQLite error code otherwise.
**
** This function also serves to allocate the PendingList structure itself.
** For example, to create a new PendingList structure containing two
** varints:
**
** PendingList *p = 0;
** fts3PendingListAppendVarint(&p, 1);
** fts3PendingListAppendVarint(&p, 2);
*/
static int fts3PendingListAppendVarint(
PendingList **pp, /* IN/OUT: Pointer to PendingList struct */
sqlite3_int64 i /* Value to append to data */
){
PendingList *p = *pp;
/* Allocate or grow the PendingList as required. */
if( !p ){
p = sqlite3_malloc(sizeof(*p) + 100);
if( !p ){
return SQLITE_NOMEM;
}
p->nSpace = 100;
p->aData = (char *)&p[1];
p->nData = 0;
}
else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){
int nNew = p->nSpace * 2;
p = sqlite3_realloc(p, sizeof(*p) + nNew);
if( !p ){
sqlite3_free(*pp);
*pp = 0;
return SQLITE_NOMEM;
}
p->nSpace = nNew;
p->aData = (char *)&p[1];
}
/* Append the new serialized varint to the end of the list. */
p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i);
p->aData[p->nData] = '\0';
*pp = p;
return SQLITE_OK;
}
/*
** Add a docid/column/position entry to a PendingList structure. Non-zero
** is returned if the structure is sqlite3_realloced as part of adding
** the entry. Otherwise, zero.
**
** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
pendinglistappend_out:
*pRc = rc;
if( p!=*pp ){
*pp = p;
return 1;
}
return 0;
}
/*
** Free a PendingList object allocated by fts3PendingListAppend().
*/
static void fts3PendingListDelete(PendingList *pList){
sqlite3_free(pList);
}
/*
** Add an entry to one of the pending-terms hash tables.
*/
static int fts3PendingTermsAddOne(
Fts3Table *p,
int iCol,
int iPos,
Fts3Hash *pHash, /* Pending terms hash table to add entry to */
const char *zToken,
int nToken
){
PendingList *pList;
int rc = SQLITE_OK;
pList = (PendingList *)fts3HashFind(pHash, zToken, nToken);
if( pList ){
p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
}
if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){
/* Malloc failed while inserting the new entry. This can only
** happen if there was no previous entry for this token.
*/
assert( 0==fts3HashFind(pHash, zToken, nToken) );
sqlite3_free(pList);
rc = SQLITE_NOMEM;
}
}
if( rc==SQLITE_OK ){
p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
}
return rc;
}
/*
** Tokenize the nul-terminated string zText and add all tokens to the
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
Fts3Table *p, /* Table into which text will be inserted */
int iLangid, /* Language id to use */
const char *zText, /* Text of document to be inserted */
int iCol, /* Column into which text is being inserted */
u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
){
int rc;
int iStart = 0;
int iEnd = 0;
int iPos = 0;
int nWord = 0;
char const *zToken;
int nToken = 0;
sqlite3_tokenizer *pTokenizer = p->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
sqlite3_tokenizer_cursor *pCsr;
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char**,int*,int*,int*,int*);
assert( pTokenizer && pModule );
/* If the user has inserted a NULL value, this function may be called with
** zText==0. In this case, add zero token entries to the hash table and
** return early. */
if( zText==0 ){
*pnWord = 0;
return SQLITE_OK;
}
rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
if( rc!=SQLITE_OK ){
return rc;
}
xNext = pModule->xNext;
while( SQLITE_OK==rc
&& SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
){
int i;
if( iPos>=nWord ) nWord = iPos+1;
/* Positions cannot be negative; we use -1 as a terminator internally.
** Tokens must have a non-zero length.
*/
if( iPos<0 || !zToken || nToken<=0 ){
rc = SQLITE_ERROR;
break;
}
/* Add the term to the terms index */
rc = fts3PendingTermsAddOne(
p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken
);
/* Add the term to each of the prefix indexes that it is not too
** short for. */
for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){
struct Fts3Index *pIndex = &p->aIndex[i];
if( nToken<pIndex->nPrefix ) continue;
rc = fts3PendingTermsAddOne(
p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix
);
}
}
pModule->xClose(pCsr);
*pnWord += nWord;
return (rc==SQLITE_DONE ? SQLITE_OK : rc);
}
/*
** Calling this function indicates that subsequent calls to
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(
Fts3Table *p, /* Full-text table handle */
int bDelete, /* True if this op is a delete */
int iLangid, /* Language id of row being written */
sqlite_int64 iDocid /* Docid of row being written */
){
assert( iLangid>=0 );
assert( bDelete==1 || bDelete==0 );
/* TODO(shess) Explore whether partially flushing the buffer on
** forced-flush would provide better performance. I suspect that if
** we ordered the doclists by size and flushed the largest until the
** buffer was half empty, that would let the less frequent terms
** generate longer doclists.
*/
if( iDocid<p->iPrevDocid
|| (iDocid==p->iPrevDocid && p->bPrevDelete==0)
|| p->iPrevLangid!=iLangid
|| p->nPendingData>p->nMaxPendingData
){
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc!=SQLITE_OK ) return rc;
}
p->iPrevDocid = iDocid;
p->iPrevLangid = iLangid;
p->bPrevDelete = bDelete;
return SQLITE_OK;
}
/*
** Discard the contents of the pending-terms hash tables.
*/
SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){
int i;
for(i=0; i<p->nIndex; i++){
Fts3HashElem *pElem;
Fts3Hash *pHash = &p->aIndex[i].hPending;
for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){
PendingList *pList = (PendingList *)fts3HashData(pElem);
fts3PendingListDelete(pList);
}
fts3HashClear(pHash);
}
p->nPendingData = 0;
}
/*
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
** pendingTerms hash table.
**
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(
Fts3Table *p,
int iLangid,
sqlite3_value **apVal,
u32 *aSz
){
int i; /* Iterator variable */
for(i=2; i<p->nColumn+2; i++){
int iCol = i-2;
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_value_text(apVal[i]);
int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
if( rc!=SQLITE_OK ){
return rc;
}
aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
}
}
return SQLITE_OK;
}
/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
** SQLite to the xUpdate() method. i.e:
**
** apVal[0] Not used for INSERT.
** apVal[1] rowid
** apVal[2] Left-most user-defined column
** ...
** apVal[p->nColumn+1] Right-most user-defined column
** apVal[p->nColumn+2] Hidden column with same name as table
** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
** apVal[p->nColumn+4] Hidden languageid column
*/
static int fts3InsertData(
Fts3Table *p, /* Full-text table */
sqlite3_value **apVal, /* Array of values to insert */
sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */
){
int rc; /* Return code */
sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
if( p->zContentTbl ){
sqlite3_value *pRowid = apVal[p->nColumn+3];
if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
pRowid = apVal[1];
}
if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
return SQLITE_CONSTRAINT;
}
*piDocid = sqlite3_value_int64(pRowid);
return SQLITE_OK;
}
/* Locate the statement handle used to insert data into the %_content
** table. The SQL for this statement is:
**
** INSERT INTO %_content VALUES(?, ?, ?, ...)
**
** The statement features N '?' variables, where N is the number of user
** defined columns in the FTS3 table, plus one for the docid field.
*/
rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
if( rc==SQLITE_OK && p->zLanguageid ){
rc = sqlite3_bind_int(
pContentInsert, p->nColumn+2,
sqlite3_value_int(apVal[p->nColumn+4])
);
}
if( rc!=SQLITE_OK ) return rc;
** a value for the "rowid" field, for the "docid" field, or for both.
** Which is a problem, since "rowid" and "docid" are aliases for the
** same value. For example:
**
** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
**
** In FTS3, this is an error. It is an error to specify non-NULL values
** for both docid and some other rowid alias.
*/
if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
if( SQLITE_NULL==sqlite3_value_type(apVal[0])
&& SQLITE_NULL!=sqlite3_value_type(apVal[1])
){
/* A rowid/docid conflict. */
return SQLITE_ERROR;
}
rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
if( rc!=SQLITE_OK ) return rc;
}
/* Execute the statement to insert the record. Set *piDocid to the
** new docid value.
*/
sqlite3_step(pContentInsert);
rc = sqlite3_reset(pContentInsert);
*piDocid = sqlite3_last_insert_rowid(p->db);
return rc;
}
/*
** Remove all data from the FTS3 table. Clear the hash table containing
** pending terms.
*/
static int fts3DeleteAll(Fts3Table *p, int bContent){
int rc = SQLITE_OK; /* Return code */
/* Discard the contents of the pending-terms hash table. */
sqlite3Fts3PendingTermsClear(p);
/* Delete everything from the shadow tables. Except, leave %_content as
** is if bContent is false. */
assert( p->zContentTbl==0 || bContent==0 );
if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
if( p->bHasDocsize ){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
}
if( p->bHasStat ){
fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
}
return rc;
}
/*
**
*/
static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
int iLangid = 0;
if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
return iLangid;
}
/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms(
int *pRC, /* Result code */
Fts3Table *p, /* The FTS table to delete from */
sqlite3_value *pRowid, /* The docid to be deleted */
u32 *aSz, /* Sizes of deleted document written here */
int *pbFound /* OUT: Set to true if row really does exist */
){
int rc;
sqlite3_stmt *pSelect;
assert( *pbFound==0 );
if( *pRC ) return;
rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pSelect) ){
int i;
int iLangid = langidFromSelect(p, pSelect);
i64 iDocid = sqlite3_column_int64(pSelect, 0);
rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid);
for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
int iCol = i-1;
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_column_text(pSelect, i);
rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
}
}
if( rc!=SQLITE_OK ){
sqlite3_reset(pSelect);
*pRC = rc;
return;
}
*pbFound = 1;
}
rc = sqlite3_reset(pSelect);
}else{
sqlite3_reset(pSelect);
}
*pRC = rc;
}
/*
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
static int fts3SegmentMerge(Fts3Table *, int, int, int);
/*
** This function allocates a new level iLevel index in the segdir table.
** Usually, indexes are allocated within a level sequentially starting
** with 0, so the allocated index is one greater than the value returned
** by:
**
** SELECT max(idx) FROM %_segdir WHERE level = :iLevel
**
** However, if there are already FTS3_MERGE_COUNT indexes at the requested
** level, they are merged into a single level (iLevel+1) segment and the
** allocated index is 0.
**
** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
** returned. Otherwise, an SQLite error code is returned.
*/
static int fts3AllocateSegdirIdx(
Fts3Table *p,
int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel,
int *piIdx
){
int rc; /* Return Code */
sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
int iNext = 0; /* Result of query pNextIdx */
assert( iLangid>=0 );
assert( p->nIndex>=1 );
/* Set variable iNext to the next available segdir index at level iLevel. */
rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(
pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
);
if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
iNext = sqlite3_column_int(pNextIdx, 0);
}
rc = sqlite3_reset(pNextIdx);
}
if( rc==SQLITE_OK ){
/* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
** full, merge all segments in level iLevel into a single iLevel+1
** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
*/
if( iNext>=FTS3_MERGE_COUNT ){
fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
*piIdx = 0;
}else{
*piIdx = iNext;
}
}
return rc;
}
/*
** The %_segments table is declared as follows:
**
** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
**
** This function reads data from a single row of the %_segments table. The
** specific row is identified by the iBlockid parameter. If paBlob is not
** NULL, then a buffer is allocated using sqlite3_malloc() and populated
** with the contents of the blob stored in the "block" column of the
** identified table row is. Whether or not paBlob is NULL, *pnBlob is set
** to the size of the blob in bytes before returning.
**
** If an error occurs, or the table does not contain the specified row,
** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If
** paBlob is non-NULL, then it is the responsibility of the caller to
** eventually free the returned buffer.
**
** This function may leave an open sqlite3_blob* handle in the
** Fts3Table.pSegments variable. This handle is reused by subsequent calls
** to this function. The handle may be closed by calling the
** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy
** performance improvement, but the blob handle should always be closed
** before control is returned to the user (to prevent a lock being held
** on the database file for longer than necessary). Thus, any virtual table
** method (xFilter etc.) that may directly or indirectly call this function
** must call sqlite3Fts3SegmentsClose() before returning.
*/
SQLITE_PRIVATE int sqlite3Fts3ReadBlock(
Fts3Table *p, /* FTS3 table handle */
sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */
char **paBlob, /* OUT: Blob data in malloc'd buffer */
int *pnBlob, /* OUT: Size of blob data */
int *pnLoad /* OUT: Bytes actually loaded */
){
int rc; /* Return code */
/* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
assert( pnBlob );
if( p->pSegments ){
rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
}else{
if( 0==p->zSegmentsTbl ){
p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName);
if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM;
}
rc = sqlite3_blob_open(
p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
);
}
/* The entire tree fits on the root node. Write it to the segdir table. */
rc = fts3WriteSegdir(p, iLevel, iIdx,
0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);
}
p->nLeafAdd++;
return rc;
}
/*
** Release all memory held by the SegmentWriter object passed as the
** first argument.
*/
static void fts3SegWriterFree(SegmentWriter *pWriter){
if( pWriter ){
sqlite3_free(pWriter->aData);
sqlite3_free(pWriter->zMalloc);
fts3NodeFree(pWriter->pTree);
sqlite3_free(pWriter);
}
}
/*
** The first value in the apVal[] array is assumed to contain an integer.
** This function tests if there exist any documents with docid values that
** are different from that integer. i.e. if deleting the document with docid
** pRowid would mean the FTS3 table were empty.
**
** If successful, *pisEmpty is set to true if the table is empty except for
** document pRowid, or false otherwise, and SQLITE_OK is returned. If an
** error occurs, an SQLite error code is returned.
*/
static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
sqlite3_stmt *pStmt;
int rc;
if( p->zContentTbl ){
/* If using the content=xxx option, assume the table is never empty */
*pisEmpty = 0;
rc = SQLITE_OK;
}else{
rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pStmt) ){
*pisEmpty = sqlite3_column_int(pStmt, 0);
}
rc = sqlite3_reset(pStmt);
}
}
return rc;
}
/*
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(
Fts3Table *p,
int iLangid,
int iIndex,
sqlite3_int64 *pnMax
){
sqlite3_stmt *pStmt;
int rc;
assert( iIndex>=0 && iIndex<p->nIndex );
/* Set pStmt to the compiled version of:
**
** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
**
** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
*/
rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
if( rc!=SQLITE_OK ) return rc;
sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int64(pStmt, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
*pnMax = sqlite3_column_int64(pStmt, 0);
}
return sqlite3_reset(pStmt);
}
/*
** iAbsLevel is an absolute level that may be assumed to exist within
** the database. This function checks if it is the largest level number
** within its index. Assuming no error occurs, *pbMax is set to 1 if
** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK
** is returned. If an error occurs, an error code is returned and the
** final value of *pbMax is undefined.
*/
static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){
/* Set pStmt to the compiled version of:
**
** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
**
** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
*/
sqlite3_stmt *pStmt;
int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
if( rc!=SQLITE_OK ) return rc;
sqlite3_bind_int64(pStmt, 1, iAbsLevel+1);
sqlite3_bind_int64(pStmt, 2,
((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL
);
*pbMax = 0;
if( SQLITE_ROW==sqlite3_step(pStmt) ){
*pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL;
}
return sqlite3_reset(pStmt);
}
/*
** Delete all entries in the %_segments table associated with the segment
** opened with seg-reader pSeg. This function does not affect the contents
** of the %_segdir table.
*/
static int fts3DeleteSegment(
Fts3Table *p, /* FTS table handle */
Fts3SegReader *pSeg /* Segment to delete */
){
int rc = SQLITE_OK; /* Return code */
if( pSeg->iStartBlock ){
sqlite3_stmt *pDelete; /* SQL statement to delete rows */
rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock);
sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock);
sqlite3_step(pDelete);
rc = sqlite3_reset(pDelete);
}
}
return rc;
}
/*
** This function is used after merging multiple segments into a single large
** segment to delete the old, now redundant, segment b-trees. Specifically,
** it:
**
** 1) Deletes all %_segments entries for the segments associated with
** each of the SegReader objects in the array passed as the third
** argument, and
**
** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
** entries regardless of level if (iLevel<0).
**
** SQLITE_OK is returned if successful, otherwise an SQLite error code.
*/
static int fts3DeleteSegdir(
Fts3Table *p, /* Virtual table handle */
int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel, /* Level of %_segdir entries to delete */
Fts3SegReader **apSegment, /* Array of SegReader objects */
int nReader /* Size of array apSegment */
){
int rc = SQLITE_OK; /* Return Code */
int i; /* Iterator variable */
sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */
for(i=0; rc==SQLITE_OK && i<nReader; i++){
rc = fts3DeleteSegment(p, apSegment[i]);
}
if( rc!=SQLITE_OK ){
return rc;
}
assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
if( iLevel==FTS3_SEGCURSOR_ALL ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int64(pDelete, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
}
}else{
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int64(
pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
);
}
}
if( rc==SQLITE_OK ){
sqlite3_step(pDelete);
rc = sqlite3_reset(pDelete);
}
return rc;
}
/*
** When this function is called, buffer *ppList (size *pnList bytes) contains
** a position list that may (or may not) feature multiple columns. This
** function adjusts the pointer *ppList and the length *pnList so that they
** identify the subset of the position list that corresponds to column iCol.
**
** If there are no entries in the input position list for column iCol, then
** *pnList is set to zero before returning.
**
** If parameter bZero is non-zero, then any part of the input list following
** the end of the output list is zeroed before returning.
*/
static void fts3ColumnFilter(
int iCol, /* Column to filter on */
int bZero, /* Zero out anything following *ppList */
char **ppList, /* IN/OUT: Pointer to position list */
int *pnList /* IN/OUT: Size of buffer *ppList in bytes */
){
char *pList = *ppList;
int nList = *pnList;
char *pEnd = &pList[nList];
int iCurrent = 0;
char *p = pList;
assert( iCol>=0 );
while( 1 ){
char c = 0;
while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
if( iCol==iCurrent ){
nList = (int)(p - pList);
break;
}
nList -= (int)(p - pList);
pList = p;
if( nList==0 ){
break;
}
p = &pList[1];
p += fts3GetVarint32(p, &iCurrent);
}
if( bZero && &pList[nList]!=pEnd ){
memset(&pList[nList], 0, pEnd - &pList[nList]);
}
*ppList = pList;
*pnList = nList;
rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0);
}
if( rc==SQLITE_OK ){
rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0);
}
if( rc==SQLITE_OK ){
/* Loop through all %_segdir entries for segments in this index with
** levels equal to or greater than iAbsLevel. As each entry is visited,
** updated it to set (level = -1) and (idx = N), where N is 0 for the
** oldest segment in the range, 1 for the next oldest, and so on.
**
** In other words, move all segments being promoted to level -1,
** setting the "idx" fields as appropriate to keep them in the same
** order. The contents of level -1 (which is never used, except
** transiently here), will be moved back to level iAbsLevel below. */
sqlite3_bind_int64(pRange, 1, iAbsLevel);
while( SQLITE_ROW==sqlite3_step(pRange) ){
sqlite3_bind_int(pUpdate1, 1, iIdx++);
sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0));
sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1));
sqlite3_step(pUpdate1);
rc = sqlite3_reset(pUpdate1);
if( rc!=SQLITE_OK ){
sqlite3_reset(pRange);
break;
}
}
}
if( rc==SQLITE_OK ){
rc = sqlite3_reset(pRange);
}
/* Move level -1 to level iAbsLevel */
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pUpdate2, 1, iAbsLevel);
sqlite3_step(pUpdate2);
rc = sqlite3_reset(pUpdate2);
}
}
}
return rc;
}
/*
** Merge all level iLevel segments in the database into a single
** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
** single segment with a level equal to the numerically largest level
** currently present in the database.
**
** If this function is called with iLevel<0, but there is only one
** segment in the database, SQLITE_DONE is returned immediately.
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
*/
static int fts3SegmentMerge(
Fts3Table *p,
int iLangid, /* Language id to merge */
int iIndex, /* Index in p->aIndex[] to merge */
int iLevel /* Level to merge */
){
int rc; /* Return code */
int iIdx = 0; /* Index of new segment */
sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
i64 iMaxLevel = 0; /* Max level number for this index/langid */
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
|| iLevel>=0
);
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
if( rc!=SQLITE_OK ) goto finished;
}
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the numerically
** greatest segment level currently present in the database for this
** index. The idx of the new segment is always 0. */
if( csr.nSegment==1 && 0==fts3SegReaderIsPending(csr.apSegment[0]) ){
rc = SQLITE_DONE;
goto finished;
}
iNewLevel = iMaxLevel;
bIgnoreEmpty = 1;
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
assert( FTS3_SEGCURSOR_PENDING==-1 );
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
while( SQLITE_OK==rc ){
rc = sqlite3Fts3SegReaderStep(p, &csr);
if( rc!=SQLITE_ROW ) break;
rc = fts3SegWriterAdd(p, &pWriter, 1,
csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
}
if( rc!=SQLITE_OK ) goto finished;
assert( pWriter || bIgnoreEmpty );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(
p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
);
if( rc!=SQLITE_OK ) goto finished;
}
if( pWriter ){
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
}
}
}
finished:
fts3SegWriterFree(pWriter);
sqlite3Fts3SegReaderFinish(&csr);
return rc;
}
/*
** Flush the contents of pendingTerms to level 0 segments.
*/
SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
int i;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
sqlite3Fts3PendingTermsClear(p);
/* Determine the auto-incr-merge setting if unknown. If enabled,
** estimate the number of leaf blocks of content to be written
*/
if( rc==SQLITE_OK && p->bHasStat
&& p->nAutoincrmerge==0xff && p->nLeafAdd>0
){
sqlite3_stmt *pStmt = 0;
rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
rc = sqlite3_step(pStmt);
if( rc==SQLITE_ROW ){
p->nAutoincrmerge = sqlite3_column_int(pStmt, 0);
if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8;
}else if( rc==SQLITE_DONE ){
p->nAutoincrmerge = 0;
}
rc = sqlite3_reset(pStmt);
}
}
return rc;
}
/*
** Encode N integers as varints into a blob.
*/
static void fts3EncodeIntArray(
int N, /* The number of integers to encode */
u32 *a, /* The integer values */
char *zBuf, /* Write the BLOB here */
int *pNBuf /* Write number of bytes if zBuf[] used here */
){
int i, j;
for(i=j=0; i<N; i++){
j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]);
}
*pNBuf = j;
}
/*
** Decode a blob of varints into N integers
*/
static void fts3DecodeIntArray(
int N, /* The number of integers to decode */
u32 *a, /* Write the integer values */
const char *zBuf, /* The BLOB containing the varints */
int nBuf /* size of the BLOB */
){
int i, j;
UNUSED_PARAMETER(nBuf);
for(i=j=0; i<N; i++){
sqlite3_int64 x;
j += sqlite3Fts3GetVarint(&zBuf[j], &x);
assert(j<=nBuf);
a[i] = (u32)(x & 0xffffffff);
}
const int nStat = p->nColumn+2;
if( *pRC ) return;
a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
if( a==0 ){
*pRC = SQLITE_NOMEM;
return;
}
pBlob = (char*)&a[nStat];
rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc ){
sqlite3_free(a);
*pRC = rc;
return;
}
sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
if( sqlite3_step(pStmt)==SQLITE_ROW ){
fts3DecodeIntArray(nStat, a,
sqlite3_column_blob(pStmt, 0),
sqlite3_column_bytes(pStmt, 0));
}else{
memset(a, 0, sizeof(u32)*(nStat) );
}
rc = sqlite3_reset(pStmt);
if( rc!=SQLITE_OK ){
sqlite3_free(a);
*pRC = rc;
return;
}
if( nChng<0 && a[0]<(u32)(-nChng) ){
a[0] = 0;
}else{
a[0] += nChng;
}
for(i=0; i<p->nColumn+1; i++){
u32 x = a[i+1];
if( x+aSzIns[i] < aSzDel[i] ){
x = 0;
}else{
x = x + aSzIns[i] - aSzDel[i];
}
a[i+1] = x;
}
fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
if( rc ){
sqlite3_free(a);
*pRC = rc;
return;
}
sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
sqlite3_step(pStmt);
*pRC = sqlite3_reset(pStmt);
sqlite3_bind_null(pStmt, 2);
sqlite3_free(a);
}
/*
** Merge the entire database so that there is one segment for each
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
int bSeenDone = 0;
int rc;
sqlite3_stmt *pAllLangid = 0;
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid);
sqlite3_bind_int(pAllLangid, 2, p->nIndex);
while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
int i;
int iLangid = sqlite3_column_int(pAllLangid, 0);
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
if( rc==SQLITE_DONE ){
bSeenDone = 1;
rc = SQLITE_OK;
}
}
}
rc2 = sqlite3_reset(pAllLangid);
if( rc==SQLITE_OK ) rc = rc2;
}
sqlite3Fts3SegmentsClose(p);
sqlite3Fts3PendingTermsClear(p);
return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}
/*
** This function is called when the user executes the following statement:
**
** INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
**
** The entire FTS index is discarded and rebuilt. If the table is one
** created using the content=xxx option, then the new index is based on
** the current contents of the xxx table. Otherwise, it is rebuilt based
** on the contents of the %_content table.
*/
static int fts3DoRebuild(Fts3Table *p){
int rc; /* Return Code */
rc = fts3DeleteAll(p, 0);
if( rc==SQLITE_OK ){
u32 *aSz = 0;
u32 *aSzIns = 0;
u32 *aSzDel = 0;
sqlite3_stmt *pStmt = 0;
int nEntry = 0;
/* Compose and prepare an SQL statement to loop through the content table */
char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
sqlite3_free(zSql);
}
if( rc==SQLITE_OK ){
int nByte = sizeof(u32) * (p->nColumn+1)*3;
aSz = (u32 *)sqlite3_malloc(nByte);
if( aSz==0 ){
rc = SQLITE_NOMEM;
}else{
memset(aSz, 0, nByte);
aSzIns = &aSz[p->nColumn+1];
aSzDel = &aSzIns[p->nColumn+1];
}
}
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
int iCol;
int iLangid = langidFromSelect(p, pStmt);
rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0));
memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
if( p->abNotindexed[iCol]==0 ){
const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
}
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSz);
}
if( rc!=SQLITE_OK ){
sqlite3_finalize(pStmt);
pStmt = 0;
}else{
nEntry++;
for(iCol=0; iCol<=p->nColumn; iCol++){
aSzIns[iCol] += aSz[iCol];
}
}
}
if( p->bFts4 ){
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);
}
sqlite3_free(aSz);
if( pStmt ){
int rc2 = sqlite3_finalize(pStmt);
if( rc==SQLITE_OK ){
rc = rc2;
}
}
}
return rc;
}
/*
** This function opens a cursor used to read the input data for an
** incremental merge operation. Specifically, it opens a cursor to scan
** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute
** level iAbsLevel.
*/
static int fts3IncrmergeCsr(
Fts3Table *p, /* FTS3 table handle */
sqlite3_int64 iAbsLevel, /* Absolute level to open */
int nSeg, /* Number of segments to merge */
Fts3MultiSegReader *pCsr /* Cursor object to populate */
){
int rc; /* Return Code */
sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */
int nByte; /* Bytes allocated at pCsr->apSegment[] */
/* Allocate space for the Fts3MultiSegReader.aCsr[] array */
memset(pCsr, 0, sizeof(*pCsr));
nByte = sizeof(Fts3SegReader *) * nSeg;
pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
if( pCsr->apSegment==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pCsr->apSegment, 0, nByte);
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
}
if( z[0]==',' && z[1]!='\0' ){
z++;
nMin = fts3Getint(&z);
}
if( z[0]!='\0' || nMin<2 ){
rc = SQLITE_ERROR;
}else{
rc = SQLITE_OK;
if( !p->bHasStat ){
assert( p->bFts4==0 );
sqlite3Fts3CreateStatTable(&rc, p);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
}
sqlite3Fts3SegmentsClose(p);
}
return rc;
}
/*
** Process statements of the form:
**
** INSERT INTO table(table) VALUES('automerge=X');
**
** where X is an integer. X==0 means to turn automerge off. X!=0 means
** turn it on. The setting is persistent.
*/
static int fts3DoAutoincrmerge(
Fts3Table *p, /* FTS3 table handle */
const char *zParam /* Nul-terminated string containing boolean */
){
int rc = SQLITE_OK;
sqlite3_stmt *pStmt = 0;
p->nAutoincrmerge = fts3Getint(&zParam);
if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){
p->nAutoincrmerge = 8;
}
if( !p->bHasStat ){
assert( p->bFts4==0 );
sqlite3Fts3CreateStatTable(&rc, p);
if( rc ) return rc;
}
rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
if( rc ) return rc;
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
return rc;
}
/*
** Return a 64-bit checksum for the FTS index entry specified by the
** arguments to this function.
*/
static u64 fts3ChecksumEntry(
const char *zTerm, /* Pointer to buffer containing term */
int nTerm, /* Size of zTerm in bytes */
int iLangid, /* Language id for current row */
int iIndex, /* Index (0..Fts3Table.nIndex-1) */
i64 iDocid, /* Docid for current row. */
int iCol, /* Column number */
int iPos /* Position */
){
int i;
u64 ret = (u64)iDocid;
ret += (ret<<3) + iLangid;
ret += (ret<<3) + iIndex;
ret += (ret<<3) + iCol;
ret += (ret<<3) + iPos;
for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i];
return ret;
}
/*
** Return a checksum of all entries in the FTS index that correspond to
** language id iLangid. The checksum is calculated by XORing the checksums
** of each individual entry (see fts3ChecksumEntry()) together.
**
** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
** return value is undefined in this case.
*/
static u64 fts3ChecksumIndex(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id to return cksum for */
int iIndex, /* Index to cksum (0..p->nIndex-1) */
int *pRc /* OUT: Return code */
){
Fts3SegFilter filter;
Fts3MultiSegReader csr;
int rc;
u64 cksum = 0;
assert( *pRc==SQLITE_OK );
memset(&filter, 0, sizeof(filter));
memset(&csr, 0, sizeof(csr));
filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
filter.flags |= FTS3_SEGMENT_SCAN;
rc = sqlite3Fts3SegReaderCursor(
p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
}
if( rc==SQLITE_OK ){
while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
char *pCsr = csr.aDoclist;
char *pEnd = &pCsr[csr.nDoclist];
i64 iDocid = 0;
i64 iCol = 0;
i64 iPos = 0;
pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
while( pCsr<pEnd ){
i64 iVal = 0;
pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
if( pCsr<pEnd ){
if( iVal==0 || iVal==1 ){
iCol = 0;
iPos = 0;
if( iVal ){
pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
}else{
pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
iDocid += iVal;
}
}else{
iPos += (iVal - 2);
cksum = cksum ^ fts3ChecksumEntry(
csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
(int)iCol, (int)iPos
);
}
}
}
}
}
sqlite3Fts3SegReaderFinish(&csr);
*pRc = rc;
return cksum;
}
/*
** Check if the contents of the FTS index match the current contents of the
** content table. If no error occurs and the contents do match, set *pbOk
** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
** to false before returning.
**
** If an error occurs (e.g. an OOM or IO error), return an SQLite error
** code. The final value of *pbOk is undefined in this case.
*/
static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
int rc = SQLITE_OK; /* Return code */
u64 cksum1 = 0; /* Checksum based on FTS index contents */
u64 cksum2 = 0; /* Checksum based on %_content contents */
sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */
/* This block calculates the checksum according to the FTS index. */
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid);
sqlite3_bind_int(pAllLangid, 2, p->nIndex);
while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
int iLangid = sqlite3_column_int(pAllLangid, 0);
int i;
for(i=0; i<p->nIndex; i++){
cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
}
}
rc2 = sqlite3_reset(pAllLangid);
if( rc==SQLITE_OK ) rc = rc2;
}
/* This block calculates the checksum according to the %_content table */
if( rc==SQLITE_OK ){
sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
sqlite3_stmt *pStmt = 0;
char *zSql;
zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
sqlite3_free(zSql);
}
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iDocid = sqlite3_column_int64(pStmt, 0);
int iLang = langidFromSelect(p, pStmt);
int iCol;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
int nText = sqlite3_column_bytes(pStmt, iCol+1);
sqlite3_tokenizer_cursor *pT = 0;
rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken = 0; /* Number of bytes in token */
int iDum1 = 0, iDum2 = 0; /* Dummy variables */
int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
if( rc==SQLITE_OK ){
int i;
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, nToken, iLang, 0, iDocid, iCol, iPos
);
for(i=1; i<p->nIndex; i++){
if( p->aIndex[i].nPrefix<=nToken ){
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
);
}
}
}
}
if( pT ) pModule->xClose(pT);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
}
}
sqlite3_finalize(pStmt);
}
*pbOk = (cksum1==cksum2);
return rc;
}
/*
** Run the integrity-check. If no error occurs and the current contents of
** the FTS index are correct, return SQLITE_OK. Or, if the contents of the
** FTS index are incorrect, return SQLITE_CORRUPT_VTAB.
**
** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite
** error code.
**
** The integrity-check works as follows. For each token and indexed token
** prefix in the document set, a 64-bit checksum is calculated (by code
** in fts3ChecksumEntry()) based on the following:
**
** + The index number (0 for the main index, 1 for the first prefix
** index etc.),
** + The token (or token prefix) text itself,
** + The language-id of the row it appears in,
** + The docid of the row it appears in,
}
return rc;
}
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
*/
SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
Fts3DeferredToken *pDef;
for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
fts3PendingListDelete(pDef->pList);
pDef->pList = 0;
}
}
/*
** Free all entries in the pCsr->pDeffered list. Entries are added to
** this list using sqlite3Fts3DeferToken().
*/
SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
Fts3DeferredToken *pDef;
Fts3DeferredToken *pNext;
for(pDef=pCsr->pDeferred; pDef; pDef=pNext){
pNext = pDef->pNext;
fts3PendingListDelete(pDef->pList);
sqlite3_free(pDef);
}
pCsr->pDeferred = 0;
}
/*
** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
** based on the row that pCsr currently points to.
**
** A deferred-doclist is like any other doclist with position information
** included, except that it only contains entries for a single row of the
** table, not for all rows.
*/
SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
int rc = SQLITE_OK; /* Return code */
if( pCsr->pDeferred ){
int i; /* Used to iterate through table columns */
sqlite3_int64 iDocid; /* Docid of the row pCsr points to */
Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
sqlite3_tokenizer *pT = p->pTokenizer;
sqlite3_tokenizer_module const *pModule = pT->pModule;
assert( pCsr->isRequireSeek==0 );
iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
if( p->abNotindexed[i]==0 ){
const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
sqlite3_tokenizer_cursor *pTC = 0;
rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken = 0; /* Number of bytes in token */
int iDum1 = 0, iDum2 = 0; /* Dummy variables */
int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
Fts3PhraseToken *pPT = pDef->pToken;
if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
&& (pPT->bFirst==0 || iPos==0)
&& (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
&& (0==memcmp(zToken, pPT->z, pPT->n))
){
fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
}
}
}
if( pTC ) pModule->xClose(pTC);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
}
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
if( pDef->pList ){
rc = fts3PendingListAppendVarint(&pDef->pList, 0);
}
}
}
return rc;
}
SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(
Fts3DeferredToken *p,
char **ppData,
int *pnData
){
char *pRet;
int nSkip;
sqlite3_int64 dummy;
*ppData = 0;
*pnData = 0;
if( p->pList==0 ){
return SQLITE_OK;
}
pRet = (char *)sqlite3_malloc(p->pList->nData);
if( !pRet ) return SQLITE_NOMEM;
nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy);
*pnData = p->pList->nData - nSkip;
*ppData = pRet;
memcpy(pRet, &p->pList->aData[nSkip], *pnData);
return SQLITE_OK;
}
pDeferred->pNext = pCsr->pDeferred;
pDeferred->iCol = iCol;
pCsr->pDeferred = pDeferred;
assert( pToken->pDeferred==0 );
pToken->pDeferred = pDeferred;
return SQLITE_OK;
}
#endif
/*
** SQLite value pRowid contains the rowid of a row that may or may not be
** present in the FTS3 table. If it is, delete it and adjust the contents
** of subsiduary data structures accordingly.
*/
static int fts3DeleteByRowid(
Fts3Table *p,
sqlite3_value *pRowid,
int *pnChng, /* IN/OUT: Decrement if row is deleted */
u32 *aSzDel
){
int rc = SQLITE_OK; /* Return code */
int bFound = 0; /* True if *pRowid really is in the table */
fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
if( bFound && rc==SQLITE_OK ){
int isEmpty = 0; /* Deleting *pRowid leaves the table empty */
rc = fts3IsEmpty(p, pRowid, &isEmpty);
if( rc==SQLITE_OK ){
if( isEmpty ){
/* Deleting this row means the whole table is empty. In this case
** delete the contents of all three tables and throw away any
** data in the pendingTerms hash table. */
rc = fts3DeleteAll(p, 1);
*pnChng = 0;
memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
}else{
*pnChng = *pnChng - 1;
if( p->zContentTbl==0 ){
fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
}
if( p->bHasDocsize ){
fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
}
}
}
}
return rc;
}
/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables. The schema of the virtual table being:
**
** CREATE TABLE <table name>(
** <user columns>,
** <table name> HIDDEN,
** docid HIDDEN,
** <langid> HIDDEN
** );
**
**
*/
SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
sqlite3_vtab *pVtab, /* FTS3 vtab object */
int nArg, /* Size of argument array */
sqlite3_value **apVal, /* Array of arguments */
sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
){
Fts3Table *p = (Fts3Table *)pVtab;
int rc = SQLITE_OK; /* Return Code */
u32 *aSzIns = 0; /* Sizes of inserted documents */
u32 *aSzDel = 0; /* Sizes of deleted documents */
int nChng = 0; /* Net change in number of documents */
int bInsertDone = 0;
/* At this point it must be known if the %_stat table exists or not.
** So bHasStat may not be 2. */
assert( p->bHasStat==0 || p->bHasStat==1 );
assert( p->pSegments==0 );
assert(
nArg==1 /* DELETE operations */
|| nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
);
/* Check for a "special" INSERT operation. One of the form:
**
** INSERT INTO xyz(xyz) VALUES('command');
*/
if( nArg>1
&& sqlite3_value_type(apVal[0])==SQLITE_NULL
&& sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL
){
rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
goto update_out;
}
if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
rc = SQLITE_CONSTRAINT;
goto update_out;
}
/* Allocate space to hold the change in document sizes */
aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
if( aSzDel==0 ){
rc = SQLITE_NOMEM;
goto update_out;
}
aSzIns = &aSzDel[p->nColumn+1];
memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
rc = fts3Writelock(p);
if( rc!=SQLITE_OK ) goto update_out;
/* If this is an INSERT operation, or an UPDATE that modifies the rowid
** value, then this operation requires constraint handling.
**
** If the on-conflict mode is REPLACE, this means that the existing row
** should be deleted from the database before inserting the new row. Or,
** if the on-conflict mode is other than REPLACE, then this method must
** detect the conflict and return SQLITE_CONSTRAINT before beginning to
** modify the database file.
*/
if( nArg>1 && p->zContentTbl==0 ){
/* Find the value object that holds the new rowid value. */
sqlite3_value *pNewRowid = apVal[3+p->nColumn];
if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
pNewRowid = apVal[1];
}
if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && (
sqlite3_value_type(apVal[0])==SQLITE_NULL
|| sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid)
)){
/* The new rowid is not NULL (in this case the rowid will be
** automatically assigned and there is no chance of a conflict), and
** the statement is either an INSERT or an UPDATE that modifies the
** rowid column. So if the conflict mode is REPLACE, then delete any
** existing row with rowid=pNewRowid.
**
** Or, if the conflict mode is not REPLACE, insert the new record into
** the %_content table. If we hit the duplicate rowid constraint (or any
** other error) while doing so, return immediately.
**
** This branch may also run if pNewRowid contains a value that cannot
** be losslessly converted to an integer. In this case, the eventual
** call to fts3InsertData() (either just below or further on in this
** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is
** invoked, it will delete zero rows (since no row will have
** docid=$pNewRowid if $pNewRowid is not an integer value).
*/
if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){
rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel);
}else{
rc = fts3InsertData(p, apVal, pRowid);
bInsertDone = 1;
}
}
}
if( rc!=SQLITE_OK ){
goto update_out;
}
/* If this is a DELETE or UPDATE operation, remove the old record. */
if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
}
/* If this is an INSERT or UPDATE operation, insert the new record. */
if( nArg>1 && rc==SQLITE_OK ){
int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
if( bInsertDone==0 ){
rc = fts3InsertData(p, apVal, pRowid);
if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
rc = FTS_CORRUPT_VTAB;
}
}
if( rc==SQLITE_OK ){
rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid);
}
if( rc==SQLITE_OK ){
assert( p->iPrevDocid==*pRowid );
rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSzIns);
}
nChng++;
}
if( p->bFts4 ){
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
}
update_out:
sqlite3_free(aSzDel);
sqlite3Fts3SegmentsClose(p);
return rc;
}
/*
** Flush any data in the pending-terms hash table to disk. If successful,
** merge all segments in the database (including the new segment, if
** there was any data to flush) into a single segment.
*/
SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){
int rc;
rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
if( rc==SQLITE_OK ){
rc = fts3DoOptimize(p, 1);
if( rc==SQLITE_OK || rc==SQLITE_DONE ){
int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
if( rc2!=SQLITE_OK ) rc = rc2;
}else{
sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
}
}
sqlite3Fts3SegmentsClose(p);
return rc;
}
#endif
/************** End of fts3_write.c ******************************************/
/************** Begin file fts3_snippet.c ************************************/
/*
** 2009 Oct 23
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/* #include "fts3Int.h" */
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
/* #include <string.h> */
/*
** Append a string to the string-buffer passed as the first argument.
**
** If nAppend is negative, then the length of the string zAppend is
** determined using strlen().
*/
static int fts3StringAppend(
StrBuffer *pStr, /* Buffer to append to */
const char *zAppend, /* Pointer to data to append to buffer */
int nAppend /* Size of zAppend in bytes (or -1) */
){
if( nAppend<0 ){
nAppend = (int)strlen(zAppend);
}
/* If there is insufficient space allocated at StrBuffer.z, use realloc()
** to grow the buffer until so that it is big enough to accomadate the
** appended data.
*/
if( pStr->n+nAppend+1>=pStr->nAlloc ){
int nAlloc = pStr->nAlloc+nAppend+100;
char *zNew = sqlite3_realloc(pStr->z, nAlloc);
if( !zNew ){
return SQLITE_NOMEM;
}
pStr->z = zNew;
pStr->nAlloc = nAlloc;
}
assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
/* Append the data to the string buffer. */
memcpy(&pStr->z[pStr->n], zAppend, nAppend);
pStr->n += nAppend;
pStr->z[pStr->n] = '\0';
return SQLITE_OK;
}
/*
** The fts3BestSnippet() function often selects snippets that end with a
** query term. That is, the final term of the snippet is always a term
** that requires highlighting. For example, if 'X' is a highlighted term
** and '.' is a non-highlighted term, BestSnippet() may select:
**
** ........X.....X
**
** This function "shifts" the beginning of the snippet forward in the
** document so that there are approximately the same number of
** non-highlighted terms to the right of the final highlighted term as there
** are to the left of the first highlighted term. For example, to this:
**
** ....X.....X....
**
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document
** actually contains terms that follow the final highlighted term.
*/
static int fts3SnippetShift(
Fts3Table *pTab, /* FTS3 table snippet comes from */
int iLangid, /* Language id to use in tokenizing */
int nSnippet, /* Number of tokens desired for snippet */
const char *zDoc, /* Document text to extract snippet from */
int nDoc, /* Size of buffer zDoc in bytes */
int *piPos, /* IN/OUT: First token of snippet */
u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
){
u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
if( hlmask ){
int nLeft; /* Tokens to the left of first highlight */
int nRight; /* Tokens to the right of last highlight */
int nDesired; /* Ideal number of tokens to shift forward */
for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
nDesired = (nLeft-nRight)/2;
/* Ideally, the start of the snippet should be pushed forward in the
** document nDesired tokens. This block checks if there are actually
** nDesired tokens to the right of the snippet. If so, *piPos and
** *pHlMask are updated to shift the snippet nDesired tokens to the
** right. Otherwise, the snippet is shifted by the number of tokens
** available.
*/
if( nDesired>0 ){
int nShift; /* Number of tokens to shift snippet by */
int iCurrent = 0; /* Token counter */
int rc; /* Return Code */
sqlite3_tokenizer_module *pMod;
sqlite3_tokenizer_cursor *pC;
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
** or more tokens in zDoc/nDoc.
*/
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
return rc;
}
while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
}
pMod->xClose(pC);
if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
assert( nShift<=nDesired );
if( nShift>0 ){
*piPos += nShift;
*pHlmask = hlmask >> nShift;
}
}
}
return SQLITE_OK;
}
/*
** Extract the snippet text for fragment pFragment from cursor pCsr and
** append it to string buffer pOut.
*/
static int fts3SnippetText(
Fts3Cursor *pCsr, /* FTS3 Cursor */
SnippetFragment *pFragment, /* Snippet to extract */
int iFragment, /* Fragment number */
int isLast, /* True for final fragment in snippet */
int nSnippet, /* Number of tokens in extracted snippet */
const char *zOpen, /* String inserted before highlighted term */
const char *zClose, /* String inserted after highlighted term */
const char *zEllipsis, /* String inserted between snippets */
StrBuffer *pOut /* Write output here */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc; /* Return code */
const char *zDoc; /* Document text to extract snippet from */
int nDoc; /* Size of zDoc in bytes */
int iCurrent = 0; /* Current token number of document */
int iEnd = 0; /* Byte offset of end of current token */
int isShiftDone = 0; /* True after snippet is shifted */
int iPos = pFragment->iPos; /* First token of snippet */
u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
int iCol = pFragment->iCol+1; /* Query column to extract text from */
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
if( zDoc==0 ){
if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
return SQLITE_OK;
}
nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
if( rc!=SQLITE_OK ){
return rc;
}
while( rc==SQLITE_OK ){
const char *ZDUMMY; /* Dummy argument used with tokenizer */
int DUMMY1 = -1; /* Dummy argument used with tokenizer */
int iBegin = 0; /* Offset in zDoc of start of token */
int iFin = 0; /* Offset in zDoc of end of token */
int isHighlight = 0; /* True for highlighted terms */
/* Variable DUMMY1 is initialized to a negative value above. Elsewhere
** in the FTS code the variable that the third argument to xNext points to
** is initialized to zero before the first (*but not necessarily
** subsequent*) call to xNext(). This is done for a particular application
** that needs to know whether or not the tokenizer is being used for
** snippet generation or for some other purpose.
**
** Extreme care is required when writing code to depend on this
** initialization. It is not a documented part of the tokenizer interface.
** If a tokenizer is used directly by any code outside of FTS, this
** convention might not be respected. */
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
/* Special case - the last token of the snippet is also the last token
** of the column. Append any punctuation that occurred between the end
** of the previous token and the end of the document to the output.
** Then break out of the loop. */
rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
}
break;
}
if( iCurrent<iPos ){ continue; }
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(
pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
);
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
** required. They are required if (a) this is not the first fragment,
** or (b) this fragment does not begin at position 0 of its column.
*/
if( rc==SQLITE_OK ){
if( iPos>0 || iFragment>0 ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}else if( iBegin ){
rc = fts3StringAppend(pOut, zDoc, iBegin);
}
}
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
}
if( iCurrent>=(iPos+nSnippet) ){
if( isLast ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}
break;
}
/* Set isHighlight to true if this term should be highlighted. */
isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
iEnd = iFin;
}
pMod->xClose(pC);
return rc;
}
/*
** This function is used to count the entries in a column-list (a
** delta-encoded list of term offsets within a single column of a single
** row). When this function is called, *ppCollist should point to the
** beginning of the first varint in the column-list (the varint that
** contains the position of the first matching term in the column data).
** Before returning, *ppCollist is set to point to the first byte after
** the last varint in the column-list (either the 0x00 signifying the end
** of the position-list, or the 0x01 that precedes the column number of
** the next column in the position-list).
**
** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
char *pEnd = *ppCollist;
char c = 0;
int nEntry = 0;
/* A column-list is terminated by either a 0x01 or 0x00. */
while( 0xFE & (*pEnd | c) ){
if( !pCsr->pExpr ){
sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
return;
}
memset(&sCtx, 0, sizeof(sCtx));
assert( pCsr->isRequireSeek==0 );
/* Count the number of terms in the query */
rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
if( rc!=SQLITE_OK ) goto offsets_out;
/* Allocate the array of TermOffset iterators. */
sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
if( 0==sCtx.aTerm ){
rc = SQLITE_NOMEM;
goto offsets_out;
}
sCtx.iDocid = pCsr->iPrevId;
sCtx.pCsr = pCsr;
/* Loop through the table columns, appending offset information to
** string-buffer res for each column.
*/
for(iCol=0; iCol<pTab->nColumn; iCol++){
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
const char *ZDUMMY; /* Dummy argument used with xNext() */
int NDUMMY = 0; /* Dummy argument used with xNext() */
int iStart = 0;
int iEnd = 0;
int iCurrent = 0;
const char *zDoc;
int nDoc;
/* Initialize the contents of sCtx.aTerm[] for column iCol. There is
** no way that this operation can fail, so the return code from
** fts3ExprIterate() can be discarded.
*/
sCtx.iCol = iCol;
sCtx.iTerm = 0;
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
/* Retreive the text stored in column iCol. If an SQL NULL is stored
** in column iCol, jump immediately to the next iteration of the loop.
** If an OOM occurs while retrieving the data (this can happen if SQLite
** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
** to the caller.
*/
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
if( zDoc==0 ){
if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
continue;
}
rc = SQLITE_NOMEM;
goto offsets_out;
}
/* Initialize a tokenizer iterator to iterate through column iCol. */
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
zDoc, nDoc, &pC
);
if( rc!=SQLITE_OK ) goto offsets_out;
rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
while( rc==SQLITE_OK ){
int i; /* Used to loop through terms */
int iMinPos = 0x7FFFFFFF; /* Position of next token */
TermOffset *pTerm = 0; /* TermOffset associated with next token */
for(i=0; i<nToken; i++){
TermOffset *pT = &sCtx.aTerm[i];
if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
iMinPos = pT->iPos-pT->iOff;
pTerm = pT;
}
}
if( !pTerm ){
/* All offsets for this column have been gathered. */
rc = SQLITE_DONE;
}else{
assert( iCurrent<=iMinPos );
if( 0==(0xFE&*pTerm->pList) ){
pTerm->pList = 0;
}else{
fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
}
while( rc==SQLITE_OK && iCurrent<iMinPos ){
rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
}
if( rc==SQLITE_OK ){
char aBuffer[64];
sqlite3_snprintf(sizeof(aBuffer), aBuffer,
"%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
);
rc = fts3StringAppend(&res, aBuffer, -1);
}else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
rc = FTS_CORRUPT_VTAB;
}
}
}
if( rc==SQLITE_DONE ){
rc = SQLITE_OK;
}
pMod->xClose(pC);
if( rc!=SQLITE_OK ) goto offsets_out;
}
offsets_out:
sqlite3_free(sCtx.aTerm);
assert( rc!=SQLITE_DONE );
sqlite3Fts3SegmentsClose(pTab);
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
sqlite3_free(res.z);
}else{
sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
}
**
** * No foreign key violations are detected or reported.
**
** * CHECK constraints are not enforced.
**
** * No constraint handling mode except for "OR ROLLBACK" is supported.
**
**
** PREPARATION
**
** An "RBU update" is stored as a separate SQLite database. A database
** containing an RBU update is an "RBU database". For each table in the
** target database to be updated, the RBU database should contain a table
** named "data_<target name>" containing the same set of columns as the
** target table, and one more - "rbu_control". The data_% table should
** have no PRIMARY KEY or UNIQUE constraints, but each column should have
** the same type as the corresponding column in the target database.
** The "rbu_control" column should have no type at all. For example, if
** the target database contains:
**
** CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c UNIQUE);
**
** Then the RBU database should contain:
**
** CREATE TABLE data_t1(a INTEGER, b TEXT, c, rbu_control);
**
** The order of the columns in the data_% table does not matter.
**
** Instead of a regular table, the RBU database may also contain virtual
** tables or view named using the data_<target> naming scheme.
**
** Instead of the plain data_<target> naming scheme, RBU database tables
** may also be named data<integer>_<target>, where <integer> is any sequence
** of zero or more numeric characters (0-9). This can be significant because
** tables within the RBU database are always processed in order sorted by
** name. By judicious selection of the <integer> portion of the names
** of the RBU tables the user can therefore control the order in which they
** are processed. This can be useful, for example, to ensure that "external
** content" FTS4 tables are updated before their underlying content tables.
**
** If the target database table is a virtual table or a table that has no
** PRIMARY KEY declaration, the data_% table must also contain a column
** named "rbu_rowid". This column is mapped to the tables implicit primary
** key column - "rowid". Virtual tables for which the "rowid" column does
** not function like a primary key value cannot be updated using RBU. For
** example, if the target db contains either of the following:
**
** CREATE VIRTUAL TABLE x1 USING fts3(a, b);
** CREATE TABLE x1(a, b)
**
** then the RBU database should contain:
**
** CREATE TABLE data_x1(a, b, rbu_rowid, rbu_control);
**
** All non-hidden columns (i.e. all columns matched by "SELECT *") of the
** target table must be present in the input table. For virtual tables,
** hidden columns are optional - they are updated by RBU if present in
** the input table, or not otherwise. For example, to write to an fts4
** table with a hidden languageid column such as:
**
** CREATE VIRTUAL TABLE ft1 USING fts4(a, b, languageid='langid');
**
** Either of the following input table schemas may be used:
**
** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control);
** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control);
**
** For each row to INSERT into the target database as part of the RBU
** update, the corresponding data_% table should contain a single record
** with the "rbu_control" column set to contain integer value 0. The
** other columns should be set to the values that make up the new record
** to insert.
**
** If the target database table has an INTEGER PRIMARY KEY, it is not
** possible to insert a NULL value into the IPK column. Attempting to
** do so results in an SQLITE_MISMATCH error.
**
** For each row to DELETE from the target database as part of the RBU
** update, the corresponding data_% table should contain a single record
** with the "rbu_control" column set to contain integer value 1. The
** real primary key values of the row to delete should be stored in the
** corresponding columns of the data_% table. The values stored in the
** other columns are not used.
**
** For each row to UPDATE from the target database as part of the RBU
** update, the corresponding data_% table should contain a single record
** with the "rbu_control" column set to contain a value of type text.
** The real primary key values identifying the row to update should be
** stored in the corresponding columns of the data_% table row, as should
** the new values of all columns being update. The text value in the
** "rbu_control" column must contain the same number of characters as
** there are columns in the target database table, and must consist entirely
** of 'x' and '.' characters (or in some special cases 'd' - see below). For
** each column that is being updated, the corresponding character is set to
** 'x'. For those that remain as they are, the corresponding character of the
** rbu_control value should be set to '.'. For example, given the tables
** above, the update statement:
**
** UPDATE t1 SET c = 'usa' WHERE a = 4;
**
** is represented by the data_t1 row created by:
**
** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..x');
**
** Instead of an 'x' character, characters of the rbu_control value specified
** for UPDATEs may also be set to 'd'. In this case, instead of updating the
** target table with the value stored in the corresponding data_% column, the
** user-defined SQL function "rbu_delta()" is invoked and the result stored in
** the target table column. rbu_delta() is invoked with two arguments - the
** original value currently stored in the target table column and the
** value specified in the data_xxx table.
**
** For example, this row:
**
** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d');
**
** is similar to an UPDATE statement such as:
**
** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4;
**
** Finally, if an 'f' character appears in place of a 'd' or 's' in an
** ota_control string, the contents of the data_xxx table column is assumed
** to be a "fossil delta" - a patch to be applied to a blob value in the
** format used by the fossil source-code management system. In this case
** the existing value within the target database table must be of type BLOB.
**
** + the first term on each page is stored in the same way as the
** very first term of the segment:
**
** varint : size of first term
** blob: first term data
**
** 5. Segment doclist indexes:
**
** Doclist indexes are themselves b-trees, however they usually consist of
** a single leaf record only. The format of each doclist index leaf page
** is:
**
** * Flags byte. Bits are:
** 0x01: Clear if leaf is also the root page, otherwise set.
**
** * Page number of fts index leaf page. As a varint.
**
** * First rowid on page indicated by previous field. As a varint.
**
** * A list of varints, one for each subsequent termless page. A
** positive delta if the termless page contains at least one rowid,
** or an 0x00 byte otherwise.
**
** Internal doclist index nodes are:
**
** * Flags byte. Bits are:
** 0x01: Clear for root page, otherwise set.
**
** * Page number of first child page. As a varint.
**
** * Copy of first rowid on page indicated by previous field. As a varint.
**
** * A list of delta-encoded varints - the first rowid on each subsequent
** child page.
**
*/
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID 10 /* The structure record */
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
#define fts5_dri(segid, dlidx, height, pgno) ( \
((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
((i64)(height) << (FTS5_DATA_PAGE_B)) + \
((i64)(pgno)) \
)
#define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
/*
** Maximum segments permitted in a single index
*/
#define FTS5_MAX_SEGMENT 2000
#ifdef SQLITE_DEBUG
static int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
#endif
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
struct Fts5Data {
u8 *p; /* Pointer to buffer containing record */
int nn; /* Size of record in bytes */
int szLeaf; /* Size of leaf without page-index */
};
/*
** One object per %_data table.
*/
struct Fts5Index {
Fts5Config *pConfig; /* Virtual table configuration */
char *zDataTbl; /* Name of %_data table */
int nWorkUnit; /* Leaf pages in a "unit" of work */
/*
** Variables related to the accumulation of tokens and doclists within the
** in-memory hash tables before they are flushed to disk.
*/
Fts5Hash *pHash; /* Hash table for in-memory data */
int nPendingData; /* Current bytes of pending data */
i64 iWriteRowid; /* Rowid for current doc being written */
int bDelete; /* Current write is a delete */
/* Error state. */
int rc; /* Current error code */
/* State used by the fts5DataXXX() functions. */
sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */
sqlite3_stmt *pIdxSelect;
int nRead; /* Total number of blocks read */
sqlite3_stmt *pDataVersion;
i64 iStructVersion; /* data_version when pStruct read */
Fts5Structure *pStruct; /* Current db structure (or NULL) */
};
struct Fts5DoclistIter {
u8 *aEof; /* Pointer to 1 byte past end of doclist */
/* Output variables. aPoslist==0 at EOF */
i64 iRowid;
u8 *aPoslist;
int nPoslist;
int nSize;
};
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
int iSegid; /* Segment id */
int pgnoFirst; /* First leaf page number in segment */
int pgnoLast; /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
int nMerge; /* Number of segments in incr-merge */
int nSeg; /* Total number of segments on level */
Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
int nRef; /* Object reference count */
u64 nWriteCounter; /* Total leaves written to level 0 */
int nSegment; /* Total segments in this structure */
int nLevel; /* Number of levels in this index */
Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
};
/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
int pgno; /* Page number for this page */
int iPrevPgidx; /* Previous value written into pgidx */
Fts5Buffer buf; /* Buffer containing leaf data */
Fts5Buffer pgidx; /* Buffer containing page-index */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
int pgno; /* Page number for this page */
int bPrevValid; /* True if iPrev is valid */
i64 iPrev; /* Previous rowid value written to page */
Fts5Buffer buf; /* Buffer containing page data */
};
struct Fts5SegWriter {
int iSegid; /* Segid to write to */
Fts5PageWriter writer; /* PageWriter object */
i64 iPrevRowid; /* Previous rowid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
/* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
int nDlidx; /* Allocated size of aDlidx[] array */
Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
/* Values to insert into the %_idx table */
Fts5Buffer btterm; /* Next term to insert into %_idx table */
int iBtPage; /* Page number corresponding to btterm */
};
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
u16 iFirst; /* aSeg[] index of firstest iterator */
u8 bTermEq; /* True if the terms are equal */
};
/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
** The segment to iterate through.
**
** iLeafPgno:
** Current leaf page number within segment.
**
** iLeafOffset:
** Byte offset within the current leaf that is the first byte of the
** position list data (one byte passed the position-list size field).
** rowid field of the current entry. Usually this is the size field of the
** position list data. The exception is if the rowid for the current entry
** is the last thing on the leaf page.
**
** pLeaf:
** Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
** Leaf page number containing the last term read from the segment. And
** the offset immediately following the term data.
**
** flags:
** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
** FTS5_SEGITER_ONETERM:
** If set, set the iterator to point to EOF after the current doclist
** has been exhausted. Do not proceed to the next term in the segment.
**
** FTS5_SEGITER_REVERSE:
** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
** it is set, iterate through rowid in descending order instead of the
** default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
** These are used if the FTS5_SEGITER_REVERSE flag is set.
**
** For each rowid on the page corresponding to the current term, the
** corresponding aRowidOffset[] entry is set to the byte offset of the
** start of the "position-list-size" field within the page.
**
** iTermIdx:
** Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
Fts5StructureSegment *pSeg; /* Segment to iterate through */
int flags; /* Mask of configuration flags */
int iLeafPgno; /* Current leaf page number */
Fts5Data *pLeaf; /* Current leaf data */
Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
int iLeafOffset; /* Byte offset within current leaf */
/* Next method */
void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
/* The page and offset from which the current term was read. The offset
** is the offset of the first rowid in the current doclist. */
int iTermLeafPgno;
int iTermLeafOffset;
int iPgidxOff; /* Next offset in pgidx */
int iEndofDoclist;
/* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
int iRowidOffset; /* Current entry in aRowidOffset[] */
int nRowidOffset; /* Allocated size of aRowidOffset[] array */
int *aRowidOffset; /* Array of offset to rowid fields */
Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
/* Variables populated based on current entry. */
Fts5Buffer term; /* Current term */
i64 iRowid; /* Current rowid */
int nPos; /* Number of bytes in current position list */
u8 bDel; /* True if the delete flag is set */
};
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
(x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
** There is no way to tell if this is populated or not.
*/
struct Fts5Iter {
Fts5IndexIter base; /* Base class containing output vars */
Fts5Index *pIndex; /* Index that owns this iterator */
Fts5Structure *pStruct; /* Database structure for this iterator */
Fts5Buffer poslist; /* Buffer containing current poslist */
Fts5Colset *pColset; /* Restrict matches to these columns */
/* Invoked to set output variables. */
void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
int nSeg; /* Size of aSeg[] array */
int bRev; /* True to iterate in reverse order */
u8 bSkipEmpty; /* True to skip deleted entries */
i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
Fts5CResult *aFirst; /* Current merge state (see above) */
Fts5SegIter aSeg[1]; /* Array of segment iterators */
};
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
** Record containing the doclist-index data.
**
** bEof:
** Set to true once iterator has reached EOF.
**
** iOff:
** Set to the current offset within record pData.
*/
struct Fts5DlidxLvl {
Fts5Data *pData; /* Data for current page of this level */
int iOff; /* Current offset into pData */
int bEof; /* At EOF already */
int iFirstOff; /* Used by reverse iterators */
/* Output variables */
int iLeafPgno; /* Page number of current leaf page */
i64 iRowid; /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
int nLvl;
int iSegid;
Fts5DlidxLvl aLvl[1];
};
static void fts5PutU16(u8 *aOut, u16 iVal){
aOut[0] = (iVal>>8);
aOut[1] = (iVal&0xFF);
}
static u16 fts5GetU16(const u8 *aIn){
return ((u16)aIn[0] << 8) + aIn[1];
}
/*
** Allocate and return a buffer at least nByte bytes in size.
**
** If an OOM error is encountered, return NULL and set the error code in
** the Fts5Index handle passed as the first argument.
*/
static void *fts5IdxMalloc(Fts5Index *p, int nByte){
return sqlite3Fts5MallocZero(&p->rc, nByte);
}
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
** res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
Fts5Buffer *pLeft, /* Left hand side of comparison */
const u8 *pRight, int nRight /* Right hand side of comparison */
){
int nCmp = MIN(pLeft->n, nRight);
int res = memcmp(pLeft->p, pRight, nCmp);
return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
** res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
int nCmp = MIN(pLeft->n, pRight->n);
int res = memcmp(pLeft->p, pRight->p, nCmp);
return (res==0 ? (pLeft->n - pRight->n) : res);
}
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
int ret;
fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
return ret;
}
/*
** INSERT OR REPLACE a record into the %_data table.
*/
static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
if( p->rc!=SQLITE_OK ) return;
if( p->pWriter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
"REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
pConfig->zDb, pConfig->zName
));
if( p->rc ) return;
}
sqlite3_bind_int64(p->pWriter, 1, iRowid);
sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
sqlite3_step(p->pWriter);
p->rc = sqlite3_reset(p->pWriter);
sqlite3_bind_null(p->pWriter, 2);
}
/*
** Execute the following SQL:
**
** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
*/
static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
if( p->rc!=SQLITE_OK ) return;
if( p->pDeleter==0 ){
int rc;
Fts5Config *pConfig = p->pConfig;
char *zSql = sqlite3_mprintf(
"DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
pConfig->zDb, pConfig->zName
);
if( zSql==0 ){
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
SQLITE_PREPARE_PERSISTENT, &p->pDeleter, 0);
sqlite3_free(zSql);
}
if( rc!=SQLITE_OK ){
p->rc = rc;
return;
}
}
sqlite3_bind_int64(p->pDeleter, 1, iFirst);
sqlite3_bind_int64(p->pDeleter, 2, iLast);
sqlite3_step(p->pDeleter);
p->rc = sqlite3_reset(p->pDeleter);
}
/*
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
fts5DataDelete(p, iFirst, iLast);
if( p->pIdxDeleter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
"DELETE FROM '%q'.'%q_idx' WHERE segid=?",
pConfig->zDb, pConfig->zName
));
}
if( p->rc==SQLITE_OK ){
sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
sqlite3_step(p->pIdxDeleter);
p->rc = sqlite3_reset(p->pIdxDeleter);
}
}
/*
** Release a reference to an Fts5Structure object returned by an earlier
** call to fts5StructureRead() or fts5StructureDecode().
*/
static void fts5StructureRelease(Fts5Structure *pStruct){
if( pStruct && 0>=(--pStruct->nRef) ){
int i;
assert( pStruct->nRef==0 );
for(i=0; i<pStruct->nLevel; i++){
sqlite3_free(pStruct->aLevel[i].aSeg);
}
sqlite3_free(pStruct);
}
}
static void fts5StructureRef(Fts5Structure *pStruct){
pStruct->nRef++;
}
/*
** Deserialize and return the structure record currently stored in serialized
** form within buffer pData/nData.
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated by one slot. This allows the structure contents
** to be more easily edited.
**
** If an error occurs, *ppOut is set to NULL and an SQLite error code
** returned. Otherwise, *ppOut is set to point to the new object and
** SQLITE_OK returned.
*/
static int fts5StructureDecode(
const u8 *pData, /* Buffer containing serialized structure */
int nData, /* Size of buffer pData in bytes */
int *piCookie, /* Configuration cookie value */
Fts5Structure **ppOut /* OUT: Deserialized object */
){
int rc = SQLITE_OK;
int i = 0;
int iLvl;
int nLevel = 0;
int nSegment = 0;
int nByte; /* Bytes of space to allocate at pRet */
Fts5Structure *pRet = 0; /* Structure object to return */
/* Grab the cookie value */
if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
i = 4;
/* Read the total number of levels and segments from the start of the
** structure record. */
i += fts5GetVarint32(&pData[i], nLevel);
i += fts5GetVarint32(&pData[i], nSegment);
nByte = (
sizeof(Fts5Structure) + /* Main structure */
sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
);
pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
if( pRet ){
pRet->nRef = 1;
pRet->nLevel = nLevel;
pRet->nSegment = nSegment;
i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
int nTotal = 0;
int iSeg;
if( i>=nData ){
rc = FTS5_CORRUPT;
}else{
i += fts5GetVarint32(&pData[i], pLvl->nMerge);
i += fts5GetVarint32(&pData[i], nTotal);
assert( nTotal>=pLvl->nMerge );
pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
nTotal * sizeof(Fts5StructureSegment)
);
}
if( rc==SQLITE_OK ){
pLvl->nSeg = nTotal;
for(iSeg=0; iSeg<nTotal; iSeg++){
if( i>=nData ){
rc = FTS5_CORRUPT;
break;
}
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
}
}
}
if( rc!=SQLITE_OK ){
fts5StructureRelease(pRet);
pRet = 0;
}
}
*ppOut = pRet;
return rc;
}
/*
**
*/
static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
if( *pRc==SQLITE_OK ){
Fts5Structure *pStruct = *ppStruct;
int nLevel = pStruct->nLevel;
int nByte = (
sizeof(Fts5Structure) + /* Main structure */
sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
);
pStruct = sqlite3_realloc(pStruct, nByte);
if( pStruct ){
memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
pStruct->nLevel++;
*ppStruct = pStruct;
}else{
*pRc = SQLITE_NOMEM;
}
}
}
/*
** Extend level iLvl so that there is room for at least nExtra more
** segments.
*/
static void fts5StructureExtendLevel(
int *pRc,
Fts5Structure *pStruct,
int iLvl,
int nExtra,
int bInsert
){
if( *pRc==SQLITE_OK ){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
Fts5StructureSegment *aNew;
int nByte;
nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
aNew = sqlite3_realloc(pLvl->aSeg, nByte);
if( aNew ){
if( bInsert==0 ){
memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
}else{
pRet = 0;
}
}
return pRet;
}
static i64 fts5IndexDataVersion(Fts5Index *p){
i64 iVersion = 0;
if( p->rc==SQLITE_OK ){
if( p->pDataVersion==0 ){
p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
);
if( p->rc ) return 0;
}
if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
iVersion = sqlite3_column_int64(p->pDataVersion, 0);
}
p->rc = sqlite3_reset(p->pDataVersion);
}
return iVersion;
}
/*
** Read, deserialize and return the structure record.
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated as described for function fts5StructureDecode()
** above.
**
** If an error occurs, NULL is returned and an error code left in the
** Fts5Index handle. If an error has already occurred when this function
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){
if( p->pStruct==0 ){
p->iStructVersion = fts5IndexDataVersion(p);
if( p->rc==SQLITE_OK ){
p->pStruct = fts5StructureReadUncached(p);
}
}
#if 0
else{
Fts5Structure *pTest = fts5StructureReadUncached(p);
if( pTest ){
int i, j;
assert_nc( p->pStruct->nSegment==pTest->nSegment );
assert_nc( p->pStruct->nLevel==pTest->nLevel );
for(i=0; i<pTest->nLevel; i++){
assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
for(j=0; j<pTest->aLevel[i].nSeg; j++){
Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
assert_nc( p1->iSegid==p2->iSegid );
assert_nc( p1->pgnoFirst==p2->pgnoFirst );
assert_nc( p1->pgnoLast==p2->pgnoLast );
}
}
fts5StructureRelease(pTest);
}
}
#endif
if( p->rc!=SQLITE_OK ) return 0;
assert( p->iStructVersion!=0 );
assert( p->pStruct!=0 );
fts5StructureRef(p->pStruct);
return p->pStruct;
}
static void fts5StructureInvalidate(Fts5Index *p){
if( p->pStruct ){
fts5StructureRelease(p->pStruct);
p->pStruct = 0;
}
}
/*
** Return the total number of segments in index structure pStruct. This
** function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
int nSegment = 0; /* Total number of segments */
if( pStruct ){
int iLvl; /* Used to iterate through levels */
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
nSegment += pStruct->aLevel[iLvl].nSeg;
}
}
return nSegment;
}
#endif
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
(pBuf)->n += nBlob; \
}
#define fts5BufferSafeAppendVarint(pBuf, iVal) { \
(pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
assert( (pBuf)->nSpace>=(pBuf)->n ); \
}
/*
** Serialize and store the "structure" record.
**
** If an error occurs, leave an error code in the Fts5Index object. If an
** error has already occurred, this function is a no-op.
*/
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
if( p->rc==SQLITE_OK ){
Fts5Buffer buf; /* Buffer to serialize record into */
int iLvl; /* Used to iterate through levels */
int iCookie; /* Cookie value to store */
assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
memset(&buf, 0, sizeof(Fts5Buffer));
/* Append the current configuration cookie */
iCookie = p->pConfig->iCookie;
if( iCookie<0 ) iCookie = 0;
if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
sqlite3Fts5Put32(buf.p, iCookie);
buf.n = 4;
fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
}
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
int iSeg; /* Used to iterate through segments */
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
assert( pLvl->nMerge<=pLvl->nSeg );
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
}
}
fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
fts5BufferFree(&buf);
}
}
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
int rc = SQLITE_OK;
Fts5Buffer buf;
memset(&buf, 0, sizeof(buf));
fts5DebugStructure(&rc, &buf, pStruct);
fprintf(stdout, "%s: %s\n", zCaption, buf.p);
fflush(stdout);
fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
}
/*
** Return a copy of index structure pStruct. Except, promote as many
** segments as possible to level iPromote. If an OOM occurs, NULL is
** returned.
*/
static void fts5StructurePromoteTo(
Fts5Index *p,
int iPromote,
int szPromote,
Fts5Structure *pStruct
){
int il, is;
Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
if( pOut->nMerge==0 ){
for(il=iPromote+1; il<pStruct->nLevel; il++){
Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
if( pLvl->nMerge ) return;
for(is=pLvl->nSeg-1; is>=0; is--){
int sz = fts5SegmentSize(&pLvl->aSeg[is]);
if( sz>szPromote ) return;
fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
if( p->rc ) return;
memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
pOut->nSeg++;
pLvl->nSeg--;
}
}
}
}
}
/* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
** is a no-op if it is not. */
if( iPromote<0 ){
iPromote = iLvl;
szPromote = szSeg;
}
fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
}
}
/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
Fts5Data *pData = pLvl->pData;
if( pLvl->iOff==0 ){
assert( pLvl->bEof==0 );
pLvl->iOff = 1;
pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
pLvl->iFirstOff = pLvl->iOff;
}else{
int iOff;
for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
if( pData->p[iOff] ) break;
}
if( iOff<pData->nn ){
i64 iVal;
pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
pLvl->iRowid += iVal;
pLvl->iOff = iOff;
}else{
pLvl->bEof = 1;
}
}
return pLvl->bEof;
}
/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
assert( iLvl<pIter->nLvl );
if( fts5DlidxLvlNext(pLvl) ){
if( (iLvl+1) < pIter->nLvl ){
fts5DlidxIterNextR(p, pIter, iLvl+1);
if( pLvl[1].bEof==0 ){
fts5DataRelease(pLvl->pData);
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
);
if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
}
}
}
return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
return fts5DlidxIterNextR(p, pIter, 0);
}
/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
** pData:
** pointer to doclist-index record,
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
int i;
for(i=0; i<pIter->nLvl; i++){
fts5DlidxLvlNext(&pIter->aLvl[i]);
}
return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
}
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
int i;
/* Advance each level to the last entry on the last page */
for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
while( fts5DlidxLvlNext(pLvl)==0 );
pLvl->bEof = 0;
if( i>0 ){
Fts5DlidxLvl *pChild = &pLvl[-1];
fts5DataRelease(pChild->pData);
memset(pChild, 0, sizeof(Fts5DlidxLvl));
pChild->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
);
}
}
}
/*
** Move the iterator passed as the only argument to the previous entry.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
int iOff = pLvl->iOff;
assert( pLvl->bEof==0 );
if( iOff<=pLvl->iFirstOff ){
pLvl->bEof = 1;
}else{
u8 *a = pLvl->pData->p;
i64 iVal;
int iLimit;
int ii;
int nZero = 0;
/* Currently iOff points to the first byte of a varint. This block
** decrements iOff until it points to the first byte of the previous
** varint. Taking care not to read any memory locations that occur
** before the buffer in memory. */
iLimit = (iOff>9 ? iOff-9 : 0);
for(iOff--; iOff>iLimit; iOff--){
if( (a[iOff-1] & 0x80)==0 ) break;
}
fts5GetVarint(&a[iOff], (u64*)&iVal);
pLvl->iRowid -= iVal;
pLvl->iLeafPgno--;
/* Skip backwards past any 0x00 varints. */
for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
nZero++;
}
if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
/* The byte immediately before the last 0x00 byte has the 0x80 bit
** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
** bytes before a[ii]. */
int bZero = 0; /* True if last 0x00 counts */
if( (ii-8)>=pLvl->iFirstOff ){
int j;
for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
bZero = (j>8);
}
if( bZero==0 ) nZero--;
}
pLvl->iLeafPgno -= nZero;
pLvl->iOff = iOff - nZero;
}
return pLvl->bEof;
}
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
assert( iLvl<pIter->nLvl );
if( fts5DlidxLvlPrev(pLvl) ){
if( (iLvl+1) < pIter->nLvl ){
fts5DlidxIterPrevR(p, pIter, iLvl+1);
if( pLvl[1].bEof==0 ){
fts5DataRelease(pLvl->pData);
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p,
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
);
if( pLvl->pData ){
while( fts5DlidxLvlNext(pLvl)==0 );
pLvl->bEof = 0;
}
}
}
}
return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
return fts5DlidxIterPrevR(p, pIter, 0);
}
/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
if( pIter ){
int i;
for(i=0; i<pIter->nLvl; i++){
fts5DataRelease(pIter->aLvl[i].pData);
}
sqlite3_free(pIter);
}
}
static Fts5DlidxIter *fts5DlidxIterInit(
Fts5Index *p, /* Fts5 Backend to iterate within */
int bRev, /* True for ORDER BY ASC */
int iSegid, /* Segment id */
int iLeafPg /* Leaf page number to load dlidx for */
){
Fts5DlidxIter *pIter = 0;
int i;
int bDone = 0;
for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
Fts5DlidxIter *pNew;
pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
if( pNew==0 ){
p->rc = SQLITE_NOMEM;
}else{
i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
pIter = pNew;
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
pLvl->pData = fts5DataRead(p, iRowid);
if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
bDone = 1;
}
pIter->nLvl = i+1;
}
}
if( p->rc==SQLITE_OK ){
pIter->iSegid = iSegid;
if( bRev==0 ){
fts5DlidxIterFirst(pIter);
}else{
fts5DlidxIterLast(p, pIter);
}
}
if( p->rc!=SQLITE_OK ){
fts5DlidxIterFree(pIter);
pIter = 0;
}
return pIter;
}
static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
return pIter->aLvl[0].iRowid;
}
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
return pIter->aLvl[0].iLeafPgno;
}
/*
** Load the next leaf page into the segment iterator.
*/
static void fts5SegIterNextPage(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter /* Iterator to advance to next page */
){
Fts5Data *pLeaf;
Fts5StructureSegment *pSeg = pIter->pSeg;
fts5DataRelease(pIter->pLeaf);
pIter->iLeafPgno++;
if( pIter->pNextLeaf ){
pIter->pLeaf = pIter->pNextLeaf;
pIter->pNextLeaf = 0;
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
pIter->pLeaf = fts5LeafRead(p,
FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
);
}else{
pIter->pLeaf = 0;
}
pLeaf = pIter->pLeaf;
if( pLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf;
if( fts5LeafIsTermless(pLeaf) ){
pIter->iEndofDoclist = pLeaf->nn+1;
}else{
pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
pIter->iEndofDoclist
);
}
}
}
/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
int nSz;
int n = 0;
fts5FastGetVarint32(p, n, nSz);
assert_nc( nSz>=0 );
*pnSz = nSz/2;
*pbDel = nSz & 0x0001;
return n;
}
/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
** Fts5SegIter.nPos
** Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
if( p->rc==SQLITE_OK ){
int iOff = pIter->iLeafOffset; /* Offset to read at */
ASSERT_SZLEAF_OK(pIter->pLeaf);
if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
pIter->bDel = 0;
pIter->nPos = 1;
if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
pIter->bDel = 1;
iOff++;
if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
pIter->nPos = 1;
iOff++;
}else{
pIter->nPos = 0;
}
}
}else{
int nSz;
fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
pIter->bDel = (nSz & 0x0001);
pIter->nPos = nSz>>1;
assert_nc( pIter->nPos>=0 );
}
pIter->iLeafOffset = iOff;
}
}
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
int iOff = pIter->iLeafOffset;
ASSERT_SZLEAF_OK(pIter->pLeaf);
if( iOff>=pIter->pLeaf->szLeaf ){
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ){
if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
return;
}
iOff = 4;
a = pIter->pLeaf->p;
}
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
}
/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
** This function populates:
**
** Fts5SegIter.term
** Fts5SegIter.rowid
**
** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
** the first position list. The position list belonging to document
** (Fts5SegIter.iRowid).
*/
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
int iOff = pIter->iLeafOffset; /* Offset to read at */
int nNew; /* Bytes of new data */
iOff += fts5GetVarint32(&a[iOff], nNew);
if( iOff+nNew>pIter->pLeaf->nn ){
p->rc = FTS5_CORRUPT;
return;
}
pIter->term.n = nKeep;
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
iOff += nNew;
pIter->iTermLeafOffset = iOff;
pIter->iTermLeafPgno = pIter->iLeafPgno;
pIter->iLeafOffset = iOff;
if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
}else{
int nExtra;
pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
pIter->iEndofDoclist += nExtra;
}
fts5SegIterLoadRowid(p, pIter);
}
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
if( pIter->flags & FTS5_SEGITER_REVERSE ){
pIter->xNext = fts5SegIterNext_Reverse;
}else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
pIter->xNext = fts5SegIterNext_None;
}else{
pIter->xNext = fts5SegIterNext;
}
}
/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when
** this function returns.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterInit(
Fts5Index *p, /* FTS index object */
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){
if( pSeg->pgnoFirst==0 ){
/* This happens if the segment is being used as an input to an incremental
** merge and all data has already been "trimmed". See function
** fts5TrimSegments() for details. In this case leave the iterator empty.
** The caller will see the (pIter->pLeaf==0) and assume the iterator is
** at EOF already. */
assert( pIter->pLeaf==0 );
return;
}
if( p->rc==SQLITE_OK ){
memset(pIter, 0, sizeof(*pIter));
fts5SegIterSetNext(p, pIter);
pIter->pSeg = pSeg;
pIter->iLeafPgno = pSeg->pgnoFirst-1;
fts5SegIterNextPage(p, pIter);
}
if( p->rc==SQLITE_OK ){
pIter->iLeafOffset = 4;
assert_nc( pIter->pLeaf->nn>4 );
assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
fts5SegIterLoadTerm(p, pIter, 0);
fts5SegIterLoadNPos(p, pIter);
}
}
/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** The iterator is in an unusual state when this function is called: the
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
** the position-list size field for the first relevant rowid on the page.
** Fts5SegIter.rowid is set, but nPos and bDel are not.
**
** This function advances the iterator so that it points to the last
** relevant rowid on the page and, if necessary, initializes the
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
int eDetail = p->pConfig->eDetail;
int n = pIter->pLeaf->szLeaf;
int i = pIter->iLeafOffset;
u8 *a = pIter->pLeaf->p;
int iRowidOffset = 0;
if( n>pIter->iEndofDoclist ){
n = pIter->iEndofDoclist;
}
ASSERT_SZLEAF_OK(pIter->pLeaf);
while( 1 ){
i64 iDelta = 0;
if( eDetail==FTS5_DETAIL_NONE ){
/* todo */
if( i<n && a[i]==0 ){
i++;
if( i<n && a[i]==0 ) i++;
}
}else{
int nPos;
int bDummy;
i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
i += nPos;
}
if( i>=n ) break;
i += fts5GetVarint(&a[i], (u64*)&iDelta);
pIter->iRowid += iDelta;
/* If necessary, grow the pIter->aRowidOffset[] array. */
if( iRowidOffset>=pIter->nRowidOffset ){
int nNew = pIter->nRowidOffset + 8;
int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
if( aNew==0 ){
p->rc = SQLITE_NOMEM;
break;
}
pIter->aRowidOffset = aNew;
pIter->nRowidOffset = nNew;
}
pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
pIter->iLeafOffset = i;
}
pIter->iRowidOffset = iRowidOffset;
fts5SegIterLoadNPos(p, pIter);
}
/*
**
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
assert( pIter->flags & FTS5_SEGITER_REVERSE );
assert( pIter->flags & FTS5_SEGITER_ONETERM );
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
Fts5Data *pNew;
pIter->iLeafPgno--;
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
pIter->pSeg->iSegid, pIter->iLeafPgno
));
if( pNew ){
/* iTermLeafOffset may be equal to szLeaf if the term is the last
** thing on the page - i.e. the first rowid is on the following page.
** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
assert( pIter->pLeaf==0 );
if( pIter->iTermLeafOffset<pNew->szLeaf ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = pIter->iTermLeafOffset;
}
}else{
int iRowidOff;
iRowidOff = fts5LeafFirstRowidOff(pNew);
if( iRowidOff ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = iRowidOff;
}
}
if( pIter->pLeaf ){
u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
break;
}else{
fts5DataRelease(pNew);
}
}
}
if( pIter->pLeaf ){
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
fts5SegIterReverseInitPage(p, pIter);
}
}
/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
}
/*
** Advance iterator pIter to the next entry.
**
** This version of fts5SegIterNext() is only used by reverse iterators.
*/
static void fts5SegIterNext_Reverse(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */
int *pbUnused /* Unused */
){
assert( pIter->flags & FTS5_SEGITER_REVERSE );
assert( pIter->pNextLeaf==0 );
UNUSED_PARAM(pbUnused);
assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
/* Search for the end of the position list within the current page. */
a = pLeaf->p;
n = pLeaf->szLeaf;
ASSERT_SZLEAF_OK(pLeaf);
iOff = pIter->iLeafOffset + pIter->nPos;
if( iOff<n ){
/* The next entry is on the current page. */
assert_nc( iOff<=pIter->iEndofDoclist );
if( iOff>=pIter->iEndofDoclist ){
bNewTerm = 1;
if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}else{
u64 iDelta;
iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
pIter->iRowid += iDelta;
assert_nc( iDelta>0 );
}
pIter->iLeafOffset = iOff;
}else if( pIter->pSeg==0 ){
const u8 *pList = 0;
const char *zTerm = 0;
int nList = 0;
assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
sqlite3Fts5HashScanNext(p->pHash);
sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
}
if( pList==0 ){
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
}else{
pIter->pLeaf->p = (u8*)pList;
pIter->pLeaf->nn = nList;
pIter->pLeaf->szLeaf = nList;
pIter->iEndofDoclist = nList+1;
sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
(u8*)zTerm);
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
*pbNewTerm = 1;
}
}else{
iOff = 0;
/* Next entry is not on the current page */
while( iOff==0 ){
fts5SegIterNextPage(p, pIter);
pLeaf = pIter->pLeaf;
if( pLeaf==0 ) break;
ASSERT_SZLEAF_OK(pLeaf);
if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
if( pLeaf->nn>pLeaf->szLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
&pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
);
}
}
else if( pLeaf->nn>pLeaf->szLeaf ){
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
&pLeaf->p[pLeaf->szLeaf], iOff
);
pIter->iLeafOffset = iOff;
pIter->iEndofDoclist = iOff;
bNewTerm = 1;
}
assert_nc( iOff<pLeaf->szLeaf );
if( iOff>pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
return;
}
}
}
/* Check if the iterator is now at EOF. If so, return early. */
if( pIter->pLeaf ){
if( bNewTerm ){
if( pIter->flags & FTS5_SEGITER_ONETERM ){
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
}else{
fts5SegIterLoadTerm(p, pIter, nKeep);
fts5SegIterLoadNPos(p, pIter);
if( pbNewTerm ) *pbNewTerm = 1;
}
}else{
/* The following could be done by calling fts5SegIterLoadNPos(). But
** this block is particularly performance critical, so equivalent
** code is inlined.
**
** Later: Switched back to fts5SegIterLoadNPos() because it supports
** detail=none mode. Not ideal.
*/
int nSz;
assert( p->rc==SQLITE_OK );
assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
pIter->bDel = (nSz & 0x0001);
pIter->nPos = nSz>>1;
assert_nc( pIter->nPos>=0 );
}
}
}
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
#define fts5IndexSkipVarint(a, iOff) { \
int iEnd = iOff+9; \
while( (a[iOff++] & 0x80) && iOff<iEnd ); \
}
/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
Fts5DlidxIter *pDlidx = pIter->pDlidx;
Fts5Data *pLast = 0;
int pgnoLast = 0;
if( pDlidx ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = fts5DlidxIterPgno(pDlidx);
pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
}else{
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
/* Currently, Fts5SegIter.iLeafOffset points to the first byte of
** position-list content for the current rowid. Back it up so that it
** points to the start of the position-list size field. */
int iPoslist;
if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
iPoslist = pIter->iTermLeafOffset;
}else{
iPoslist = 4;
}
fts5IndexSkipVarint(pLeaf->p, iPoslist);
pIter->iLeafOffset = iPoslist;
/* If this condition is true then the largest rowid for the current
** term may not be stored on the current page. So search forward to
** see where said rowid really is. */
if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
int pgno;
Fts5StructureSegment *pSeg = pIter->pSeg;
/* The last rowid in the doclist may not be on the current page. Search
** forward to find the page containing the last rowid. */
for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
Fts5Data *pNew = fts5DataRead(p, iAbs);
if( pNew ){
int iRowid, bTermless;
iRowid = fts5LeafFirstRowidOff(pNew);
bTermless = fts5LeafIsTermless(pNew);
if( iRowid ){
SWAPVAL(Fts5Data*, pNew, pLast);
pgnoLast = pgno;
}
fts5DataRelease(pNew);
if( bTermless==0 ) break;
}
}
}
}
/* If pLast is NULL at this point, then the last rowid for this doclist
** lies on the page currently indicated by the iterator. In this case
** pIter->iLeafOffset is already set to point to the position-list size
** field associated with the first relevant rowid on the page.
**
** Or, if pLast is non-NULL, then it is the page that contains the last
** rowid. In this case configure the iterator so that it points to the
** first rowid on this page.
*/
if( pLast ){
int iOff;
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = pLast;
pIter->iLeafPgno = pgnoLast;
iOff = fts5LeafFirstRowidOff(pLast);
iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
if( fts5LeafIsTermless(pLast) ){
pIter->iEndofDoclist = pLast->nn+1;
}else{
pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
}
}
fts5SegIterReverseInitPage(p, pIter);
}
/*
** Iterator pIter currently points to the first rowid of a doclist.
** There is a doclist-index associated with the final term on the current
** page. If the current term is the last term on the page, load the
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
*/
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
int iSeg = pIter->pSeg->iSegid;
int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
assert( pIter->flags & FTS5_SEGITER_ONETERM );
assert( pIter->pDlidx==0 );
/* Check if the current doclist ends on this page. If it does, return
** early without loading the doclist-index (as it belongs to a different
** term. */
if( pIter->iTermLeafPgno==pIter->iLeafPgno
&& pIter->iEndofDoclist<pLeaf->szLeaf
){
return;
}
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
** If the specified term is found on the page, then the iterator is left
** pointing to it. If argument bGe is zero and the term is not found,
** the iterator is left pointing at EOF.
**
** If bGe is non-zero and the specified term is not found, then the
** iterator is left pointing to the smallest term in the segment that
** is larger than the specified term, even if this term is not on the
** current page.
*/
static void fts5LeafSeek(
Fts5Index *p, /* Leave any error code here */
int bGe, /* True for a >= search */
Fts5SegIter *pIter, /* Iterator to seek */
const u8 *pTerm, int nTerm /* Term to search for */
){
int iOff;
const u8 *a = pIter->pLeaf->p;
int szLeaf = pIter->pLeaf->szLeaf;
int n = pIter->pLeaf->nn;
int nMatch = 0;
int nKeep = 0;
int nNew = 0;
int iTermOff;
int iPgidx; /* Current offset in pgidx */
int bEndOfPage = 0;
assert( p->rc==SQLITE_OK );
iPgidx = szLeaf;
iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
iOff = iTermOff;
if( iOff>n ){
p->rc = FTS5_CORRUPT;
return;
}
while( 1 ){
/* Figure out how many new bytes are in this term */
fts5FastGetVarint32(a, iOff, nNew);
if( nKeep<nMatch ){
goto search_failed;
}
assert( nKeep>=nMatch );
if( nKeep==nMatch ){
int nCmp;
int i;
nCmp = MIN(nNew, nTerm-nMatch);
for(i=0; i<nCmp; i++){
if( a[iOff+i]!=pTerm[nMatch+i] ) break;
}
nMatch += i;
if( nTerm==nMatch ){
if( i==nNew ){
goto search_success;
}else{
goto search_failed;
}
}else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
goto search_failed;
}
}
if( iPgidx>=n ){
bEndOfPage = 1;
break;
}
iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
iTermOff += nKeep;
iOff = iTermOff;
if( iOff>=n ){
p->rc = FTS5_CORRUPT;
return;
}
/* Read the nKeep field of the next term. */
fts5FastGetVarint32(a, iOff, nKeep);
}
search_failed:
if( bGe==0 ){
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
return;
}else if( bEndOfPage ){
do {
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ) return;
a = pIter->pLeaf->p;
if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
iPgidx = pIter->pLeaf->szLeaf;
iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
nKeep = 0;
iTermOff = iOff;
n = pIter->pLeaf->nn;
iOff += fts5GetVarint32(&a[iOff], nNew);
break;
}
}
}while( 1 );
}
search_success:
pIter->iLeafOffset = iOff + nNew;
pIter->iTermLeafOffset = pIter->iLeafOffset;
pIter->iTermLeafPgno = pIter->iLeafPgno;
fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
if( iPgidx>=n ){
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
}else{
int nExtra;
iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
pIter->iEndofDoclist = iTermOff + nExtra;
}
pIter->iPgidxOff = iPgidx;
fts5SegIterLoadRowid(p, pIter);
fts5SegIterLoadNPos(p, pIter);
}
static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
if( p->pIdxSelect==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
"SELECT pgno FROM '%q'.'%q_idx' WHERE "
"segid=? AND term<=? ORDER BY term DESC LIMIT 1",
pConfig->zDb, pConfig->zName
));
}
return p->pIdxSelect;
}
/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
Fts5Index *p, /* FTS5 backend */
const u8 *pTerm, int nTerm, /* Term to seek to */
int flags, /* Mask of FTS5INDEX_XXX flags */
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){
int iPg = 1;
int bGe = (flags & FTS5INDEX_QUERY_SCAN);
int bDlidx = 0; /* True if there is a doclist-index */
sqlite3_stmt *pIdxSelect = 0;
assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
assert( pTerm && nTerm );
memset(pIter, 0, sizeof(*pIter));
pIter->pSeg = pSeg;
/* This block sets stack variable iPg to the leaf page number that may
** contain term (pTerm/nTerm), if it is present in the segment. */
pIdxSelect = fts5IdxSelectStmt(p);
if( p->rc ) return;
sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
i64 val = sqlite3_column_int(pIdxSelect, 0);
iPg = (int)(val>>1);
bDlidx = (val & 0x0001);
}
p->rc = sqlite3_reset(pIdxSelect);
sqlite3_bind_null(pIdxSelect, 2);
if( iPg<pSeg->pgnoFirst ){
iPg = pSeg->pgnoFirst;
bDlidx = 0;
}
pIter->iLeafPgno = iPg - 1;
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf ){
fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
}
if( p->rc==SQLITE_OK && bGe==0 ){
pIter->flags |= FTS5_SEGITER_ONETERM;
if( pIter->pLeaf ){
if( flags & FTS5INDEX_QUERY_DESC ){
pIter->flags |= FTS5_SEGITER_REVERSE;
}
if( bDlidx ){
fts5SegIterLoadDlidx(p, pIter);
}
if( flags & FTS5INDEX_QUERY_DESC ){
fts5SegIterReverse(p, pIter);
}
}
}
fts5SegIterSetNext(p, pIter);
/* Either:
**
** 1) an error has occurred, or
** 2) the iterator points to EOF, or
** 3) the iterator points to an entry with term (pTerm/nTerm), or
** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
** to an entry with a term greater than or equal to (pTerm/nTerm).
*/
assert( p->rc!=SQLITE_OK /* 1 */
|| pIter->pLeaf==0 /* 2 */
|| fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
|| (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
);
}
/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the
** iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
}else{
pCtx->eState = 0;
}
}
do {
while( i<nChunk && pChunk[i]!=0x01 ){
while( pChunk[i] & 0x80 ) i++;
i++;
}
if( pCtx->eState ){
fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
}
if( i<nChunk ){
int iCol;
iStart = i;
i++;
if( i>=nChunk ){
pCtx->eState = 2;
}else{
fts5FastGetVarint32(pChunk, i, iCol);
pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
if( pCtx->eState ){
fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
iStart = i;
}
}
}
}while( i<nChunk );
}
}
static void fts5ChunkIterate(
Fts5Index *p, /* Index object */
Fts5SegIter *pSeg, /* Poslist of this iterator */
void *pCtx, /* Context pointer for xChunk callback */
void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
int nRem = pSeg->nPos; /* Number of bytes still to come */
Fts5Data *pData = 0;
u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
int pgno = pSeg->iLeafPgno;
int pgnoSave = 0;
/* This function does notmwork with detail=none databases. */
assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
pgnoSave = pgno+1;
}
while( 1 ){
xChunk(p, pCtx, pChunk, nChunk);
nRem -= nChunk;
fts5DataRelease(pData);
if( nRem<=0 ){
break;
}else{
pgno++;
pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
if( pData==0 ) break;
pChunk = &pData->p[4];
nChunk = MIN(nRem, pData->szLeaf - 4);
if( pgno==pgnoSave ){
assert( pSeg->pNextLeaf==0 );
pSeg->pNextLeaf = pData;
pData = 0;
}
}
}
}
/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
Fts5Index *p,
Fts5SegIter *pSeg,
Fts5Colset *pColset,
Fts5Buffer *pBuf
){
if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
if( pColset==0 ){
fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
}else{
if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
PoslistCallbackCtx sCtx;
sCtx.pBuf = pBuf;
sCtx.pColset = pColset;
sCtx.eState = fts5IndexColsetTest(pColset, 0);
assert( sCtx.eState==0 || sCtx.eState==1 );
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
}else{
PoslistOffsetsCtx sCtx;
memset(&sCtx, 0, sizeof(sCtx));
sCtx.pBuf = pBuf;
sCtx.pColset = pColset;
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
}
}
}
}
/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
*/
static int fts5IndexExtractCol(
const u8 **pa, /* IN/OUT: Pointer to poslist */
int n, /* IN: Size of poslist in bytes */
int iCol /* Column to extract from poslist */
){
int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
const u8 *p = *pa;
}
/*
** Return true if the iterator is at EOF or if an error has occurred.
** False otherwise.
*/
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
assert( p->rc
|| (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
);
return (p->rc || pIter->base.bEof);
}
/*
** Return the rowid of the entry that the iterator currently points
** to. If the iterator points to EOF when this function is called the
** results are undefined.
*/
static i64 fts5MultiIterRowid(Fts5Iter *pIter){
assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
}
/*
** Move the iterator to the next entry at or following iMatch.
*/
static void fts5MultiIterNextFrom(
Fts5Index *p,
Fts5Iter *pIter,
i64 iMatch
){
while( 1 ){
i64 iRowid;
fts5MultiIterNext(p, pIter, 1, iMatch);
if( fts5MultiIterEof(p, pIter) ) break;
iRowid = fts5MultiIterRowid(pIter);
if( pIter->bRev==0 && iRowid>=iMatch ) break;
if( pIter->bRev!=0 && iRowid<=iMatch ) break;
}
}
/*
** Return a pointer to a buffer containing the term associated with the
** entry that the iterator currently points to.
*/
static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
*pn = p->term.n;
return p->term.p;
}
/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by
** any currently existing segment. If a free segment id cannot be found,
** SQLITE_FULL is returned.
**
** If an error has already occurred, this function is a no-op. 0 is
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
int iSegid = 0;
if( p->rc==SQLITE_OK ){
if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
p->rc = SQLITE_FULL;
}else{
/* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
** array is 63 elements, or 252 bytes, in size. */
u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
int iLvl, iSeg;
int i;
u32 mask;
memset(aUsed, 0, sizeof(aUsed));
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
if( iId<=FTS5_MAX_SEGMENT ){
aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
}
}
}
for(i=0; aUsed[i]==0xFFFFFFFF; i++);
mask = aUsed[i];
for(iSegid=0; mask & (1 << iSegid); iSegid++);
iSegid += 1 + i*32;
#ifdef SQLITE_DEBUG
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
}
}
assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
{
sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
if( p->rc==SQLITE_OK ){
u8 aBlob[2] = {0xff, 0xff};
sqlite3_bind_int(pIdxSelect, 1, iSegid);
sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
p->rc = sqlite3_reset(pIdxSelect);
sqlite3_bind_null(pIdxSelect, 2);
}
}
#endif
}
}
return iSegid;
}
/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
assert( p->pHash || p->nPendingData==0 );
if( p->pHash ){
sqlite3Fts5HashClear(p->pHash);
p->nPendingData = 0;
}
}
/*
** Return the size of the prefix, in bytes, that buffer
** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
**
** Buffer (pNew/<length-unknown>) is guaranteed to be greater
** than buffer (pOld/nOld).
*/
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
int i;
for(i=0; i<nOld; i++){
if( pOld[i]!=pNew[i] ) break;
}
return i;
}
static void fts5WriteDlidxClear(
Fts5Index *p,
Fts5SegWriter *pWriter,
int bFlush /* If true, write dlidx to disk */
){
int i;
assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
for(i=0; i<pWriter->nDlidx; i++){
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
if( pDlidx->buf.n==0 ) break;
if( bFlush ){
assert( pDlidx->pgno!=0 );
fts5DataWrite(p,
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
pDlidx->buf.p, pDlidx->buf.n
);
}
sqlite3Fts5BufferZero(&pDlidx->buf);
pDlidx->bPrevValid = 0;
}
}
/*
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
** Any new array elements are zeroed before returning.
*/
static int fts5WriteDlidxGrow(
Fts5Index *p,
Fts5SegWriter *pWriter,
int nLvl
){
if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
);
if( aDlidx==0 ){
p->rc = SQLITE_NOMEM;
}else{
int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
memset(&aDlidx[pWriter->nDlidx], 0, nByte);
pWriter->aDlidx = aDlidx;
pWriter->nDlidx = nLvl;
}
}
return p->rc;
}
/*
** If the current doclist-index accumulating in pWriter->aDlidx[] is large
** enough, flush it to disk and return 1. Otherwise discard it and return
** zero.
*/
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
int bFlag = 0;
/* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
** to the database, also write the doclist-index to disk. */
if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
bFlag = 1;
}
fts5WriteDlidxClear(p, pWriter, bFlag);
pWriter->nEmpty = 0;
return bFlag;
}
/*
** This function is called whenever processing of the doclist for the
** last term on leaf page (pWriter->iBtPage) is completed.
**
** The doclist-index for that term is currently stored in-memory within the
** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
** writes it out to disk. Or, if it is too small to bother with, discards
** it.
**
** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
*/
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
int bFlag;
assert( pWriter->iBtPage || pWriter->nEmpty==0 );
if( pWriter->iBtPage==0 ) return;
bFlag = fts5WriteFlushDlidx(p, pWriter);
if( p->rc==SQLITE_OK ){
const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
/* The following was already done in fts5WriteInit(): */
/* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
sqlite3_step(p->pIdxWriter);
p->rc = sqlite3_reset(p->pIdxWriter);
sqlite3_bind_null(p->pIdxWriter, 2);
}
pWriter->iBtPage = 0;
}
/*
** This is called once for each leaf page except the first that contains
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
** is larger than all terms written to earlier leaves, and equal to or
** smaller than the first term on the new leaf.
**
** If an error occurs, an error code is left in Fts5Index.rc. If an error
** has already occurred when this function is called, it is a no-op.
*/
static void fts5WriteBtreeTerm(
Fts5Index *p, /* FTS5 backend object */
Fts5SegWriter *pWriter, /* Writer object */
int nTerm, const u8 *pTerm /* First term on new page */
){
fts5WriteFlushBtree(p, pWriter);
fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
pWriter->iBtPage = pWriter->writer.pgno;
}
/*
** This function is called when flushing a leaf page that contains no
** terms at all to disk.
*/
static void fts5WriteBtreeNoTerm(
Fts5Index *p, /* FTS5 backend object */
Fts5SegWriter *pWriter /* Writer object */
){
/* If there were no rowids on the leaf page either and the doclist-index
** has already been started, append an 0x00 byte to it. */
if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
assert( pDlidx->bPrevValid );
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
}
/* Increment the "number of sequential leaves without a term" counter. */
pWriter->nEmpty++;
}
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
i64 iRowid;
int iOff;
iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
return iRowid;
}
/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
*/
static void fts5WriteDlidxAppend(
Fts5Index *p,
Fts5SegWriter *pWriter,
i64 iRowid
){
int i;
int bDone = 0;
for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
i64 iVal;
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
if( pDlidx->buf.n>=p->pConfig->pgsz ){
/* The current doclist-index page is full. Write it to disk and push
** a copy of iRowid (which will become the first rowid on the next
** doclist-index leaf page) up into the next level of the b-tree
** hierarchy. If the node being flushed is currently the root node,
** also push its first rowid upwards. */
pDlidx->buf.p[0] = 0x01; /* Not the root node */
fts5DataWrite(p,
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
pDlidx->buf.p, pDlidx->buf.n
);
fts5WriteDlidxGrow(p, pWriter, i+2);
pDlidx = &pWriter->aDlidx[i];
if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
/* This was the root node. Push its first rowid up to the new root. */
pDlidx[1].pgno = pDlidx->pgno;
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
pDlidx[1].bPrevValid = 1;
pDlidx[1].iPrev = iFirst;
}
sqlite3Fts5BufferZero(&pDlidx->buf);
pDlidx->bPrevValid = 0;
pDlidx->pgno++;
}else{
bDone = 1;
}
if( pDlidx->bPrevValid ){
iVal = iRowid - pDlidx->iPrev;
}else{
i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
assert( pDlidx->buf.n==0 );
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
iVal = iRowid;
}
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
pDlidx->bPrevValid = 1;
pDlidx->iPrev = iRowid;
}
}
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
Fts5PageWriter *pPage = &pWriter->writer;
i64 iRowid;
assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
/* Set the szLeaf header field. */
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
if( pWriter->bFirstTermInPage ){
/* No term was written to this page. */
assert( pPage->pgidx.n==0 );
fts5WriteBtreeNoTerm(p, pWriter);
}else{
/* Append the pgidx to the page buffer. Set the szLeaf header field. */
fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
}
/* Write the page out to disk */
iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
fts5BufferZero(&pPage->pgidx);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
pPage->iPrevPgidx = 0;
pPage->pgno++;
/* Increase the leaves written counter */
pWriter->nLeafWritten++;
/* The new leaf holds no terms or rowids */
pWriter->bFirstTermInPage = 1;
pWriter->bFirstRowidInPage = 1;
}
/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
Fts5Index *p,
Fts5SegWriter *pWriter,
int nTerm, const u8 *pTerm
){
int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->writer;
Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
assert( p->rc==SQLITE_OK );
assert( pPage->buf.n>=4 );
assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
/* If the current leaf page is full, flush it to disk. */
if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
if( pPage->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
}
fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
}
/* TODO1: Updating pgidx here. */
pPgidx->n += sqlite3Fts5PutVarint(
&pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
);
pPage->iPrevPgidx = pPage->buf.n;
#if 0
fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
pPgidx->n += 2;
#endif
if( pWriter->bFirstTermInPage ){
nPrefix = 0;
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
** the segment b-tree. In this case it is necessary to add a term to
** the b-tree hierarchy that is (a) larger than the largest term
** already written to the segment and (b) smaller than or equal to
** this term. In other words, a prefix of (pTerm/nTerm) that is one
** byte longer than the longest prefix (pTerm/nTerm) shares with the
** previous term.
**
** Usually, the previous term is available in pPage->term. The exception
** is if this is the first term written in an incremental-merge step.
** In this case the previous term is not available, so just write a
** copy of (pTerm/nTerm) into the parent node. This is slightly
** inefficient, but still correct. */
int n = nTerm;
if( pPage->term.n ){
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
pPage = &pWriter->writer;
}
}else{
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
}
/* Append the number of bytes of new data, then the term data itself
** to the page. */
fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
/* Update the Fts5PageWriter.term field. */
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
pWriter->bFirstTermInPage = 0;
pWriter->bFirstRowidInPage = 0;
pWriter->bFirstRowidInDoclist = 1;
assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
pWriter->aDlidx[0].pgno = pPage->pgno;
}
/*
** Append a rowid and position-list size field to the writers output.
*/
static void fts5WriteAppendRowid(
Fts5Index *p,
Fts5SegWriter *pWriter,
i64 iRowid
){
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->writer;
if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
fts5WriteFlushLeaf(p, pWriter);
}
/* If this is to be the first rowid written to the page, set the
** rowid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */
if( pWriter->bFirstRowidInPage ){
fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
fts5WriteDlidxAppend(p, pWriter, iRowid);
}
/* Write the rowid. */
if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
}else{
assert( p->rc || iRowid>pWriter->iPrevRowid );
fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
}
pWriter->iPrevRowid = iRowid;
pWriter->bFirstRowidInDoclist = 0;
pWriter->bFirstRowidInPage = 0;
}
}
static void fts5WriteAppendPoslistData(
Fts5Index *p,
Fts5SegWriter *pWriter,
const u8 *aData,
int nData
){
Fts5PageWriter *pPage = &pWriter->writer;
const u8 *a = aData;
int n = nData;
assert( p->pConfig->pgsz>0 );
while( p->rc==SQLITE_OK
&& (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
){
int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
int nCopy = 0;
while( nCopy<nReq ){
i64 dummy;
nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
}
fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
a += nCopy;
n -= nCopy;
fts5WriteFlushLeaf(p, pWriter);
}
if( n>0 ){
fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
}
}
/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
Fts5Index *p,
Fts5SegWriter *pWriter, /* Writer object */
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
Fts5PageWriter *pLeaf = &pWriter->writer;
if( p->rc==SQLITE_OK ){
assert( pLeaf->pgno>=1 );
if( pLeaf->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
}
*pnLeaf = pLeaf->pgno-1;
if( pLeaf->pgno>1 ){
fts5WriteFlushBtree(p, pWriter);
}
}
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
fts5BufferFree(&pLeaf->pgidx);
fts5BufferFree(&pWriter->btterm);
for(i=0; i<pWriter->nDlidx; i++){
sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
}
sqlite3_free(pWriter->aDlidx);
}
static void fts5WriteInit(
Fts5Index *p,
Fts5SegWriter *pWriter,
int iSegid
){
const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
memset(pWriter, 0, sizeof(Fts5SegWriter));
pWriter->iSegid = iSegid;
fts5WriteDlidxGrow(p, pWriter, 1);
pWriter->writer.pgno = 1;
pWriter->bFirstTermInPage = 1;
pWriter->iBtPage = 1;
assert( pWriter->writer.buf.n==0 );
assert( pWriter->writer.pgidx.n==0 );
/* Grow the two buffers to pgsz + padding bytes in size. */
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
if( p->pIdxWriter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
"INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
pConfig->zDb, pConfig->zName
));
}
if( p->rc==SQLITE_OK ){
/* Initialize the 4-byte leaf-page header to 0x00. */
memset(pWriter->writer.buf.p, 0, 4);
pWriter->writer.buf.n = 4;
/* Bind the current output segment id to the index-writer. This is an
** optimization over binding the same value over and over as rows are
** inserted into %_idx by the current writer. */
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
}
/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
int i;
Fts5Buffer buf;
memset(&buf, 0, sizeof(Fts5Buffer));
for(i=0; i<pIter->nSeg; i++){
Fts5SegIter *pSeg = &pIter->aSeg[i];
if( pSeg->pSeg==0 ){
/* no-op */
}else if( pSeg->pLeaf==0 ){
/* All keys from this input segment have been transfered to the output.
** Set both the first and last page-numbers to 0 to indicate that the
** segment is now empty. */
pSeg->pSeg->pgnoLast = 0;
pSeg->pSeg->pgnoFirst = 0;
}else{
int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
i64 iLeafRowid;
Fts5Data *pData;
int iId = pSeg->pSeg->iSegid;
u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
pData = fts5DataRead(p, iLeafRowid);
if( pData ){
fts5BufferZero(&buf);
fts5BufferGrow(&p->rc, &buf, pData->nn);
fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
if( p->rc==SQLITE_OK ){
/* Set the szLeaf field */
fts5PutU16(&buf.p[2], (u16)buf.n);
}
/* Set up the new page-index array */
fts5BufferAppendVarint(&p->rc, &buf, 4);
if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
&& pSeg->iEndofDoclist<pData->szLeaf
){
int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
fts5BufferAppendBlob(&p->rc, &buf,
pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
);
}
fts5DataRelease(pData);
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
}
}
}
fts5BufferFree(&buf);
}
static void fts5MergeChunkCallback(
Fts5Index *p,
void *pCtx,
const u8 *pChunk, int nChunk
){
Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
}
/*
**
*/
static void fts5IndexMergeLevel(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
int iLvl, /* Level to read input from */
int *pnRem /* Write up to this many output leaves */
){
Fts5Structure *pStruct = *ppStruct;
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
Fts5StructureLevel *pLvlOut;
Fts5Iter *pIter = 0; /* Iterator to read input data */
int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
int nInput; /* Number of input segments */
Fts5SegWriter writer; /* Writer object */
Fts5StructureSegment *pSeg; /* Output segment */
Fts5Buffer term;
int bOldest; /* True if the output segment is the oldest */
int eDetail = p->pConfig->eDetail;
const int flags = FTS5INDEX_QUERY_NOOUTPUT;
int bTermWritten = 0; /* True if current term already output */
assert( iLvl<pStruct->nLevel );
assert( pLvl->nMerge<=pLvl->nSeg );
memset(&writer, 0, sizeof(Fts5SegWriter));
memset(&term, 0, sizeof(Fts5Buffer));
if( pLvl->nMerge ){
pLvlOut = &pStruct->aLevel[iLvl+1];
assert( pLvlOut->nSeg>0 );
nInput = pLvl->nMerge;
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
fts5WriteInit(p, &writer, pSeg->iSegid);
writer.writer.pgno = pSeg->pgnoLast+1;
writer.iBtPage = 0;
}else{
int iSegid = fts5AllocateSegid(p, pStruct);
/* Extend the Fts5Structure object as required to ensure the output
** segment exists. */
if( iLvl==pStruct->nLevel-1 ){
fts5StructureAddLevel(&p->rc, ppStruct);
pStruct = *ppStruct;
}
fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
if( p->rc ) return;
pLvl = &pStruct->aLevel[iLvl];
pLvlOut = &pStruct->aLevel[iLvl+1];
fts5WriteInit(p, &writer, iSegid);
/* Add the new segment to the output level */
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
pLvlOut->nSeg++;
pSeg->pgnoFirst = 1;
pSeg->iSegid = iSegid;
pStruct->nSegment++;
/* Read input from all segments in the input level */
nInput = pLvl->nSeg;
}
bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
assert( iLvl>=0 );
for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
fts5MultiIterEof(p, pIter)==0;
fts5MultiIterNext(p, pIter, 0, 0)
){
Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
int nPos; /* position-list size field value */
int nTerm;
const u8 *pTerm;
pTerm = fts5MultiIterTerm(pIter, &nTerm);
if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
if( pnRem && writer.nLeafWritten>nRem ){
break;
}
fts5BufferSet(&p->rc, &term, nTerm, pTerm);
bTermWritten =0;
}
/* Check for key annihilation. */
if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
if( p->rc==SQLITE_OK && bTermWritten==0 ){
/* This is a new term. Append a term to the output segment. */
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
bTermWritten = 1;
}
/* Append the rowid to the output */
/* WRITEPOSLISTSIZE */
fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
if( eDetail==FTS5_DETAIL_NONE ){
if( pSegIter->bDel ){
fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
if( pSegIter->nPos>0 ){
fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
}
}
}else{
/* Append the position-list data to the output */
nPos = pSegIter->nPos*2 + pSegIter->bDel;
fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
}
}
/* Flush the last leaf page to disk. Set the output segment b-tree height
** and last leaf page number at the same time. */
fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
if( fts5MultiIterEof(p, pIter) ){
int i;
/* Remove the redundant segments from the %_data table */
for(i=0; i<nInput; i++){
fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
}
/* Remove the redundant segments from the input level */
if( pLvl->nSeg!=nInput ){
int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
}
pStruct->nSegment -= nInput;
pLvl->nSeg -= nInput;
pLvl->nMerge = 0;
if( pSeg->pgnoLast==0 ){
pLvlOut->nSeg--;
pStruct->nSegment--;
}
}else{
assert( pSeg->pgnoLast>0 );
fts5TrimSegments(p, pIter);
pLvl->nMerge = nInput;
}
fts5MultiIterFree(pIter);
fts5BufferFree(&term);
if( pnRem ) *pnRem -= writer.nLeafWritten;
}
/*
** Do up to nPg pages of automerge work on the index.
**
** Return true if any changes were actually made, or false otherwise.
*/
static int fts5IndexMerge(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
int nPg, /* Pages of work to do */
int nMin /* Minimum number of segments to merge */
){
int nRem = nPg;
int bRet = 0;
Fts5Structure *pStruct = *ppStruct;
while( nRem>0 && p->rc==SQLITE_OK ){
int iLvl; /* To iterate through levels */
int iBestLvl = 0; /* Level offering the most input segments */
int nBest = 0; /* Number of input segments on best level */
/* Set iBestLvl to the level to read input segments from. */
assert( pStruct->nLevel>0 );
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
if( pLvl->nMerge ){
if( pLvl->nMerge>nBest ){
iBestLvl = iLvl;
nBest = pLvl->nMerge;
}
break;
}
if( pLvl->nSeg>nBest ){
nBest = pLvl->nSeg;
iBestLvl = iLvl;
}
}
static void fts5IndexCrisismerge(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
){
const int nCrisis = p->pConfig->nCrisisMerge;
Fts5Structure *pStruct = *ppStruct;
int iLvl = 0;
assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
fts5StructurePromote(p, iLvl+1, pStruct);
iLvl++;
}
*ppStruct = pStruct;
}
static int fts5IndexReturn(Fts5Index *p){
int rc = p->rc;
p->rc = SQLITE_OK;
return rc;
}
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
Fts5Index *pIdx;
Fts5SegWriter writer;
};
/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
int ret;
u32 dummy;
ret = fts5GetVarint32(aBuf, dummy);
if( ret<nMax ){
while( 1 ){
int i = fts5GetVarint32(&aBuf[ret], dummy);
if( (ret + i) > nMax ) break;
ret += i;
}
}
return ret;
}
/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
Fts5Hash *pHash = p->pHash;
Fts5Structure *pStruct;
int iSegid;
int pgnoLast = 0; /* Last leaf page number in segment */
/* Obtain a reference to the index structure and allocate a new segment-id
** for the new level-0 segment. */
pStruct = fts5StructureRead(p);
iSegid = fts5AllocateSegid(p, pStruct);
fts5StructureInvalidate(p);
if( iSegid ){
const int pgsz = p->pConfig->pgsz;
int eDetail = p->pConfig->eDetail;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
Fts5SegWriter writer;
fts5WriteInit(p, &writer, iSegid);
pBuf = &writer.writer.buf;
pPgidx = &writer.writer.pgidx;
/* fts5WriteInit() should have initialized the buffers to (most likely)
** the maximum space required. */
assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
/* Begin scanning through hash table entries. This loop runs once for each
** term/doclist currently stored within the hash table. */
if( p->rc==SQLITE_OK ){
p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
}
while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
const char *zTerm; /* Buffer containing term */
const u8 *pDoclist; /* Pointer to doclist for this term */
int nDoclist; /* Size of doclist in bytes */
/* Write the term for this entry to disk. */
sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
assert( writer.bFirstRowidInPage==0 );
if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
/* The entire doclist will fit on the current leaf. */
fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
}else{
i64 iRowid = 0;
i64 iDelta = 0;
int iOff = 0;
/* The entire doclist will not fit on this leaf. The following
** loop iterates through the poslists that make up the current
** doclist. */
while( p->rc==SQLITE_OK && iOff<nDoclist ){
iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
iRowid += iDelta;
if( writer.bFirstRowidInPage ){
fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
writer.bFirstRowidInPage = 0;
fts5WriteDlidxAppend(p, &writer, iRowid);
}else{
pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
}
assert( pBuf->n<=pBuf->nSpace );
if( eDetail==FTS5_DETAIL_NONE ){
if( iOff<nDoclist && pDoclist[iOff]==0 ){
pBuf->p[pBuf->n++] = 0;
iOff++;
if( iOff<nDoclist && pDoclist[iOff]==0 ){
pBuf->p[pBuf->n++] = 0;
iOff++;
}
}
if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
}
}else{
int bDummy;
int nPos;
int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
nCopy += nPos;
if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
/* The entire poslist will fit on the current leaf. So copy
** it in one go. */
fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
}else{
/* The entire poslist will not fit on this leaf. So it needs
** to be broken into sections. The only qualification being
** that each varint must be stored contiguously. */
const u8 *pPoslist = &pDoclist[iOff];
int iPos = 0;
while( p->rc==SQLITE_OK ){
int nSpace = pgsz - pBuf->n - pPgidx->n;
int n = 0;
if( (nCopy - iPos)<=nSpace ){
n = nCopy - iPos;
}else{
n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
}
assert( n>0 );
fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
iPos += n;
if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
}
if( iPos>=nCopy ) break;
}
}
iOff += nCopy;
}
}
}
/* TODO2: Doclist terminator written here. */
/* pBuf->p[pBuf->n++] = '\0'; */
assert( pBuf->n<=pBuf->nSpace );
sqlite3Fts5HashScanNext(pHash);
}
sqlite3Fts5HashClear(pHash);
fts5WriteFinish(p, &writer, &pgnoLast);
/* Update the Fts5Structure. It is written back to the database by the
** fts5StructureRelease() call below. */
if( pStruct->nLevel==0 ){
fts5StructureAddLevel(&p->rc, &pStruct);
}
fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
if( p->rc==SQLITE_OK ){
pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
pSeg->iSegid = iSegid;
pSeg->pgnoFirst = 1;
pSeg->pgnoLast = pgnoLast;
pStruct->nSegment++;
}
fts5StructurePromote(p, 0, pStruct);
}
fts5IndexAutomerge(p, &pStruct, pgnoLast);
fts5IndexCrisismerge(p, &pStruct);
fts5StructureWrite(p, pStruct);
fts5StructureRelease(pStruct);
}
/*
** Flush any data stored in the in-memory hash tables to the database.
*/
static void fts5IndexFlush(Fts5Index *p){
/* Unless it is empty, flush the hash table to disk */
if( p->nPendingData ){
assert( p->pHash );
p->nPendingData = 0;
fts5FlushOneHash(p);
}
}
static Fts5Structure *fts5IndexOptimizeStruct(
Fts5Index *p,
Fts5Structure *pStruct
){
Fts5Structure *pNew = 0;
int nByte = sizeof(Fts5Structure);
int nSeg = pStruct->nSegment;
int i;
/* Figure out if this structure requires optimization. A structure does
** not require optimization if either:
**
** + it consists of fewer than two segments, or
** + all segments are on the same level, or
** + all segments except one are currently inputs to a merge operation.
**
** In the first case, return NULL. In the second, increment the ref-count
** on *pStruct and return a copy of the pointer to it.
*/
if( nSeg<2 ) return 0;
for(i=0; i<pStruct->nLevel; i++){
int nThis = pStruct->aLevel[i].nSeg;
if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
fts5StructureRef(pStruct);
return pStruct;
}
assert( pStruct->aLevel[i].nMerge<=nThis );
}
nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
if( pNew ){
Fts5StructureLevel *pLvl;
nByte = nSeg * sizeof(Fts5StructureSegment);
fts5IndexFlush(p);
fts5CloseReader(p);
return fts5IndexReturn(p);
}
/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
static int sqlite3Fts5IndexRollback(Fts5Index *p){
fts5CloseReader(p);
fts5IndexDiscardData(p);
fts5StructureInvalidate(p);
/* assert( p->rc==SQLITE_OK ); */
return SQLITE_OK;
}
/*
** The %_data table is completely empty when this function is called. This
** function populates it with the initial structure objects for each index,
** and the initial version of the "averages" record (a zero-byte blob).
*/
static int sqlite3Fts5IndexReinit(Fts5Index *p){
Fts5Structure s;
fts5StructureInvalidate(p);
memset(&s, 0, sizeof(Fts5Structure));
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
fts5StructureWrite(p, &s);
return fts5IndexReturn(p);
}
/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/
static int sqlite3Fts5IndexOpen(
Fts5Config *pConfig,
int bCreate,
Fts5Index **pp,
char **pzErr
){
int rc = SQLITE_OK;
Fts5Index *p; /* New object */
*pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
if( rc==SQLITE_OK ){
p->pConfig = pConfig;
p->nWorkUnit = FTS5_WORK_UNIT;
p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
if( p->zDataTbl && bCreate ){
rc = sqlite3Fts5CreateTable(
pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5CreateTable(pConfig, "idx",
"segid, term, pgno, PRIMARY KEY(segid, term)",
1, pzErr
);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5IndexReinit(p);
}
}
}
assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
if( rc ){
sqlite3Fts5IndexClose(p);
*pp = 0;
}
return rc;
}
/*
** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
*/
static int sqlite3Fts5IndexClose(Fts5Index *p){
int rc = SQLITE_OK;
if( p ){
assert( p->pReader==0 );
fts5StructureInvalidate(p);
sqlite3_finalize(p->pWriter);
sqlite3_finalize(p->pDeleter);
sqlite3_finalize(p->pIdxWriter);
sqlite3_finalize(p->pIdxDeleter);
sqlite3_finalize(p->pIdxSelect);
sqlite3_finalize(p->pDataVersion);
sqlite3Fts5HashFree(p->pHash);
sqlite3_free(p->zDataTbl);
sqlite3_free(p);
}
return rc;
}
/*
** Argument p points to a buffer containing utf-8 text that is n bytes in
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
const char *p,
int nByte,
int nChar
){
int n = 0;
int i;
for(i=0; i<nChar; i++){
if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
if( (unsigned char)p[n++]>=0xc0 ){
while( (p[n] & 0xc0)==0x80 ){
n++;
if( n>=nByte ) break;
}
}
}
return n;
int rc; /* Return code */
Fts5Config *pConfig = p->pConfig; /* Configuration object */
u8 aCookie[4]; /* Binary representation of iNew */
sqlite3_blob *pBlob = 0;
assert( p->rc==SQLITE_OK );
sqlite3Fts5Put32(aCookie, iNew);
rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
"block", FTS5_STRUCTURE_ROWID, 1, &pBlob
);
if( rc==SQLITE_OK ){
sqlite3_blob_write(pBlob, aCookie, 4, 0);
rc = sqlite3_blob_close(pBlob);
}
return rc;
}
static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
Fts5Structure *pStruct;
pStruct = fts5StructureRead(p);
fts5StructureRelease(pStruct);
return fts5IndexReturn(p);
}
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the integrity-check
** functionality.
*/
/*
** Return a simple checksum value based on the arguments.
*/
static u64 sqlite3Fts5IndexEntryCksum(
i64 iRowid,
int iCol,
int iPos,
int iIdx,
const char *pTerm,
int nTerm
){
int i;
u64 ret = iRowid;
ret += (ret<<3) + iCol;
ret += (ret<<3) + iPos;
if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
return ret;
}
#ifdef SQLITE_DEBUG
/*
** This function is purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** Instead, it tests that the same set of pgno/rowid combinations are
** visited regardless of whether the doclist-index identified by parameters
** iSegid/iLeaf is iterated in forwards or reverse order.
*/
static void fts5TestDlidxReverse(
Fts5Index *p,
int iSegid, /* Segment id to load from */
int iLeaf /* Load doclist-index for this leaf */
){
Fts5DlidxIter *pDlidx = 0;
u64 cksum1 = 13;
u64 cksum2 = 13;
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(p, pDlidx)
){
i64 iRowid = fts5DlidxIterRowid(pDlidx);
int pgno = fts5DlidxIterPgno(pDlidx);
assert( pgno>iLeaf );
cksum1 += iRowid + ((i64)pgno<<32);
}
fts5DlidxIterFree(pDlidx);
pDlidx = 0;
for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterPrev(p, pDlidx)
){
i64 iRowid = fts5DlidxIterRowid(pDlidx);
int pgno = fts5DlidxIterPgno(pDlidx);
assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
cksum2 += iRowid + ((i64)pgno<<32);
}
fts5DlidxIterFree(pDlidx);
pDlidx = 0;
if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
}
static int fts5QueryCksum(
Fts5Index *p, /* Fts5 index object */
int iIdx,
const char *z, /* Index key to query for */
int n, /* Size of index key in bytes */
int flags, /* Flags for Fts5IndexQuery */
u64 *pCksum /* IN/OUT: Checksum value */
){
int eDetail = p->pConfig->eDetail;
u64 cksum = *pCksum;
Fts5IndexIter *pIter = 0;
int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
i64 rowid = pIter->iRowid;
if( eDetail==FTS5_DETAIL_NONE ){
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
}else{
Fts5PoslistReader sReader;
for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
sReader.bEof==0;
sqlite3Fts5PoslistReaderNext(&sReader)
){
int iCol = FTS5_POS2COLUMN(sReader.iPos);
int iOff = FTS5_POS2OFFSET(sReader.iPos);
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
}
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5IterNext(pIter);
}
}
sqlite3Fts5IterClose(pIter);
*pCksum = cksum;
return rc;
}
/*
** This function is also purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
*/
static void fts5TestTerm(
Fts5Index *p,
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
/* If this is a prefix query, check that the results returned if the
** the index is disabled are the same. In both ASC and DESC order.
**
** This check may only be performed if the hash table is empty. This
** is because the hash table only supports a single scan query at
** a time, and the multi-iter loop from which this function is called
** is already performing such a scan. */
if( p->nPendingData==0 ){
if( iIdx>0 && rc==SQLITE_OK ){
int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
ck2 = 0;
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
}
if( iIdx>0 && rc==SQLITE_OK ){
int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
ck2 = 0;
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
}
}
cksum3 ^= ck1;
fts5BufferSet(&rc, pPrev, n, (const u8*)z);
if( rc==SQLITE_OK && cksum3!=expected ){
rc = FTS5_CORRUPT;
}
*pCksum = cksum3;
}
p->rc = rc;
}
#else
# define fts5TestDlidxReverse(x,y,z)
# define fts5TestTerm(u,v,w,x,y,z)
#endif
/*
** Check that:
**
** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
** contain zero terms.
** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
** contain zero rowids.
*/
static void fts5IndexIntegrityCheckEmpty(
Fts5Index *p,
Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
int iFirst,
int iNoRowid,
int iLast
){
int i;
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
if( pLeaf ){
if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
}
fts5DataRelease(pLeaf);
}
}
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
int iTermOff = 0;
int ii;
Fts5Buffer buf1 = {0,0,0};
Fts5Buffer buf2 = {0,0,0};
ii = pLeaf->szLeaf;
while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
int res;
int iOff;
int nIncr;
ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
iTermOff += nIncr;
iOff = iTermOff;
if( iOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else if( iTermOff==nIncr ){
int nByte;
iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
if( (iOff+nByte)>pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
}
}else{
int nKeep, nByte;
iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
buf1.n = nKeep;
fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
}
if( p->rc==SQLITE_OK ){
res = fts5BufferCompare(&buf1, &buf2);
if( res<=0 ) p->rc = FTS5_CORRUPT;
}
}
fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
}
fts5BufferFree(&buf1);
fts5BufferFree(&buf2);
}
static void fts5IndexIntegrityCheckSegment(
Fts5Index *p, /* FTS5 backend object */
Fts5StructureSegment *pSeg /* Segment to check internal consistency */
){
Fts5Config *pConfig = p->pConfig;
sqlite3_stmt *pStmt = 0;
int rc2;
int iIdxPrevLeaf = pSeg->pgnoFirst-1;
int iDlidxPrevLeaf = pSeg->pgnoLast;
if( pSeg->pgnoFirst==0 ) return;
fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
"SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
pConfig->zDb, pConfig->zName, pSeg->iSegid
));
/* Iterate through the b-tree hierarchy. */
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iRow; /* Rowid for this leaf */
Fts5Data *pLeaf; /* Data for this leaf */
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
int iIdxLeaf = sqlite3_column_int(pStmt, 2);
int bIdxDlidx = sqlite3_column_int(pStmt, 3);
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
pLeaf = fts5LeafRead(p, iRow);
if( pLeaf==0 ) break;
/* Check that the leaf contains at least one term, and that it is equal
** to or larger than the split-key in zIdxTerm. Also check that if there
** is also a rowid pointer within the leaf page header, it points to a
** location before the term. */
if( pLeaf->nn<=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
int iOff; /* Offset of first term on leaf */
int iRowidOff; /* Offset of first rowid on leaf */
int nTerm; /* Size of term on leaf in bytes */
int res; /* Comparison of term and split-key */
iOff = fts5LeafFirstTermOff(pLeaf);
iRowidOff = fts5LeafFirstRowidOff(pLeaf);
if( iRowidOff>=iOff ){
p->rc = FTS5_CORRUPT;
}else{
iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
if( res==0 ) res = nTerm - nIdxTerm;
if( res<0 ) p->rc = FTS5_CORRUPT;
}
fts5IntegrityCheckPgidx(p, pLeaf);
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
fts5IndexIntegrityCheckEmpty(
p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
);
if( p->rc ) break;
/* If there is a doclist-index, check that it looks right. */
if( bIdxDlidx ){
Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
int iPrevLeaf = iIdxLeaf;
int iSegid = pSeg->iSegid;
int iPg = 0;
i64 iKey;
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(p, pDlidx)
){
/* Check any rowid-less pages that occur before the current leaf. */
for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
fts5DataRelease(pLeaf);
}
}
iPrevLeaf = fts5DlidxIterPgno(pDlidx);
/* Check that the leaf page indicated by the iterator really does
** contain the rowid suggested by the same. */
iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
i64 iRowid;
int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
ASSERT_SZLEAF_OK(pLeaf);
if( iRowidOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
}
fts5DataRelease(pLeaf);
}
}
iDlidxPrevLeaf = iPg;
fts5DlidxIterFree(pDlidx);
fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
}else{
iDlidxPrevLeaf = pSeg->pgnoLast;
/* TODO: Check there is no doclist index */
}
iIdxPrevLeaf = iIdxLeaf;
}
rc2 = sqlite3_finalize(pStmt);
if( p->rc==SQLITE_OK ) p->rc = rc2;
/* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
p->rc = FTS5_CORRUPT;
}
#endif
}
/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
int eDetail = p->pConfig->eDetail;
u64 cksum2 = 0; /* Checksum based on contents of indexes */
Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
Fts5Iter *pIter; /* Used to iterate through entire index */
Fts5Structure *pStruct; /* Index structure */
#ifdef SQLITE_DEBUG
/* Used by extra internal tests only run if NDEBUG is not defined */
u64 cksum3 = 0; /* Checksum based on contents of indexes */
Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
#endif
const int flags = FTS5INDEX_QUERY_NOOUTPUT;
/* Load the FTS index structure */
pStruct = fts5StructureRead(p);
/* Check that the internal nodes of each segment match the leaves */
if( pStruct ){
int iLvl, iSeg;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
fts5IndexIntegrityCheckSegment(p, pSeg);
}
}
}
/* The cksum argument passed to this function is a checksum calculated
** based on all expected entries in the FTS index (including prefix index
** Two versions of the same checksum are calculated. The first (stack
** variable cksum2) based on entries extracted from the full-text index
** while doing a linear scan of each individual index in turn.
**
** As each term visited by the linear scans, a separate query for the
** same term is performed. cksum3 is calculated based on the entries
** extracted by these queries.
*/
for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
fts5MultiIterEof(p, pIter)==0;
fts5MultiIterNext(p, pIter, 0, 0)
){
int n; /* Size of term in bytes */
i64 iPos = 0; /* Position read from poslist */
int iOff = 0; /* Offset within poslist */
i64 iRowid = fts5MultiIterRowid(pIter);
char *z = (char*)fts5MultiIterTerm(pIter, &n);
/* If this is a new term, query for it. Update cksum3 with the results. */
fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
if( eDetail==FTS5_DETAIL_NONE ){
if( 0==fts5MultiIterIsEmpty(p, pIter) ){
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
}
}else{
poslist.n = 0;
fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
int iCol = FTS5_POS2COLUMN(iPos);
int iTokOff = FTS5_POS2OFFSET(iPos);
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
}
}
}
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
fts5MultiIterFree(pIter);
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
fts5StructureRelease(pStruct);
#ifdef SQLITE_DEBUG
fts5BufferFree(&term);
#endif
fts5BufferFree(&poslist);
return fts5IndexReturn(p);
}
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar
** function only.
*/
/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
*/
static void fts5DecodeRowid(
i64 iRowid, /* Rowid from %_data table */
int *piSegid, /* OUT: Segment id */
int *pbDlidx, /* OUT: Dlidx flag */
int *piHeight, /* OUT: Height */
int *piPgno /* OUT: Page number */
){
*piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
iRowid >>= FTS5_DATA_PAGE_B;
*piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
iRowid >>= FTS5_DATA_HEIGHT_B;
*pbDlidx = (int)(iRowid & 0x0001);
iRowid >>= FTS5_DATA_DLI_B;
*piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
}
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
if( iSegid==0 ){
if( iKey==FTS5_AVERAGES_ROWID ){
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
}else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
}
}
else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
);
}
}
static void fts5DebugStructure(
int *pRc, /* IN/OUT: error code */
Fts5Buffer *pBuf,
Fts5Structure *p
){
int iLvl, iSeg; /* Iterate through levels, segments */
for(iLvl=0; iLvl<p->nLevel; iLvl++){
Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
" {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
);
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
);
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
}
}
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
** function appends a human-readable representation of the same object
** to the buffer passed as the second argument.
*/
static void fts5DecodeStructure(
int *pRc, /* IN/OUT: error code */
Fts5Buffer *pBuf,
const u8 *pBlob, int nBlob
){
int rc; /* Return code */
Fts5Structure *p = 0; /* Decoded structure object */
rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
if( rc!=SQLITE_OK ){
*pRc = rc;
return;
}
fts5DebugStructure(pRc, pBuf, p);
fts5StructureRelease(p);
}
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain an "averages" record. This function
** appends a human-readable representation of record to the buffer passed
** as the second argument.
*/
static void fts5DecodeAverages(
int *pRc, /* IN/OUT: error code */
Fts5Buffer *pBuf,
const u8 *pBlob, int nBlob
){
int i = 0;
const char *zSpace = "";
while( i<nBlob ){
u64 iVal;
i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
zSpace = " ";
}
}
/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.
**
** The return value is the number of bytes read from the input buffer.
i64 iDelta;
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
iDocid += iDelta;
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
}
}
return iOff;
}
/*
** This function is part of the fts5_decode() debugging function. It is
** only ever used with detail=none tables.
**
** Buffer (pData/nData) contains a doclist in the format used by detail=none
** tables. This function appends a human-readable version of that list to
** buffer pBuf.
**
** If *pRc is other than SQLITE_OK when this function is called, it is a
** no-op. If an OOM or other error occurs within this function, *pRc is
** set to an SQLite error code before returning. The final state of buffer
** pBuf is undefined in this case.
*/
static void fts5DecodeRowidList(
int *pRc, /* IN/OUT: Error code */
Fts5Buffer *pBuf, /* Buffer to append text to */
const u8 *pData, int nData /* Data to decode list-of-rowids from */
){
int i = 0;
i64 iRowid = 0;
while( i<nData ){
const char *zApp = "";
u64 iVal;
i += sqlite3Fts5GetVarint(&pData[i], &iVal);
iRowid += iVal;
if( i<nData && pData[i]==0x00 ){
i++;
if( i<nData && pData[i]==0x00 ){
i++;
zApp = "+";
}else{
zApp = "*";
}
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
}
}
/*
** The implementation of user-defined scalar function fts5_decode().
*/
static void fts5DecodeFunction(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args (always 2) */
sqlite3_value **apVal /* Function arguments */
){
i64 iRowid; /* Rowid for record being decoded */
int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
const u8 *aBlob; int n; /* Record to decode */
u8 *a = 0;
Fts5Buffer s; /* Build up text to return here */
int rc = SQLITE_OK; /* Return code */
int nSpace = 0;
int eDetailNone = (sqlite3_user_data(pCtx)!=0);
assert( nArg==2 );
UNUSED_PARAM(nArg);
memset(&s, 0, sizeof(Fts5Buffer));
iRowid = sqlite3_value_int64(apVal[0]);
/* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
** buffer overreads even if the record is corrupt. */
n = sqlite3_value_bytes(apVal[1]);
aBlob = sqlite3_value_blob(apVal[1]);
nSpace = n + FTS5_DATA_ZERO_PADDING;
a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
if( a==0 ) goto decode_out;
memcpy(a, aBlob, n);
fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
fts5DebugRowid(&rc, &s, iRowid);
if( bDlidx ){
Fts5Data dlidx;
Fts5DlidxLvl lvl;
dlidx.p = a;
dlidx.nn = n;
memset(&lvl, 0, sizeof(Fts5DlidxLvl));
lvl.pData = &dlidx;
lvl.iLeafPgno = iPgno;
for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
sqlite3Fts5BufferAppendPrintf(&rc, &s,
" %d(%lld)", lvl.iLeafPgno, lvl.iRowid
);
}
}else if( iSegid==0 ){
if( iRowid==FTS5_AVERAGES_ROWID ){
fts5DecodeAverages(&rc, &s, a, n);
}else{
fts5DecodeStructure(&rc, &s, a, n);
}
}else if( eDetailNone ){
Fts5Buffer term; /* Current term read from page */
int szLeaf;
int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
int iTermOff;
int nKeep = 0;
int iOff;
memset(&term, 0, sizeof(Fts5Buffer));
/* Decode any entries that occur before the first term. */
if( szLeaf<n ){
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
}else{
iTermOff = szLeaf;
}
fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
iOff = iTermOff;
while( iOff<szLeaf ){
int nAppend;
/* Read the term data for the next term*/
iOff += fts5GetVarint32(&a[iOff], nAppend);
term.n = nKeep;
fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
);
iOff += nAppend;
/* Figure out where the doclist for this term ends */
if( iPgidxOff<n ){
int nIncr;
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
iTermOff += nIncr;
}else{
iTermOff = szLeaf;
}
fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
iOff = iTermOff;
if( iOff<szLeaf ){
iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}
fts5BufferFree(&term);
}else{
Fts5Buffer term; /* Current term read from page */
int szLeaf; /* Offset of pgidx in a[] */
int iPgidxOff;
int iPgidxPrev = 0; /* Previous value read from pgidx */
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
int nDoclist;
memset(&term, 0, sizeof(Fts5Buffer));
if( n<4 ){
sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
goto decode_out;
}else{
iRowidOff = fts5GetU16(&a[0]);
iPgidxOff = szLeaf = fts5GetU16(&a[2]);
if( iPgidxOff<n ){
fts5GetVarint32(&a[iPgidxOff], iTermOff);
}
}
/* Decode the position list tail at the start of the page */
if( iRowidOff!=0 ){
iOff = iRowidOff;
}else if( iTermOff!=0 ){
iOff = iTermOff;
}else{
iOff = szLeaf;
}
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
/* Decode any more doclist data that appears on the page before the
** first term. */
nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
while( iPgidxOff<n ){
int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
int nByte; /* Bytes of data */
int iEnd;
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
iPgidxPrev += nByte;
iOff = iPgidxPrev;
if( iPgidxOff<n ){
fts5GetVarint32(&a[iPgidxOff], nByte);
iEnd = iPgidxPrev + nByte;
}else{
iEnd = szLeaf;
}
if( bFirst==0 ){
iOff += fts5GetVarint32(&a[iOff], nByte);
term.n = nByte;
}
iOff += fts5GetVarint32(&a[iOff], nByte);
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
);
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
}
fts5BufferFree(&term);
}
decode_out:
sqlite3_free(a);
if( rc==SQLITE_OK ){
sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
}else{
sqlite3_result_error_code(pCtx, rc);
}
fts5BufferFree(&s);
}
/*
** The implementation of user-defined scalar function fts5_rowid().
*/
static void fts5RowidFunction(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args (always 2) */
sqlite3_value **apVal /* Function arguments */
){
const char *zArg;
if( nArg==0 ){
sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
}else{
zArg = (const char*)sqlite3_value_text(apVal[0]);
if( 0==sqlite3_stricmp(zArg, "segment") ){
i64 iRowid;
int segid, pgno;
if( nArg!=3 ){
sqlite3_result_error(pCtx,
"should be: fts5_rowid('segment', segid, pgno))", -1
);
}else{
segid = sqlite3_value_int(apVal[1]);
pgno = sqlite3_value_int(apVal[2]);
iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
sqlite3_result_int64(pCtx, iRowid);
}
}else{
sqlite3_result_error(pCtx,
"first arg to fts5_rowid() must be 'segment'" , -1
);
}
}
}
/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
*/
static int sqlite3Fts5IndexInit(sqlite3 *db){
int rc = sqlite3_create_function(
db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
);
if( rc==SQLITE_OK ){
rc = sqlite3_create_function(
db, "fts5_decode_none", 2,
SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
);
}
if( rc==SQLITE_OK ){
rc = sqlite3_create_function(
db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
);
}
return rc;
}
static int sqlite3Fts5IndexReset(Fts5Index *p){
assert( p->pStruct==0 || p->iStructVersion!=0 );
if( fts5IndexDataVersion(p)!=p->iStructVersion ){
fts5StructureInvalidate(p);
}
return fts5IndexReturn(p);
}
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
( run in 0.942 second using v1.01-cache-2.11-cpan-97f6503c9c8 )