DBD-SQLite
view release on metacpan or search on metacpan
*/
static int kvstorageWrite(
const char *zClass,
const char *zKey,
const char *zData
){
FILE *fd;
char zXKey[KVSTORAGE_KEY_SZ];
kvstorageMakeKey(zClass, zKey, zXKey);
fd = fopen(zXKey, "wb");
if( fd ){
SQLITE_KV_TRACE(("KVVFS-WRITE %-15s (%d) %.50s%s\n", zXKey,
(int)strlen(zData), zData,
strlen(zData)>50 ? "..." : ""));
fputs(zData, fd);
fclose(fd);
return 0;
}else{
return 1;
}
}
/* Delete a key (with its corresponding data) from the key/value
** namespace given by zClass. If the key does not previously exist,
** this routine is a no-op.
*/
static int kvstorageDelete(const char *zClass, const char *zKey){
char zXKey[KVSTORAGE_KEY_SZ];
kvstorageMakeKey(zClass, zKey, zXKey);
unlink(zXKey);
SQLITE_KV_TRACE(("KVVFS-DELETE %-15s\n", zXKey));
return 0;
}
/* Read the value associated with a zKey from the key/value namespace given
** by zClass and put the text data associated with that key in the first
** nBuf bytes of zBuf[]. The value might be truncated if zBuf is not large
** enough to hold it all. The value put into zBuf must always be zero
** terminated, even if it gets truncated because nBuf is not large enough.
**
** Return the total number of bytes in the data, without truncation, and
** not counting the final zero terminator. Return -1 if the key does
** not exist or its key cannot be read.
**
** If nBuf<=0 then this routine simply returns the size of the data
** without actually reading it. Similarly, if nBuf==1 then it
** zero-terminates zBuf at zBuf[0] and returns the size of the data
** without reading it.
*/
static int kvstorageRead(
const char *zClass,
const char *zKey,
char *zBuf,
int nBuf
){
FILE *fd;
struct stat buf;
char zXKey[KVSTORAGE_KEY_SZ];
kvstorageMakeKey(zClass, zKey, zXKey);
if( access(zXKey, R_OK)!=0
|| stat(zXKey, &buf)!=0
|| !S_ISREG(buf.st_mode)
){
SQLITE_KV_TRACE(("KVVFS-READ %-15s (-1)\n", zXKey));
return -1;
}
if( nBuf<=0 ){
return (int)buf.st_size;
}else if( nBuf==1 ){
zBuf[0] = 0;
SQLITE_KV_TRACE(("KVVFS-READ %-15s (%d)\n", zXKey,
(int)buf.st_size));
return (int)buf.st_size;
}
if( nBuf > buf.st_size + 1 ){
nBuf = buf.st_size + 1;
}
fd = fopen(zXKey, "rb");
if( fd==0 ){
SQLITE_KV_TRACE(("KVVFS-READ %-15s (-1)\n", zXKey));
return -1;
}else{
sqlite3_int64 n = fread(zBuf, 1, nBuf-1, fd);
fclose(fd);
zBuf[n] = 0;
SQLITE_KV_TRACE(("KVVFS-READ %-15s (%lld) %.50s%s\n", zXKey,
n, zBuf, n>50 ? "..." : ""));
return (int)n;
}
}
/*
** An internal level of indirection which enables us to replace the
** kvvfs i/o methods with JavaScript implementations in WASM builds.
** Maintenance reminder: if this struct changes in any way, the JSON
** rendering of its structure must be updated in
** sqlite3-wasm.c:sqlite3__wasm_enum_json(). There are no binary
** compatibility concerns, so it does not need an iVersion
** member.
*/
typedef struct sqlite3_kvvfs_methods sqlite3_kvvfs_methods;
struct sqlite3_kvvfs_methods {
int (*xRead)(const char *zClass, const char *zKey, char *zBuf, int nBuf);
int (*xWrite)(const char *zClass, const char *zKey, const char *zData);
int (*xDelete)(const char *zClass, const char *zKey);
const int nKeySize;
};
/*
** This object holds the kvvfs I/O methods which may be swapped out
** for JavaScript-side implementations in WASM builds. In such builds
** it cannot be const, but in native builds it should be so that
** the compiler can hopefully optimize this level of indirection out.
** That said, kvvfs is intended primarily for use in WASM builds.
**
** This is not explicitly flagged as static because the amalgamation
** build will tag it with SQLITE_PRIVATE.
*/
#ifndef SQLITE_WASM
const
#endif
#define F2FS_IOCTL_MAGIC 0xf5
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32)
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
#endif /* __linux__ */
/*
** Different Unix systems declare open() in different ways. Same use
** open(const char*,int,mode_t). Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper
** which always has the same well-defined interface.
*/
static int posixOpen(const char *zFile, int flags, int mode){
return open(zFile, flags, mode);
}
/* Forward reference */
static int openDirectory(const char*, int*);
static int unixGetpagesize(void);
/*
** Many system calls are accessed through pointer-to-functions so that
** they may be overridden at runtime to facilitate fault injection during
** testing and sandboxing. The following array holds the names and pointers
** to all overrideable system calls.
*/
static struct unix_syscall {
const char *zName; /* Name of the system call */
sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
sqlite3_syscall_ptr pDefault; /* Default value */
} aSyscall[] = {
{ "open", (sqlite3_syscall_ptr)posixOpen, 0 },
#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
{ "close", (sqlite3_syscall_ptr)close, 0 },
#define osClose ((int(*)(int))aSyscall[1].pCurrent)
{ "access", (sqlite3_syscall_ptr)access, 0 },
#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent)
{ "getcwd", (sqlite3_syscall_ptr)getcwd, 0 },
#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent)
{ "stat", (sqlite3_syscall_ptr)stat, 0 },
#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)
/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call. So redefine fcntl() to be something
** that always succeeds. This means that locking does not occur under
** DJGPP. But it is DOS - what did you expect?
*/
#ifdef __DJGPP__
{ "fstat", 0, 0 },
#define osFstat(a,b,c) 0
#else
{ "fstat", (sqlite3_syscall_ptr)fstat, 0 },
#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
#endif
{ "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 },
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)
{ "fcntl", (sqlite3_syscall_ptr)fcntl, 0 },
#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent)
{ "read", (sqlite3_syscall_ptr)read, 0 },
#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pread", (sqlite3_syscall_ptr)pread, 0 },
#else
{ "pread", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
#if defined(USE_PREAD64)
{ "pread64", (sqlite3_syscall_ptr)pread64, 0 },
#else
{ "pread64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent)
{ "write", (sqlite3_syscall_ptr)write, 0 },
#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
#else
{ "pwrite", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\
aSyscall[12].pCurrent)
#if defined(USE_PREAD64)
{ "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },
#else
{ "pwrite64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\
aSyscall[13].pCurrent)
#if defined(HAVE_FCHMOD)
{ "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
#else
{ "fchmod", (sqlite3_syscall_ptr)0, 0 },
#define osFchmod(FID,MODE) 0
#endif
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
{ "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },
#else
{ "fallocate", (sqlite3_syscall_ptr)0, 0 },
#endif
}
}
for(i++; i<ArraySize(aSyscall); i++){
if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
}
return 0;
}
/*
** Do not accept any file descriptor less than this value, in order to avoid
** opening database file using file descriptors that are commonly used for
** standard input, output, and error.
*/
#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
#endif
/*
** Invoke open(). Do so multiple times, until it either succeeds or
** fails for some reason other than EINTR.
**
** If the file creation mode "m" is 0 then set it to the default for
** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
** 0644) as modified by the system umask. If m is not 0, then
** make the file creation mode be exactly m ignoring the umask.
**
** The m parameter will be non-zero only when creating -wal, -journal,
** and -shm files. We want those files to have *exactly* the same
** permissions as their original database, unadulterated by the umask.
** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
** transaction crashes and leaves behind hot journals, then any
** process that is able to write to the database will also be able to
** recover the hot journals.
*/
static int robust_open(const char *z, int f, mode_t m){
int fd;
mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
while(1){
#if defined(O_CLOEXEC)
fd = osOpen(z,f|O_CLOEXEC,m2);
#else
fd = osOpen(z,f,m2);
#endif
if( fd<0 ){
if( errno==EINTR ) continue;
break;
}
if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
if( (f & (O_EXCL|O_CREAT))==(O_EXCL|O_CREAT) ){
(void)osUnlink(z);
}
osClose(fd);
sqlite3_log(SQLITE_WARNING,
"attempt to open \"%s\" as file descriptor %d", z, fd);
fd = -1;
if( osOpen("/dev/null", O_RDONLY, m)<0 ) break;
}
if( fd>=0 ){
if( m!=0 ){
struct stat statbuf;
if( osFstat(fd, &statbuf)==0
&& statbuf.st_size==0
&& (statbuf.st_mode&0777)!=m
){
osFchmod(fd, m);
}
}
#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif
}
return fd;
}
/*
** Helper functions to obtain and relinquish the global mutex. The
** global mutex is used to protect the unixInodeInfo objects used by
** this file, all of which may be shared by multiple threads.
**
** Function unixMutexHeld() is used to assert() that the global mutex
** is held when required. This function is only used as part of assert()
** statements. e.g.
**
** unixEnterMutex()
** assert( unixMutexHeld() );
** unixEnterLeave()
**
** To prevent deadlock, the global unixBigLock must must be acquired
** before the unixInodeInfo.pLockMutex mutex, if both are held. It is
** OK to get the pLockMutex without holding unixBigLock first, but if
** that happens, the unixBigLock mutex must not be acquired until after
** pLockMutex is released.
**
** OK: enter(unixBigLock), enter(pLockInfo)
** OK: enter(unixBigLock)
** OK: enter(pLockInfo)
** ERROR: enter(pLockInfo), enter(unixBigLock)
*/
static sqlite3_mutex *unixBigLock = 0;
static void unixEnterMutex(void){
assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */
sqlite3_mutex_enter(unixBigLock);
}
static void unixLeaveMutex(void){
assert( sqlite3_mutex_held(unixBigLock) );
sqlite3_mutex_leave(unixBigLock);
}
#ifdef SQLITE_DEBUG
static int unixMutexHeld(void) {
return sqlite3_mutex_held(unixBigLock);
}
#endif
#ifdef SQLITE_HAVE_OS_TRACE
/*
** Helper function for printing out trace information from debugging
** binaries. This returns the string representation of the supplied
** integer lock-type.
*/
static const char *azFileLock(int eFileLock){
pNew->nRef = 1;
pNew->nName = n;
pNew->pNext = vxworksFileList;
vxworksFileList = pNew;
sqlite3_mutex_leave(vxworksMutex);
return pNew;
}
/*
** Decrement the reference count on a vxworksFileId object. Free
** the object when the reference count reaches zero.
*/
static void vxworksReleaseFileId(struct vxworksFileId *pId){
sqlite3_mutex_enter(vxworksMutex);
assert( pId->nRef>0 );
pId->nRef--;
if( pId->nRef==0 ){
struct vxworksFileId **pp;
for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
assert( *pp==pId );
*pp = pId->pNext;
sqlite3_free(pId);
}
sqlite3_mutex_leave(vxworksMutex);
}
#endif /* OS_VXWORKS */
/*************** End of Unique File ID Utility Used By VxWorks ****************
******************************************************************************/
/******************************************************************************
*************************** Posix Advisory Locking ****************************
**
** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process. It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different
** file descriptor. Consider this test case:
**
** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
**
** Suppose ./file1 and ./file2 are really the same file (because
** one is a hard or symbolic link to the other) then if you set
** an exclusive lock on fd1, then try to get an exclusive lock
** on fd2, it works. I would have expected the second lock to
** fail since there was already a lock on the file due to fd1.
** But not so. Since both locks came from the same process, the
** second overrides the first, even though they were on different
** file descriptors opened on different file names.
**
** This means that we cannot use POSIX locks to synchronize file access
** among competing threads of the same process. POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
** on its own. Whenever a new database is opened, we have to find the
** specific inode of the database file (the inode is determined by the
** st_dev and st_ino fields of the stat structure that fstat() fills in)
** and check for locks already existing on that inode. When locks are
** created or removed, we have to look at our own internal record of the
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file structure for POSIX is no longer just an integer file
** descriptor. It is now a structure that holds the integer file
** descriptor and a pointer to a structure that describes the internal
** locks on the corresponding inode. There is one locking structure
** per inode, so if the same inode is opened twice, both unixFile structures
** point to the same locking structure. The locking structure keeps
** a reference count (so we will know when to delete it) and a "cnt"
** field that tells us its internal lock status. cnt==0 means the
** file is unlocked. cnt==-1 means the file has an exclusive lock.
** cnt>0 means there are cnt shared locks on the file.
**
** Any attempt to lock or unlock a file first checks the locking
** structure. The fcntl() system call is only invoked to set a
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** But wait: there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released. To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on the inode.
** When an attempt is made to close an unixFile, if there are
** other unixFile open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The unixInodeInfo structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** Yet another problem: LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant. Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock. Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B. But there is no way to know at compile-time which
** threading library is being used. So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads. But support for LinuxThreads
** was dropped beginning with version 3.7.0. SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/
/*
/*
** Release a unixInodeInfo structure previously allocated by findInodeInfo().
**
** The global mutex must be held when this routine is called, but the mutex
** on the inode being deleted must NOT be held.
*/
static void releaseInodeInfo(unixFile *pFile){
unixInodeInfo *pInode = pFile->pInode;
assert( unixMutexHeld() );
assert( unixFileMutexNotheld(pFile) );
if( ALWAYS(pInode) ){
pInode->nRef--;
if( pInode->nRef==0 ){
assert( pInode->pShmNode==0 );
sqlite3_mutex_enter(pInode->pLockMutex);
closePendingFds(pFile);
sqlite3_mutex_leave(pInode->pLockMutex);
if( pInode->pPrev ){
assert( pInode->pPrev->pNext==pInode );
pInode->pPrev->pNext = pInode->pNext;
}else{
assert( inodeList==pInode );
inodeList = pInode->pNext;
}
if( pInode->pNext ){
assert( pInode->pNext->pPrev==pInode );
pInode->pNext->pPrev = pInode->pPrev;
}
sqlite3_mutex_free(pInode->pLockMutex);
sqlite3_free(pInode);
}
}
}
/*
** Given a file descriptor, locate the unixInodeInfo object that
** describes that file descriptor. Create a new one if necessary. The
** return value might be uninitialized if an error occurs.
**
** The global mutex must held when calling this routine.
**
** Return an appropriate error code.
*/
static int findInodeInfo(
unixFile *pFile, /* Unix file with file desc used in the key */
unixInodeInfo **ppInode /* Return the unixInodeInfo object here */
){
int rc; /* System call return code */
int fd; /* The file descriptor for pFile */
struct unixFileId fileId; /* Lookup key for the unixInodeInfo */
struct stat statbuf; /* Low-level file information */
unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */
assert( unixMutexHeld() );
/* Get low-level information about the file that we can used to
** create a unique name for the file.
*/
fd = pFile->h;
rc = osFstat(fd, &statbuf);
if( rc!=0 ){
storeLastErrno(pFile, errno);
#if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS)
if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
#endif
return SQLITE_IOERR;
}
#ifdef __APPLE__
/* On OS X on an msdos filesystem, the inode number is reported
** incorrectly for zero-size files. See ticket #3260. To work
** around this problem (we consider it a bug in OS X, not SQLite)
** we always increase the file size to 1 by writing a single byte
** prior to accessing the inode number. The one byte written is
** an ASCII 'S' character which also happens to be the first byte
** in the header of every SQLite database. In this way, if there
** is a race condition such that another thread has already populated
** the first page of the database, no damage is done.
*/
if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
if( rc!=1 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR;
}
if( fsync(fd) ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR_FSYNC;
}
rc = osFstat(fd, &statbuf);
if( rc!=0 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR;
}
}
#endif
memset(&fileId, 0, sizeof(fileId));
fileId.dev = statbuf.st_dev;
#if OS_VXWORKS
fileId.pId = pFile->pId;
#else
fileId.ino = (u64)statbuf.st_ino;
#endif
assert( unixMutexHeld() );
pInode = inodeList;
while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
pInode = pInode->pNext;
}
if( pInode==0 ){
pInode = sqlite3_malloc64( sizeof(*pInode) );
if( pInode==0 ){
return SQLITE_NOMEM_BKPT;
}
memset(pInode, 0, sizeof(*pInode));
memcpy(&pInode->fileId, &fileId, sizeof(fileId));
if( sqlite3GlobalConfig.bCoreMutex ){
pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
if( pInode->pLockMutex==0 ){
sqlite3_free(pInode);
return SQLITE_NOMEM_BKPT;
}
}
pInode->nRef = 1;
assert( unixMutexHeld() );
pInode->pNext = inodeList;
pInode->pPrev = 0;
if( inodeList ) inodeList->pPrev = pInode;
inodeList = pInode;
}else{
pInode->nRef++;
}
*ppInode = pInode;
return SQLITE_OK;
}
/*
** Return TRUE if pFile has been renamed or unlinked since it was first opened.
*/
static int fileHasMoved(unixFile *pFile){
#if OS_VXWORKS
return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
#else
struct stat buf;
return pFile->pInode!=0 &&
(osStat(pFile->zPath, &buf)!=0
|| (u64)buf.st_ino!=pFile->pInode->fileId.ino);
#endif
}
/*
** Check a unixFile that is a database. Verify the following:
**
** (1) There is exactly one hard link on the file
** (2) The file is not a symbolic link
** (3) The file has not been renamed or unlinked
**
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
*/
static void verifyDbFile(unixFile *pFile){
struct stat buf;
int rc;
/* These verifications occurs for the main database only */
if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return;
rc = osFstat(pFile->h, &buf);
if( rc!=0 ){
sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
return;
}
if( buf.st_nlink==0 ){
sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
return;
}
if( buf.st_nlink>1 ){
sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
return;
}
if( fileHasMoved(pFile) ){
sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
return;
}
}
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
assert( pFile->eFileLock<=SHARED_LOCK );
sqlite3_mutex_enter(pFile->pInode->pLockMutex);
/* Check if a thread in this process holds such a lock */
if( pFile->pInode->eFileLock>SHARED_LOCK ){
reserved = 1;
}
/* Otherwise see if some other process holds it.
*/
#ifndef __DJGPP__
if( !reserved && !pFile->pInode->bProcessLock ){
struct flock lock;
lock.l_whence = SEEK_SET;
lock.l_start = RESERVED_BYTE;
lock.l_len = 1;
lock.l_type = F_WRLCK;
if( osFcntl(pFile->h, F_GETLK, &lock) ){
rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
storeLastErrno(pFile, errno);
} else if( lock.l_type!=F_UNLCK ){
reserved = 1;
}
}
#endif
sqlite3_mutex_leave(pFile->pInode->pLockMutex);
*/
#ifdef F_FULLFSYNC
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif
/*
** The fsync() system call does not work as advertised on many
** unix systems. The following procedure is an attempt to make
** it work better.
**
** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
**
** SQLite sets the dataOnly flag if the size of the file is unchanged.
** The idea behind dataOnly is that it should only write the file content
** to disk, not the inode. We only set dataOnly if the file size is
** unchanged since the file size is part of the inode. However,
** Ted Ts'o tells us that fdatasync() will also write the inode if the
** file size has changed. The only real difference between fdatasync()
** and fsync(), Ted tells us, is that fdatasync() will not flush the
** inode if the mtime or owner or other inode attributes have changed.
** We only care about the file size, not the other file attributes, so
** as far as SQLite is concerned, an fdatasync() is always adequate.
** So, we always use fdatasync() if it is available, regardless of
** the value of the dataOnly flag.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
int rc;
/* The following "ifdef/elif/else/" block has the same structure as
** the one below. It is replicated here solely to avoid cluttering
** up the real code with the UNUSED_PARAMETER() macros.
*/
#ifdef SQLITE_NO_SYNC
UNUSED_PARAMETER(fd);
UNUSED_PARAMETER(fullSync);
UNUSED_PARAMETER(dataOnly);
#elif HAVE_FULLFSYNC
UNUSED_PARAMETER(dataOnly);
#else
UNUSED_PARAMETER(fullSync);
UNUSED_PARAMETER(dataOnly);
#endif
/* Record the number of times that we do a normal fsync() and
** FULLSYNC. This is used during testing to verify that this procedure
** gets called with the correct arguments.
*/
#ifdef SQLITE_TEST
if( fullSync ) sqlite3_fullsync_count++;
sqlite3_sync_count++;
#endif
/* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
** no-op. But go ahead and call fstat() to validate the file
** descriptor as we need a method to provoke a failure during
** coverage testing.
*/
#ifdef SQLITE_NO_SYNC
{
struct stat buf;
rc = osFstat(fd, &buf);
}
#elif HAVE_FULLFSYNC
if( fullSync ){
rc = osFcntl(fd, F_FULLFSYNC, 0);
}else{
rc = 1;
}
/* If the FULLFSYNC failed, fall back to attempting an fsync().
** It shouldn't be possible for fullfsync to fail on the local
** file system (on OSX), so failure indicates that FULLFSYNC
** isn't supported for this file system. So, attempt an fsync
** and (for now) ignore the overhead of a superfluous fcntl call.
** It'd be better to detect fullfsync support once and avoid
** the fcntl call every time sync is called.
*/
if( rc ) rc = fsync(fd);
#elif defined(__APPLE__)
/* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly
** so currently we default to the macro that redefines fdatasync to fsync
*/
rc = fsync(fd);
#else
rc = fdatasync(fd);
#if OS_VXWORKS
if( rc==-1 && errno==ENOTSUP ){
rc = fsync(fd);
}
#endif /* OS_VXWORKS */
#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
if( OS_VXWORKS && rc!= -1 ){
rc = 0;
}
return rc;
}
/*
** Open a file descriptor to the directory containing file zFilename.
** If successful, *pFd is set to the opened file descriptor and
** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
** value.
**
** The directory file descriptor is used for only one thing - to
** fsync() a directory to make sure file creation and deletion events
** are flushed to disk. Such fsyncs are not needed on newer
** journaling filesystems, but are required on older filesystems.
**
** This routine can be overridden using the xSetSysCall interface.
** The ability to override this routine was added in support of the
** chromium sandbox. Opening a directory is a security risk (we are
** told) so making it overrideable allows the chromium sandbox to
** replace this routine with a harmless no-op. To make this routine
** a no-op, replace it with a stub that returns SQLITE_OK but leaves
** *pFd set to a negative number.
**
** If SQLITE_OK is returned, the caller is responsible for closing
** the file descriptor *pFd using close().
*/
return rc;
}
/*
** Truncate an open file to a specified size
*/
static int unixTruncate(sqlite3_file *id, i64 nByte){
unixFile *pFile = (unixFile *)id;
int rc;
assert( pFile );
SimulateIOError( return SQLITE_IOERR_TRUNCATE );
/* If the user has configured a chunk-size for this file, truncate the
** file so that it consists of an integer number of chunks (i.e. the
** actual file size after the operation may be larger than the requested
** size).
*/
if( pFile->szChunk>0 ){
nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
}
rc = robust_ftruncate(pFile->h, nByte);
if( rc ){
storeLastErrno(pFile, errno);
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
}else{
#ifdef SQLITE_DEBUG
/* If we are doing a normal write to a database file (as opposed to
** doing a hot-journal rollback or a write to some file other than a
** normal database file) and we truncate the file to zero length,
** that effectively updates the change counter. This might happen
** when restoring a database using the backup API from a zero-length
** source.
*/
if( pFile->inNormalWrite && nByte==0 ){
pFile->transCntrChng = 1;
}
#endif
#if SQLITE_MAX_MMAP_SIZE>0
/* If the file was just truncated to a size smaller than the currently
** mapped region, reduce the effective mapping size as well. SQLite will
** use read() and write() to access data beyond this point from now on.
*/
if( nByte<pFile->mmapSize ){
pFile->mmapSize = nByte;
}
#endif
return SQLITE_OK;
}
}
/*
** Determine the current size of a file in bytes
*/
static int unixFileSize(sqlite3_file *id, i64 *pSize){
int rc;
struct stat buf;
assert( id );
rc = osFstat(((unixFile*)id)->h, &buf);
SimulateIOError( rc=1 );
if( rc!=0 ){
storeLastErrno((unixFile*)id, errno);
return SQLITE_IOERR_FSTAT;
}
*pSize = buf.st_size;
/* When opening a zero-size database, the findInodeInfo() procedure
** writes a single byte into that file in order to work around a bug
** in the OS-X msdos filesystem. In order to avoid problems with upper
** layers, we need to report this file size as zero even though it is
** really 1. Ticket #3260.
*/
if( *pSize==1 ) *pSize = 0;
return SQLITE_OK;
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Handler for proxy-locking file-control verbs. Defined below in the
** proxying locking division.
*/
static int proxyFileControl(sqlite3_file*,int,void*);
#endif
/*
** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
** file-control operation. Enlarge the database to nBytes in size
** (rounded up to the next chunk-size). If the database is already
** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
if( pFile->szChunk>0 ){
i64 nSize; /* Required file size */
struct stat buf; /* Used to hold return values of fstat() */
if( osFstat(pFile->h, &buf) ){
return SQLITE_IOERR_FSTAT;
}
nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
if( nSize>(i64)buf.st_size ){
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
/* The code below is handling the return value of osFallocate()
** correctly. posix_fallocate() is defined to "returns zero on success,
** or an error number on failure". See the manpage for details. */
int err;
do{
err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);
}while( err==EINTR );
if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE;
#else
/* If the OS does not have posix_fallocate(), fake it. Write a
** single byte to the last byte in each block that falls entirely
** within the extended region. Then, if required, a single byte
** at offset (nSize-1), to set the size of the file correctly.
** This is a similar technique to that used by glibc on systems
** that do not have a real fallocate() call.
*/
int nBlk = buf.st_blksize; /* File-system block size */
int nWrite = 0; /* Number of bytes written by seekAndWrite */
i64 iWrite; /* Next offset to write to */
iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1;
assert( iWrite>=buf.st_size );
assert( ((iWrite+1)%nBlk)==0 );
for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){
if( iWrite>=nSize ) iWrite = nSize - 1;
nWrite = seekAndWrite(pFile, iWrite, "", 1);
if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
}
#endif
}
}
#if SQLITE_MAX_MMAP_SIZE>0
if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
int rc;
if( pFile->szChunk<=0 ){
if( robust_ftruncate(pFile->h, nByte) ){
storeLastErrno(pFile, errno);
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
}
}
rc = unixMapfile(pFile, nByte);
return rc;
}
#endif
return SQLITE_OK;
}
/*
** If *pArg is initially negative then this is a query. Set *pArg to
** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
return rc;
}
/*
** Open a shared-memory area associated with open database file pDbFd.
** This particular implementation uses mmapped files.
**
** The file used to implement shared-memory is in the same directory
** as the open database file and has the same name as the open database
** file with the "-shm" suffix added. For example, if the database file
** is "/home/user1/config.db" then the file that is created and mmapped
** for shared memory will be called "/home/user1/config.db-shm".
**
** Another approach to is to use files in /dev/shm or /dev/tmp or an
** some other tmpfs mount. But if a file in a different directory
** from the database file is used, then differing access permissions
** or a chroot() might cause two different processes on the same
** database to end up using different files for shared memory -
** meaning that their memory would not really be shared - resulting
** in database corruption. Nevertheless, this tmpfs file usage
** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time
** option results in an incompatible build of SQLite; builds of SQLite
** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the
** same database file at the same time, database corruption will likely
** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
** "unsupported" and may go away in a future SQLite release.
**
** When opening a new shared-memory file, if no other instances of that
** file are currently open, in this process or in other processes, then
** the file must be truncated to zero length or have its header cleared.
**
** If the original database file (pDbFd) is using the "unix-excl" VFS
** that means that an exclusive lock is held on the database file and
** that no other processes are able to read or write the database. In
** that case, we do not really need shared memory. No shared memory
** file is created. The shared memory will be simulated with heap memory.
*/
static int unixOpenSharedMemory(unixFile *pDbFd){
struct unixShm *p = 0; /* The connection to be opened */
struct unixShmNode *pShmNode; /* The underlying mmapped file */
int rc = SQLITE_OK; /* Result code */
unixInodeInfo *pInode; /* The inode of fd */
char *zShm; /* Name of the file used for SHM */
int nShmFilename; /* Size of the SHM filename in bytes */
/* Allocate space for the new unixShm object. */
p = sqlite3_malloc64( sizeof(*p) );
if( p==0 ) return SQLITE_NOMEM_BKPT;
memset(p, 0, sizeof(*p));
assert( pDbFd->pShm==0 );
/* Check to see if a unixShmNode object already exists. Reuse an existing
** one if present. Create a new one if necessary.
*/
assert( unixFileMutexNotheld(pDbFd) );
unixEnterMutex();
pInode = pDbFd->pInode;
pShmNode = pInode->pShmNode;
if( pShmNode==0 ){
struct stat sStat; /* fstat() info for database file */
#ifndef SQLITE_SHM_DIRECTORY
const char *zBasePath = pDbFd->zPath;
#endif
/* Call fstat() to figure out the permissions on the database file. If
** a new *-shm file is created, an attempt will be made to create it
** with the same permissions.
*/
if( osFstat(pDbFd->h, &sStat) ){
rc = SQLITE_IOERR_FSTAT;
goto shm_open_err;
}
#ifdef SQLITE_SHM_DIRECTORY
nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
nShmFilename = 6 + (int)strlen(zBasePath);
#endif
pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename );
if( pShmNode==0 ){
rc = SQLITE_NOMEM_BKPT;
goto shm_open_err;
}
memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
zShm = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
sqlite3_snprintf(nShmFilename, zShm,
SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
(u32)sStat.st_ino, (u32)sStat.st_dev);
#else
sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath);
sqlite3FileSuffix3(pDbFd->zPath, zShm);
#endif
pShmNode->hShm = -1;
pDbFd->pInode->pShmNode = pShmNode;
pShmNode->pInode = pDbFd->pInode;
if( sqlite3GlobalConfig.bCoreMutex ){
pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
if( pShmNode->pShmMutex==0 ){
rc = SQLITE_NOMEM_BKPT;
goto shm_open_err;
}
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
{
int ii;
for(ii=0; ii<SQLITE_SHM_NLOCK; ii++){
pShmNode->aMutex[ii] = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
if( pShmNode->aMutex[ii]==0 ){
rc = SQLITE_NOMEM_BKPT;
goto shm_open_err;
}
}
}
#endif
}
if( pInode->bProcessLock==0 ){
if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW,
(sStat.st_mode&0777));
}
if( pShmNode->hShm<0 ){
pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW,
(sStat.st_mode&0777));
if( pShmNode->hShm<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm);
goto shm_open_err;
}
pShmNode->isReadonly = 1;
}
/*
** This function is called to obtain a pointer to region iRegion of the
** shared-memory associated with the database file fd. Shared-memory regions
** are numbered starting from zero. Each shared-memory region is szRegion
** bytes in size.
**
** If an error occurs, an error code is returned and *pp is set to NULL.
**
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
** region has not been allocated (by any client, including one running in a
** separate process), then *pp is set to NULL and SQLITE_OK returned. If
** bExtend is non-zero and the requested shared-memory region has not yet
** been allocated, it is allocated by this function.
**
** If the shared-memory region has already been allocated or is allocated by
** this call as described above, then it is mapped into this processes
** address space (if it is not already), *pp is set to point to the mapped
** memory and SQLITE_OK returned.
*/
static int unixShmMap(
sqlite3_file *fd, /* Handle open on database file */
int iRegion, /* Region to retrieve */
int szRegion, /* Size of regions */
int bExtend, /* True to extend file if necessary */
void volatile **pp /* OUT: Mapped memory */
){
unixFile *pDbFd = (unixFile*)fd;
unixShm *p;
unixShmNode *pShmNode;
int rc = SQLITE_OK;
int nShmPerMap = unixShmRegionPerMap();
int nReqRegion;
/* If the shared-memory file has not yet been opened, open it now. */
if( pDbFd->pShm==0 ){
rc = unixOpenSharedMemory(pDbFd);
if( rc!=SQLITE_OK ) return rc;
}
p = pDbFd->pShm;
pShmNode = p->pShmNode;
sqlite3_mutex_enter(pShmNode->pShmMutex);
if( pShmNode->isUnlocked ){
rc = unixLockSharedMemory(pDbFd, pShmNode);
if( rc!=SQLITE_OK ) goto shmpage_out;
pShmNode->isUnlocked = 0;
}
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
assert( pShmNode->pInode==pDbFd->pInode );
assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );
/* Minimum number of regions required to be mapped. */
nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
if( pShmNode->nRegion<nReqRegion ){
char **apNew; /* New apRegion[] array */
int nByte = nReqRegion*szRegion; /* Minimum required file size */
struct stat sStat; /* Used by fstat() */
pShmNode->szRegion = szRegion;
if( pShmNode->hShm>=0 ){
/* The requested region is not mapped into this processes address space.
** Check to see if it has been allocated (i.e. if the wal-index file is
** large enough to contain the requested region).
*/
if( osFstat(pShmNode->hShm, &sStat) ){
rc = SQLITE_IOERR_SHMSIZE;
goto shmpage_out;
}
if( sStat.st_size<nByte ){
/* The requested memory region does not exist. If bExtend is set to
** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
*/
if( !bExtend ){
goto shmpage_out;
}
/* Alternatively, if bExtend is true, extend the file. Do this by
** writing a single byte to the end of each (OS) page being
** allocated or extended. Technically, we need only write to the
** last page in order to extend the file. But writing to all new
** pages forces the OS to allocate them immediately, which reduces
** the chances of SIGBUS while accessing the mapped region later on.
*/
else{
static const int pgsz = 4096;
int iPg;
/* Write to the last byte of each newly allocated or extended page */
assert( (nByte % pgsz)==0 );
for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
int x = 0;
if( seekAndWriteFd(pShmNode->hShm, iPg*pgsz + pgsz-1,"",1,&x)!=1 ){
const char *zFile = pShmNode->zFilename;
rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
goto shmpage_out;
}
}
}
}
}
/* Map the requested memory region into this processes address space. */
apNew = (char **)sqlite3_realloc(
pShmNode->apRegion, nReqRegion*sizeof(char *)
);
if( !apNew ){
rc = SQLITE_IOERR_NOMEM_BKPT;
goto shmpage_out;
}
pShmNode->apRegion = apNew;
while( pShmNode->nRegion<nReqRegion ){
int nMap = szRegion*nShmPerMap;
int i;
void *pMem;
if( pShmNode->hShm>=0 ){
pMem = osMmap(0, nMap,
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion
);
if( pMem==MAP_FAILED ){
rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
goto shmpage_out;
}
}else{
#else
pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
if( pNew!=MAP_FAILED ){
if( pNew!=pReq ){
osMunmap(pNew, nNew - nReuse);
pNew = 0;
}else{
pNew = pOrig;
}
}
#endif
/* The attempt to extend the existing mapping failed. Free it. */
if( pNew==MAP_FAILED || pNew==0 ){
osMunmap(pOrig, nReuse);
}
}
/* If pNew is still NULL, try to create an entirely new mapping. */
if( pNew==0 ){
pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
}
if( pNew==MAP_FAILED ){
pNew = 0;
nNew = 0;
unixLogError(SQLITE_OK, zErr, pFd->zPath);
/* If the mmap() above failed, assume that all subsequent mmap() calls
** will probably fail too. Fall back to using xRead/xWrite exclusively
** in this case. */
pFd->mmapSizeMax = 0;
}
pFd->pMapRegion = (void *)pNew;
pFd->mmapSize = pFd->mmapSizeActual = nNew;
}
/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if
** there already exists a mapping for this file, and there are still
** outstanding xFetch() references to it, this function is a no-op.
**
** If parameter nByte is non-negative, then it is the requested size of
** the mapping to create. Otherwise, if nByte is less than zero, then the
** requested size is the size of the file on disk. The actual size of the
** created mapping is either the requested size or the value configured
** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
**
** SQLITE_OK is returned if no error occurs (even if the mapping is not
** recreated as a result of outstanding references) or an SQLite error
** code otherwise.
*/
static int unixMapfile(unixFile *pFd, i64 nMap){
assert( nMap>=0 || pFd->nFetchOut==0 );
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
if( pFd->nFetchOut>0 ) return SQLITE_OK;
if( nMap<0 ){
struct stat statbuf; /* Low-level file information */
if( osFstat(pFd->h, &statbuf) ){
return SQLITE_IOERR_FSTAT;
}
nMap = statbuf.st_size;
}
if( nMap>pFd->mmapSizeMax ){
nMap = pFd->mmapSizeMax;
}
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
if( nMap!=pFd->mmapSize ){
unixRemapfile(pFd, nMap);
}
return SQLITE_OK;
}
#endif /* SQLITE_MAX_MMAP_SIZE>0 */
/*
** If possible, return a pointer to a mapping of file fd starting at offset
** iOff. The mapping must be valid for at least nAmt bytes.
**
** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
** Finally, if an error does occur, return an SQLite error code. The final
** value of *pp is undefined in this case.
**
** If this function does return a pointer, the caller must eventually
** release the reference by calling unixUnfetch().
*/
static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
#if SQLITE_MAX_MMAP_SIZE>0
unixFile *pFd = (unixFile *)fd; /* The underlying database file */
#endif
*pp = 0;
#if SQLITE_MAX_MMAP_SIZE>0
if( pFd->mmapSizeMax>0 ){
/* Ensure that there is always at least a 256 byte buffer of addressable
** memory following the returned page. If the database is corrupt,
** SQLite may overread the page slightly (in practice only a few bytes,
** but 256 is safe, round, number). */
const int nEofBuffer = 256;
if( pFd->pMapRegion==0 ){
int rc = unixMapfile(pFd, -1);
if( rc!=SQLITE_OK ) return rc;
}
if( pFd->mmapSize >= (iOff+nAmt+nEofBuffer) ){
*pp = &((u8 *)pFd->pMapRegion)[iOff];
pFd->nFetchOut++;
}
}
#endif
return SQLITE_OK;
}
/*
** If the third argument is non-NULL, then this function releases a
** reference obtained by an earlier call to unixFetch(). The second
** argument passed to this function must be the same as the corresponding
** argument that was passed to the unixFetch() invocation.
const char *zFilename, /* Name of the file being opened */
int ctrlFlags /* Zero or more UNIXFILE_* values */
){
const sqlite3_io_methods *pLockingStyle;
unixFile *pNew = (unixFile *)pId;
int rc = SQLITE_OK;
assert( pNew->pInode==NULL );
/* No locking occurs in temporary files */
assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
pNew->h = h;
pNew->pVfs = pVfs;
pNew->zPath = zFilename;
pNew->ctrlFlags = (u8)ctrlFlags;
#if SQLITE_MAX_MMAP_SIZE>0
pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
#endif
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
pNew->ctrlFlags |= UNIXFILE_PSOW;
}
if( strcmp(pVfs->zName,"unix-excl")==0 ){
pNew->ctrlFlags |= UNIXFILE_EXCL;
}
#if OS_VXWORKS
pNew->pId = vxworksFindFileId(zFilename);
if( pNew->pId==0 ){
ctrlFlags |= UNIXFILE_NOLOCK;
rc = SQLITE_NOMEM_BKPT;
}
#endif
if( ctrlFlags & UNIXFILE_NOLOCK ){
pLockingStyle = &nolockIoMethods;
}else{
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
#if SQLITE_ENABLE_LOCKING_STYLE
/* Cache zFilename in the locking context (AFP and dotlock override) for
** proxyLock activation is possible (remote proxy is based on db name)
** zFilename remains valid until file is closed, to support */
pNew->lockingContext = (void*)zFilename;
#endif
}
if( pLockingStyle == &posixIoMethods
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|| pLockingStyle == &nfsIoMethods
#endif
){
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( rc!=SQLITE_OK ){
/* If an error occurred in findInodeInfo(), close the file descriptor
** immediately, before releasing the mutex. findInodeInfo() may fail
** in two scenarios:
**
** (a) A call to fstat() failed.
** (b) A malloc failed.
**
** Scenario (b) may only occur if the process is holding no other
** file descriptors open on the same file. If there were other file
** descriptors on this file, then no malloc would be required by
** findInodeInfo(). If this is the case, it is quite safe to close
** handle h - as it is guaranteed that no posix locks will be released
** by doing so.
**
** If scenario (a) caused the error then things are not so safe. The
** implicit assumption here is that if fstat() fails, things are in
** such bad shape that dropping a lock or two doesn't matter much.
*/
robust_close(pNew, h, __LINE__);
h = -1;
}
unixLeaveMutex();
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
else if( pLockingStyle == &afpIoMethods ){
/* AFP locking uses the file path so it needs to be included in
** the afpLockingContext.
*/
afpLockingContext *pCtx;
pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) );
if( pCtx==0 ){
rc = SQLITE_NOMEM_BKPT;
}else{
/* NB: zFilename exists and remains valid until the file is closed
** according to requirement F11141. So we do not need to make a
** copy of the filename. */
pCtx->dbPath = zFilename;
pCtx->reserved = 0;
srandomdev();
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( rc!=SQLITE_OK ){
sqlite3_free(pNew->lockingContext);
robust_close(pNew, h, __LINE__);
h = -1;
}
unixLeaveMutex();
}
}
#endif
else if( pLockingStyle == &dotlockIoMethods ){
/* Dotfile locking uses the file path so it needs to be included in
** the dotlockLockingContext
*/
char *zLockFile;
int nFilename;
assert( zFilename!=0 );
nFilename = (int)strlen(zFilename) + 6;
zLockFile = (char *)sqlite3_malloc64(nFilename);
if( zLockFile==0 ){
rc = SQLITE_NOMEM_BKPT;
}else{
sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
}
pNew->lockingContext = zLockFile;
}
#if OS_VXWORKS
else if( pLockingStyle == &semIoMethods ){
/* Named semaphore locking uses the file path so it needs to be
** included in the semLockingContext
*/
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
*/
zBuf[0] = 0;
SimulateIOError( return SQLITE_IOERR );
sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR));
zDir = unixTempFileDir();
if( zDir==0 ){
rc = SQLITE_IOERR_GETTEMPPATH;
}else{
do{
u64 r;
sqlite3_randomness(sizeof(r), &r);
assert( nBuf>2 );
zBuf[nBuf-2] = 0;
sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c",
zDir, r, 0);
if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ){
rc = SQLITE_ERROR;
break;
}
}while( osAccess(zBuf,0)==0 );
}
sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR));
return rc;
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Routine to transform a unixFile into a proxy-locking unixFile.
** Implementation in the proxy-lock division, but used by unixOpen()
** if SQLITE_PREFER_PROXY_LOCKING is defined.
*/
static int proxyTransformUnixFile(unixFile*, const char*);
#endif
/*
** Search for an unused file descriptor that was opened on the database
** file (not a journal or super-journal file) identified by pathname
** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
** argument to this function.
**
** Such a file descriptor may exist if a database connection was closed
** but the associated file descriptor could not be closed because some
** other file descriptor open on the same file is holding a file-lock.
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
UnixUnusedFd *pUnused = 0;
/* Do not search for an unused file descriptor on vxworks. Not because
** vxworks would not benefit from the change (it might, we're not sure),
** but because no way to test it is currently available. It is better
** not to risk breaking vxworks support for the sake of such an obscure
** feature. */
#if !OS_VXWORKS
struct stat sStat; /* Results of stat() call */
unixEnterMutex();
/* A stat() call may fail for various reasons. If this happens, it is
** almost certain that an open() call on the same path will also fail.
** For this reason, if an error occurs in the stat() call here, it is
** ignored and -1 is returned. The caller will try to open a new file
** descriptor on the same path, fail, and return an error to SQLite.
**
** Even if a subsequent open() call does succeed, the consequences of
** not searching for a reusable file descriptor are not dire. */
if( inodeList!=0 && 0==osStat(zPath, &sStat) ){
unixInodeInfo *pInode;
pInode = inodeList;
while( pInode && (pInode->fileId.dev!=sStat.st_dev
|| pInode->fileId.ino!=(u64)sStat.st_ino) ){
pInode = pInode->pNext;
}
if( pInode ){
UnixUnusedFd **pp;
assert( sqlite3_mutex_notheld(pInode->pLockMutex) );
sqlite3_mutex_enter(pInode->pLockMutex);
flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE);
for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
pUnused = *pp;
if( pUnused ){
*pp = pUnused->pNext;
}
sqlite3_mutex_leave(pInode->pLockMutex);
}
}
unixLeaveMutex();
#endif /* if !OS_VXWORKS */
return pUnused;
}
/*
** Find the mode, uid and gid of file zFile.
*/
static int getFileMode(
const char *zFile, /* File name */
mode_t *pMode, /* OUT: Permissions of zFile */
uid_t *pUid, /* OUT: uid of zFile. */
gid_t *pGid /* OUT: gid of zFile. */
){
struct stat sStat; /* Output of stat() on database file */
int rc = SQLITE_OK;
if( 0==osStat(zFile, &sStat) ){
*pMode = sStat.st_mode & 0777;
*pUid = sStat.st_uid;
*pGid = sStat.st_gid;
}else{
rc = SQLITE_IOERR_FSTAT;
}
return rc;
}
/*
** This function is called by unixOpen() to determine the unix permissions
** to create new files with. If no error occurs, then SQLITE_OK is returned
** and a value suitable for passing as the third argument to open(2) is
** written to *pMode. If an IO error occurs, an SQLite error code is
** returned and the value of *pMode is not modified.
**
** In most cases, this routine sets *pMode to 0, which will become
** an indication to robust_open() to create the file using
** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
** But if the file being opened is a WAL or regular journal file, then
** this function queries the file-system for the permissions on the
** corresponding database file and sets *pMode to this value. Whenever
** possible, WAL and journal files are created using the same permissions
** as the associated database file.
**
** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
** original filename is unavailable. But 8_3_NAMES is only used for
** FAT filesystems and permissions do not matter there, so just use
** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero.
*/
static int findCreateFileMode(
const char *zPath, /* Path of file (possibly) being created */
int flags, /* Flags passed as 4th argument to xOpen() */
mode_t *pMode, /* OUT: Permissions to open file with */
uid_t *pUid, /* OUT: uid to set on the file */
gid_t *pGid /* OUT: gid to set on the file */
){
int rc = SQLITE_OK; /* Return Code */
*pMode = 0;
*pUid = 0;
*pGid = 0;
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
char zDb[MAX_PATHNAME+1]; /* Database file path */
int nDb; /* Number of valid bytes in zDb */
/* zPath is a path to a WAL or journal file. The following block derives
** the path to the associated database file from zPath. This block handles
** the following naming conventions:
**
** "<path to db>-journal"
** "<path to db>-wal"
** "<path to db>-journalNN"
** "<path to db>-walNN"
**
** where NN is a decimal number. The NN naming schemes are
** used by the test_multiplex.c module.
**
** In normal operation, the journal file name will always contain
assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE );
if( flags==SQLITE_ACCESS_EXISTS ){
struct stat buf;
*pResOut = 0==osStat(zPath, &buf) &&
(!S_ISREG(buf.st_mode) || buf.st_size>0);
}else{
*pResOut = osAccess(zPath, W_OK|R_OK)==0;
}
return SQLITE_OK;
}
/*
** A pathname under construction
*/
typedef struct DbPath DbPath;
struct DbPath {
int rc; /* Non-zero following any error */
int nSymlink; /* Number of symlinks resolved */
char *zOut; /* Write the pathname here */
int nOut; /* Bytes of space available to zOut[] */
int nUsed; /* Bytes of zOut[] currently being used */
};
/* Forward reference */
static void appendAllPathElements(DbPath*,const char*);
/*
** Append a single path element to the DbPath under construction
*/
static void appendOnePathElement(
DbPath *pPath, /* Path under construction, to which to append zName */
const char *zName, /* Name to append to pPath. Not zero-terminated */
int nName /* Number of significant bytes in zName */
){
assert( nName>0 );
assert( zName!=0 );
if( zName[0]=='.' ){
if( nName==1 ) return;
if( zName[1]=='.' && nName==2 ){
if( pPath->nUsed>1 ){
assert( pPath->zOut[0]=='/' );
while( pPath->zOut[--pPath->nUsed]!='/' ){}
}
return;
}
}
if( pPath->nUsed + nName + 2 >= pPath->nOut ){
pPath->rc = SQLITE_ERROR;
return;
}
pPath->zOut[pPath->nUsed++] = '/';
memcpy(&pPath->zOut[pPath->nUsed], zName, nName);
pPath->nUsed += nName;
#if defined(HAVE_READLINK) && defined(HAVE_LSTAT)
if( pPath->rc==SQLITE_OK ){
const char *zIn;
struct stat buf;
pPath->zOut[pPath->nUsed] = 0;
zIn = pPath->zOut;
if( osLstat(zIn, &buf)!=0 ){
if( errno!=ENOENT ){
pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn);
}
}else if( S_ISLNK(buf.st_mode) ){
ssize_t got;
char zLnk[SQLITE_MAX_PATHLEN+2];
if( pPath->nSymlink++ > SQLITE_MAX_SYMLINK ){
pPath->rc = SQLITE_CANTOPEN_BKPT;
return;
}
got = osReadlink(zIn, zLnk, sizeof(zLnk)-2);
if( got<=0 || got>=(ssize_t)sizeof(zLnk)-2 ){
pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn);
return;
}
zLnk[got] = 0;
if( zLnk[0]=='/' ){
pPath->nUsed = 0;
}else{
pPath->nUsed -= nName + 1;
}
appendAllPathElements(pPath, zLnk);
}
}
#endif
}
/*
** Append all path elements in zPath to the DbPath under construction.
*/
static void appendAllPathElements(
DbPath *pPath, /* Path under construction, to which to append zName */
const char *zPath /* Path to append to pPath. Is zero-terminated */
){
int i = 0;
int j = 0;
do{
while( zPath[i] && zPath[i]!='/' ){ i++; }
if( i>j ){
appendOnePathElement(pPath, &zPath[j], i-j);
}
j = i+1;
}while( zPath[i++] );
}
/*
** Turn a relative pathname into a full pathname. The relative path
** is stored as a nul-terminated string in the buffer pointed to by
** zPath.
**
** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
** (in this case, MAX_PATHNAME bytes). The full-path is written to
** this buffer before returning.
*/
static int unixFullPathname(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
const char *zPath, /* Possibly relative input path */
int nOut, /* Size of output buffer in bytes */
char *zOut /* Output buffer */
){
}
/* read the conch content */
readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);
if( readLen<PROXY_PATHINDEX ){
sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);
goto end_breaklock;
}
/* write it out to the temporary break file */
fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW), 0);
if( fd<0 ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
goto end_breaklock;
}
if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);
goto end_breaklock;
}
if( rename(tPath, cPath) ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);
goto end_breaklock;
}
rc = 0;
fprintf(stderr, "broke stale lock on %s\n", cPath);
robust_close(pFile, conchFile->h, __LINE__);
conchFile->h = fd;
conchFile->openFlags = O_RDWR | O_CREAT;
end_breaklock:
if( rc ){
if( fd>=0 ){
osUnlink(tPath);
robust_close(pFile, fd, __LINE__);
}
fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
}
return rc;
}
/* Take the requested lock on the conch file and break a stale lock if the
** host id matches.
*/
static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
unixFile *conchFile = pCtx->conchFile;
int rc = SQLITE_OK;
int nTries = 0;
struct timespec conchModTime;
memset(&conchModTime, 0, sizeof(conchModTime));
do {
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
nTries ++;
if( rc==SQLITE_BUSY ){
/* If the lock failed (busy):
* 1st try: get the mod time of the conch, wait 0.5s and try again.
* 2nd try: fail if the mod time changed or host id is different, wait
* 10 sec and try again
* 3rd try: break the lock unless the mod time has changed.
*/
struct stat buf;
if( osFstat(conchFile->h, &buf) ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR_LOCK;
}
if( nTries==1 ){
conchModTime = buf.st_mtimespec;
unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/
continue;
}
assert( nTries>1 );
if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec ||
conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){
return SQLITE_BUSY;
}
if( nTries==2 ){
char tBuf[PROXY_MAXCONCHLEN];
int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);
if( len<0 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR_LOCK;
}
if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){
/* don't break the lock if the host id doesn't match */
if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){
return SQLITE_BUSY;
}
}else{
/* don't break the lock on short read or a version mismatch */
return SQLITE_BUSY;
}
unixSleep(0,10000000); /* wait 10 sec and try the lock again */
continue;
}
assert( nTries==3 );
if( 0==proxyBreakConchLock(pFile, myHostID) ){
rc = SQLITE_OK;
if( lockType==EXCLUSIVE_LOCK ){
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
}
if( !rc ){
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
}
}
}
} while( rc==SQLITE_BUSY && nTries<3 );
return rc;
}
/* Takes the conch by taking a shared lock and read the contents conch, if
** lockPath is non-NULL, the host ID and lock file path must match. A NULL
** lockPath means that the lockPath in the conch file will be used if the
** host IDs match, or a new lock path will be generated automatically
** and written to the conch file.
*/
static int proxyTakeConch(unixFile *pFile){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
}
}else if( hostIdMatch
&& !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
readLen-PROXY_PATHINDEX)
){
/* conch host and lock path match */
goto end_takeconch;
}
}
/* if the conch isn't writable and doesn't match, we can't take it */
if( (conchFile->openFlags&O_RDWR) == 0 ){
rc = SQLITE_BUSY;
goto end_takeconch;
}
/* either the conch didn't match or we need to create a new one */
if( !pCtx->lockProxyPath ){
proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
tempLockPath = lockPath;
/* create a copy of the lock path _only_ if the conch is taken */
}
/* update conch with host and path (this will fail if other process
** has a shared lock already), if the host id matches, use the big
** stick.
*/
futimes(conchFile->h, NULL);
if( hostIdMatch && !createConch ){
if( conchFile->pInode && conchFile->pInode->nShared>1 ){
/* We are trying for an exclusive lock but another thread in this
** same process is still holding a shared lock. */
rc = SQLITE_BUSY;
} else {
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
}
}else{
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
}
if( rc==SQLITE_OK ){
char writeBuffer[PROXY_MAXCONCHLEN];
int writeSize = 0;
writeBuffer[0] = (char)PROXY_CONCHVERSION;
memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
if( pCtx->lockProxyPath!=NULL ){
strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
MAXPATHLEN);
}else{
strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
}
writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
robust_ftruncate(conchFile->h, writeSize);
rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
full_fsync(conchFile->h,0,0);
/* If we created a new conch file (not just updated the contents of a
** valid conch file), try to match the permissions of the database
*/
if( rc==SQLITE_OK && createConch ){
struct stat buf;
int err = osFstat(pFile->h, &buf);
if( err==0 ){
mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
S_IROTH|S_IWOTH);
/* try to match the database file R/W permissions, ignore failure */
#ifndef SQLITE_PROXY_DEBUG
osFchmod(conchFile->h, cmode);
#else
do{
rc = osFchmod(conchFile->h, cmode);
}while( rc==(-1) && errno==EINTR );
if( rc!=0 ){
int code = errno;
fprintf(stderr, "fchmod %o FAILED with %d %s\n",
cmode, code, strerror(code));
} else {
fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
}
}else{
int code = errno;
fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
err, code, strerror(code));
#endif
}
}
}
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
end_takeconch:
OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
if( rc==SQLITE_OK && pFile->openFlags ){
int fd;
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
}
pFile->h = -1;
fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
if( fd>=0 ){
pFile->h = fd;
}else{
rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
during locking */
}
}
if( rc==SQLITE_OK && !pCtx->lockProxy ){
char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
/* we couldn't create the proxy lock file with the old lock file path
** so try again via auto-naming
*/
forceNewLockPath = 1;
tryOldLockPath = 0;
continue; /* go back to the do {} while start point, try again */
}
}
if( rc==SQLITE_OK ){
/* Need to make a copy of path if we extracted the value
** from the conch file or the path was allocated on the stack
*/
int nPath = sqlite3Strlen30(zPath);
int iOff = 0;
if( zPath[0]!='/' ){
if( osGetcwd(zOut, nOut-2)==0 ){
return winLogError(SQLITE_CANTOPEN_BKPT, (DWORD)osErrno, "getcwd", zPath);
}
iOff = sqlite3Strlen30(zOut);
zOut[iOff++] = '/';
}
if( (iOff+nPath+1)>nOut ){
/* SQLite assumes that xFullPathname() nul-terminates the output buffer
** even if it returns an error. */
zOut[iOff] = '\0';
return SQLITE_CANTOPEN_BKPT;
}
sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath);
return SQLITE_OK;
}
#endif /* __CYGWIN__ */
/*
** Turn a relative pathname into a full pathname. Write the full
** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname
** bytes in size.
*/
static int winFullPathnameNoMutex(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
const char *zRelative, /* Possibly relative input path */
int nFull, /* Size of output buffer in bytes */
char *zFull /* Output buffer */
){
#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
int nByte;
void *zConverted;
char *zOut;
#endif
/* If this path name begins with "/X:" or "\\?\", where "X" is any
** alphabetic character, discard the initial "/" from the pathname.
*/
if( zRelative[0]=='/' && (winIsDriveLetterAndColon(zRelative+1)
|| winIsLongPathPrefix(zRelative+1)) ){
zRelative++;
}
SimulateIOError( return SQLITE_ERROR );
#ifdef __CYGWIN__
if( osGetcwd ){
zFull[nFull-1] = '\0';
if( !winIsDriveLetterAndColon(zRelative) || !winIsDirSep(zRelative[2]) ){
int rc = SQLITE_OK;
int nLink = 1; /* Number of symbolic links followed so far */
const char *zIn = zRelative; /* Input path for each iteration of loop */
char *zDel = 0;
struct stat buf;
UNUSED_PARAMETER(pVfs);
do {
/* Call lstat() on path zIn. Set bLink to true if the path is a symbolic
** link, or false otherwise. */
int bLink = 0;
if( osLstat && osReadlink ) {
if( osLstat(zIn, &buf)!=0 ){
int myErrno = osErrno;
if( myErrno!=ENOENT ){
rc = winLogError(SQLITE_CANTOPEN_BKPT, (DWORD)myErrno, "lstat", zIn);
}
}else{
bLink = ((buf.st_mode & 0170000) == 0120000);
}
if( bLink ){
if( zDel==0 ){
zDel = sqlite3MallocZero(nFull);
if( zDel==0 ) rc = SQLITE_NOMEM;
}else if( ++nLink>SQLITE_MAX_SYMLINKS ){
rc = SQLITE_CANTOPEN_BKPT;
}
if( rc==SQLITE_OK ){
nByte = osReadlink(zIn, zDel, nFull-1);
if( nByte ==(DWORD)-1 ){
rc = winLogError(SQLITE_CANTOPEN_BKPT, (DWORD)osErrno, "readlink", zIn);
}else{
if( zDel[0]!='/' ){
int n;
for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--);
if( nByte+n+1>nFull ){
rc = SQLITE_CANTOPEN_BKPT;
}else{
memmove(&zDel[n], zDel, nByte+1);
memcpy(zDel, zIn, n);
nByte += n;
}
}
zDel[nByte] = '\0';
}
}
zIn = zDel;
}
}
assert( rc!=SQLITE_OK || zIn!=zFull || zIn[0]=='/' );
if( rc==SQLITE_OK && zIn!=zFull ){
rc = mkFullPathname(zIn, zFull, nFull);
}
if( bLink==0 ) break;
zIn = zFull;
}while( rc==SQLITE_OK );
sqlite3_free(zDel);
winSimplifyName(zFull);
return rc;
}
}
#endif /* __CYGWIN__ */
#if (SQLITE_OS_WINCE || SQLITE_OS_WINRT) && defined(_WIN32)
SimulateIOError( return SQLITE_ERROR );
/* WinCE has no concept of a relative pathname, or so I am told. */
/* WinRT has no way to convert a relative path to an absolute one. */
if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){
/* Find and remove the row */
i = percentBinarySearch(p, y, 1);
if( i>=0 ){
p->nUsed--;
if( i<(int)p->nUsed ){
memmove(&p->a[i], &p->a[i+1], (p->nUsed - i)*sizeof(p->a[0]));
}
}
}
/*
** Compute the final output of percentile(). Clean up all allocated
** memory if and only if bIsFinal is true.
*/
static void percentCompute(sqlite3_context *pCtx, int bIsFinal){
Percentile *p;
int settings = SQLITE_PTR_TO_INT(sqlite3_user_data(pCtx))&1; /* Discrete? */
unsigned i1, i2;
double v1, v2;
double ix, vx;
p = (Percentile*)sqlite3_aggregate_context(pCtx, 0);
if( p==0 ) return;
if( p->a==0 ) return;
if( p->nUsed ){
if( p->bSorted==0 ){
assert( p->nUsed>1 );
percentSort(p->a, p->nUsed);
p->bSorted = 1;
}
ix = p->rPct*(p->nUsed-1);
i1 = (unsigned)ix;
if( settings & 1 ){
vx = p->a[i1];
}else{
i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1;
v1 = p->a[i1];
v2 = p->a[i2];
vx = v1 + (v2-v1)*(ix-i1);
}
sqlite3_result_double(pCtx, vx);
}
if( bIsFinal ){
sqlite3_free(p->a);
memset(p, 0, sizeof(*p));
}else{
p->bKeepSorted = 1;
}
}
static void percentFinal(sqlite3_context *pCtx){
percentCompute(pCtx, 1);
}
static void percentValue(sqlite3_context *pCtx){
percentCompute(pCtx, 0);
}
/****** End of percentile family of functions ******/
#endif /* SQLITE_ENABLE_PERCENTILE */
#if defined(SQLITE_DEBUG) || defined(SQLITE_ENABLE_FILESTAT)
/*
** Implementation of sqlite_filestat(SCHEMA).
**
** Return JSON text that describes low-level debug/diagnostic information
** about the sqlite3_file object associated with SCHEMA.
*/
static void filestatFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
sqlite3 *db = sqlite3_context_db_handle(context);
const char *zDbName;
sqlite3_str *pStr;
Btree *pBtree;
zDbName = (const char*)sqlite3_value_text(argv[0]);
pBtree = sqlite3DbNameToBtree(db, zDbName);
if( pBtree ){
Pager *pPager;
sqlite3_file *fd;
int rc;
sqlite3BtreeEnter(pBtree);
pPager = sqlite3BtreePager(pBtree);
assert( pPager!=0 );
fd = sqlite3PagerFile(pPager);
pStr = sqlite3_str_new(db);
if( pStr==0 ){
sqlite3_result_error_nomem(context);
}else{
sqlite3_str_append(pStr, "{\"db\":", 6);
rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_FILESTAT, pStr);
if( rc ) sqlite3_str_append(pStr, "null", 4);
fd = sqlite3PagerJrnlFile(pPager);
if( fd && fd->pMethods!=0 ){
sqlite3_str_appendall(pStr, ",\"journal\":");
rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_FILESTAT, pStr);
if( rc ) sqlite3_str_append(pStr, "null", 4);
}
sqlite3_str_append(pStr, "}", 1);
sqlite3_result_text(context, sqlite3_str_finish(pStr), -1,
sqlite3_free);
}
sqlite3BtreeLeave(pBtree);
}else{
sqlite3_result_text(context, "{}", 2, SQLITE_STATIC);
}
}
#endif /* SQLITE_DEBUG || SQLITE_ENABLE_FILESTAT */
#ifdef SQLITE_DEBUG
/*
** Implementation of fpdecode(x,y,z) function.
**
** x is a real number that is to be decoded. y is the precision.
** z is the maximum real precision. Return a string that shows the
** results of the sqlite3FpDecode() function.
**
** Used for testing and debugging only, specifically testing and debugging
** of the sqlite3FpDecode() function. This SQL function does not appear
** in production builds. This function is not an API and is subject to
** modification or removal in future versions of SQLite.
( run in 0.582 second using v1.01-cache-2.11-cpan-39bf76dae61 )