Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -204,10 +204,11 @@ sqlite3_io_methods const *pMethod; /* Always the first entry */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ int dirfd; /* File descriptor for the directory */ unsigned char eFileLock; /* The type of lock held on this fd */ + unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ @@ -240,10 +241,15 @@ */ char aPadding[32]; #endif }; +/* +** Allowed values for the unixFile.ctrlFlags bitmask: +*/ +#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ + /* ** Include code that is common to all os_*.c files */ #include "os_common.h" @@ -885,11 +891,12 @@ ** object keeps a count of the number of unixFile pointing to it. */ struct unixInodeInfo { struct unixFileId fileId; /* The lookup key */ int nShared; /* Number of SHARED locks held */ - int eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ + unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ + unsigned char bProcessLock; /* An exclusive process lock is held */ int nRef; /* Number of pointers to this structure */ unixShmNode *pShmNode; /* Shared memory associated with this inode */ int nLock; /* Number of outstanding file locks */ UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ @@ -1156,11 +1163,11 @@ } /* Otherwise see if some other process holds it. */ #ifndef __DJGPP__ - if( !reserved ){ + if( !reserved && !pFile->pInode->bProcessLock ){ struct flock lock; lock.l_whence = SEEK_SET; lock.l_start = RESERVED_BYTE; lock.l_len = 1; lock.l_type = F_WRLCK; @@ -1178,10 +1185,48 @@ OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } + +/* +** Attempt to set a system-lock on the file pFile. The lock is +** described by pLock. +** +** If the pFile was opened from unix-excl, then the only lock ever +** obtained is an exclusive lock, and it is obtained exactly once +** the first time any lock is attempted. All subsequent system locking +** operations become no-ops. Locking operations still happen internally, +** in order to coordinate access between separate database connections +** within this process, but all of that is handled in memory and the +** operating system does not participate. +*/ +static int unixFileLock(unixFile *pFile, struct flock *pLock){ + int rc; + unixInodeInfo *pInode = pFile->pInode; + assert( unixMutexHeld() ); + assert( pInode!=0 ); + if( (pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock ){ + if( pInode->bProcessLock==0 ){ + struct flock lock; + assert( pInode->nLock==0 ); + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + lock.l_type = F_WRLCK; + rc = osFcntl(pFile->h, F_SETLK, &lock); + if( rc<0 ) return rc; + pInode->bProcessLock = 1; + pInode->nLock++; + }else{ + rc = 0; + } + }else{ + rc = osFcntl(pFile->h, F_SETLK, pLock); + } + return rc; +} /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** @@ -1315,11 +1360,11 @@ if( eFileLock==SHARED_LOCK || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLockh, F_SETLK, &lock); + s = unixFileLock(pFile, &lock); if( s==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1337,18 +1382,18 @@ assert( pInode->eFileLock==0 ); /* Now get the read-lock */ lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; - if( (s = osFcntl(pFile->h, F_SETLK, &lock))==(-1) ){ + if( (s = unixFileLock(pFile, &lock))==(-1) ){ tErrno = errno; } /* Drop the temporary PENDING lock */ lock.l_start = PENDING_BYTE; lock.l_len = 1L; lock.l_type = F_UNLCK; - if( osFcntl(pFile->h, F_SETLK, &lock)!=0 ){ + if( unixFileLock(pFile, &lock)!=0 ){ if( s != -1 ){ /* This could happen with a network mount */ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ @@ -1387,11 +1432,11 @@ lock.l_len = SHARED_SIZE; break; default: assert(0); } - s = osFcntl(pFile->h, F_SETLK, &lock); + s = unixFileLock(pFile, &lock); if( s==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1456,11 +1501,11 @@ ** the byte range is divided into 2 parts and the first part is unlocked then ** set to a read lock, then the other part is simply unlocked. This works ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to ** remove the write lock on a region when a read lock is set. */ -static int _posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ +static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ unixFile *pFile = (unixFile*)id; unixInodeInfo *pInode; struct flock lock; int rc = SQLITE_OK; int h; @@ -1523,11 +1568,11 @@ lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = divSize; - if( osFcntl(h, F_SETLK, &lock)==(-1) ){ + if( unixFileLock(pFile,, &lock)==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; } @@ -1535,11 +1580,11 @@ } lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = divSize; - if( osFcntl(h, F_SETLK, &lock)==(-1) ){ + if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; } @@ -1547,11 +1592,11 @@ } lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST+divSize; lock.l_len = SHARED_SIZE-divSize; - if( osFcntl(h, F_SETLK, &lock)==(-1) ){ + if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; } @@ -1562,11 +1607,11 @@ { lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; - if( osFcntl(h, F_SETLK, &lock)==(-1) ){ + if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; } @@ -1576,11 +1621,11 @@ } lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = PENDING_BYTE; lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); - if( osFcntl(h, F_SETLK, &lock)!=(-1) ){ + if( unixFileLock(pFile, &lock)!=(-1) ){ pInode->eFileLock = SHARED_LOCK; }else{ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ @@ -1600,11 +1645,11 @@ lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; SimulateIOErrorBenign(1); SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); - if( osFcntl(h, F_SETLK, &lock)!=(-1) ){ + if( unixFileLock(pFile, &lock)!=(-1) ){ pInode->eFileLock = NO_LOCK; }else{ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ @@ -1638,11 +1683,11 @@ ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ - return _posixUnlock(id, eFileLock, 0); + return posixUnlock(id, eFileLock, 0); } /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. It closes the directory and file @@ -1688,10 +1733,12 @@ int rc = SQLITE_OK; if( id ){ unixFile *pFile = (unixFile *)id; unixUnlock(id, NO_LOCK); unixEnterMutex(); + assert( pFile->pInode==0 || pFile->pInode->nLock>0 + || pFile->pInode->bProcessLock==0 ); if( pFile->pInode && pFile->pInode->nLock ){ /* If there are outstanding locks, do not actually close the file just ** yet because that would clear those locks. Instead, add the file ** descriptor to pInode->pUnused list. It will be automatically closed ** when the last lock is cleared. @@ -2819,11 +2866,11 @@ ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int nfsUnlock(sqlite3_file *id, int eFileLock){ - return _posixUnlock(id, eFileLock, 1); + return posixUnlock(id, eFileLock, 1); } #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ /* ** The code above is the NFS lock implementation. The code is specific @@ -3516,19 +3563,21 @@ assert( n==1 || lockType!=F_RDLCK ); /* Locks are within range */ assert( n>=1 && nh, F_SETLK, &f); - rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; + if( pShmNode->h>=0 ){ + /* Initialize the locking parameters */ + memset(&f, 0, sizeof(f)); + f.l_type = lockType; + f.l_whence = SEEK_SET; + f.l_start = ofst; + f.l_len = n; + + rc = osFcntl(pShmNode->h, F_SETLK, &f); + rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; + } /* Update the global lock state and do debug tracing */ #ifdef SQLITE_DEBUG { u16 mask; OSTRACE(("SHM-LOCK ")); @@ -3579,11 +3628,15 @@ if( p && p->nRef==0 ){ int i; assert( p->pInode==pFd->pInode ); if( p->mutex ) sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ - munmap(p->apRegion[i], p->szRegion); + if( p->h>=0 ){ + munmap(p->apRegion[i], p->szRegion); + }else{ + sqlite3_free(p->apRegion[i]); + } } sqlite3_free(p->apRegion); if( p->h>=0 ){ robust_close(pFd, p->h, __LINE__); p->h = -1; @@ -3619,10 +3672,16 @@ ** "unsupported" and may go away in a future SQLite release. ** ** When opening a new shared-memory file, if no other instances of that ** file are currently open, in this process or in other processes, then ** the file must be truncated to zero length or have its header cleared. +** +** If the original database file (pDbFd) is using the "unix-excl" VFS +** that means that an exclusive lock is held on the database file and +** that no other processes are able to read or write the database. In +** that case, we do not really need shared memory. No shared memory +** file is created. The shared memory will be simulated with heap memory. */ static int unixOpenSharedMemory(unixFile *pDbFd){ struct unixShm *p = 0; /* The connection to be opened */ struct unixShmNode *pShmNode; /* The underlying mmapped file */ int rc; /* Result code */ @@ -3648,11 +3707,11 @@ /* Call fstat() to figure out the permissions on the database file. If ** a new *-shm file is created, an attempt will be made to create it ** with the same permissions. The actual permissions the file is created ** with are subject to the current umask setting. */ - if( osFstat(pDbFd->h, &sStat) ){ + if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ rc = SQLITE_IOERR_FSTAT; goto shm_open_err; } #ifdef SQLITE_SHM_DIRECTORY @@ -3681,30 +3740,32 @@ if( pShmNode->mutex==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } - pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT, - (sStat.st_mode & 0777)); - if( pShmNode->h<0 ){ - rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); - goto shm_open_err; - } - - /* Check to see if another process is holding the dead-man switch. - ** If not, truncate the file to zero length. - */ - rc = SQLITE_OK; - if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ - if( robust_ftruncate(pShmNode->h, 0) ){ - rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); - } - } - if( rc==SQLITE_OK ){ - rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); - } - if( rc ) goto shm_open_err; + if( pInode->bProcessLock==0 ){ + pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT, + (sStat.st_mode & 0777)); + if( pShmNode->h<0 ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); + goto shm_open_err; + } + + /* Check to see if another process is holding the dead-man switch. + ** If not, truncate the file to zero length. + */ + rc = SQLITE_OK; + if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ + if( robust_ftruncate(pShmNode->h, 0) ){ + rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); + } + } + if( rc==SQLITE_OK ){ + rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); + } + if( rc ) goto shm_open_err; + } } /* Make the new connection a child of the unixShmNode */ p->pShmNode = pShmNode; #ifdef SQLITE_DEBUG @@ -3774,38 +3835,44 @@ p = pDbFd->pShm; pShmNode = p->pShmNode; sqlite3_mutex_enter(pShmNode->mutex); assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + assert( pShmNode->pInode==pDbFd->pInode ); + assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); if( pShmNode->nRegion<=iRegion ){ char **apNew; /* New apRegion[] array */ int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ struct stat sStat; /* Used by fstat() */ pShmNode->szRegion = szRegion; - /* The requested region is not mapped into this processes address space. - ** Check to see if it has been allocated (i.e. if the wal-index file is - ** large enough to contain the requested region). - */ - if( osFstat(pShmNode->h, &sStat) ){ - rc = SQLITE_IOERR_SHMSIZE; - goto shmpage_out; - } - - if( sStat.st_sizeh, nByte) ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE,"ftruncate",pShmNode->zFilename); - goto shmpage_out; + if( pShmNode->h>=0 ){ + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + if( osFstat(pShmNode->h, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + + if( sStat.st_sizeh, nByte) ){ + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate", + pShmNode->zFilename); + goto shmpage_out; + } } } /* Map the requested memory region into this processes address space. */ apNew = (char **)sqlite3_realloc( @@ -3815,16 +3882,26 @@ rc = SQLITE_IOERR_NOMEM; goto shmpage_out; } pShmNode->apRegion = apNew; while(pShmNode->nRegion<=iRegion){ - void *pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, - MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion - ); - if( pMem==MAP_FAILED ){ - rc = SQLITE_IOERR; - goto shmpage_out; + void *pMem; + if( pShmNode->h>=0 ){ + pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion + ); + if( pMem==MAP_FAILED ){ + rc = SQLITE_IOERR; + goto shmpage_out; + } + }else{ + pMem = sqlite3_malloc(szRegion); + if( pMem==0 ){ + rc = SQLITE_NOMEM; + goto shmpage_out; + } + memset(pMem, 0, szRegion); } pShmNode->apRegion[pShmNode->nRegion] = pMem; pShmNode->nRegion++; } } @@ -3867,10 +3944,12 @@ assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); + assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); mask = (1<<(ofst+n)) - (1<1 || mask==(1<mutex); if( flags & SQLITE_SHM_UNLOCK ){ @@ -4004,11 +4083,11 @@ ** shared-memory file, too */ unixEnterMutex(); assert( pShmNode->nRef>0 ); pShmNode->nRef--; if( pShmNode->nRef==0 ){ - if( deleteFlag ) unlink(pShmNode->zFilename); + if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename); unixShmPurge(pDbFd); } unixLeaveMutex(); return SQLITE_OK; @@ -4349,10 +4428,15 @@ OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; pNew->dirfd = dirfd; pNew->zPath = zFilename; + if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ + pNew->ctrlFlags = UNIXFILE_EXCL; + }else{ + pNew->ctrlFlags = 0; + } #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); if( pNew->pId==0 ){ noLock = 1; @@ -6548,10 +6632,11 @@ #else UNIXVFS("unix", posixIoFinder ), #endif UNIXVFS("unix-none", nolockIoFinder ), UNIXVFS("unix-dotfile", dotlockIoFinder ), + UNIXVFS("unix-excl", posixIoFinder ), #if OS_VXWORKS UNIXVFS("unix-namedsem", semIoFinder ), #endif #if SQLITE_ENABLE_LOCKING_STYLE UNIXVFS("unix-posix", posixIoFinder ), Index: src/shell.c ================================================================== --- src/shell.c +++ src/shell.c @@ -417,10 +417,11 @@ struct previous_mode_data explainPrev; /* Holds the mode information just before ** .explain ON */ char outfile[FILENAME_MAX]; /* Filename for *out */ const char *zDbFilename; /* name of the database file */ + const char *zVfs; /* Name of VFS to use */ sqlite3_stmt *pStmt; /* Current statement if any. */ FILE *pLog; /* Write log output here */ }; /* @@ -2598,10 +2599,11 @@ " -list set output mode to 'list'\n" " -separator 'x' set output field separator (|)\n" " -stats print memory stats before each finalize\n" " -nullvalue 'text' set text string for NULL values\n" " -version show SQLite version\n" + " -vfs NAME use NAME as the default VFS\n" ; static void usage(int showDetail){ fprintf(stderr, "Usage: %s [OPTIONS] FILENAME [SQL]\n" "FILENAME is the name of an SQLite database. A new database is created\n" @@ -2682,10 +2684,19 @@ } if( szHeap>0x7fff0000 ) szHeap = 0x7fff0000; #if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) sqlite3_config(SQLITE_CONFIG_HEAP, malloc((int)szHeap), (int)szHeap, 64); #endif + }else if( strcmp(argv[i],"-vfs")==0 ){ + i++; + sqlite3_vfs *pVfs = sqlite3_vfs_find(argv[i]); + if( pVfs ){ + sqlite3_vfs_register(pVfs, 1); + }else{ + fprintf(stderr, "no such VFS: \"%s\"\n", argv[i]); + exit(1); + } } } if( i