Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -2384,11 +2384,11 @@ freeTempSpace(pBt); rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, pageSize-usableSize); return rc; } - if( nPageHeader>nPageFile ){ + if( (pBt->db->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){ rc = SQLITE_CORRUPT_BKPT; goto page1_init_failed; } if( usableSize<480 ){ goto page1_init_failed; Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -392,10 +392,23 @@ } #define fcntl lockTrace #endif /* SQLITE_LOCK_TRACE */ +/* +** Retry ftruncate() calls that fail due to EINTR +*/ +#ifdef EINTR +static int robust_ftruncate(int h, sqlite3_int64 sz){ + int rc; + do{ rc = ftruncate(h,sz); }while( rc<0 && errno==EINTR ); + return rc; +} +#else +# define robust_ftruncate(a,b) ftruncate(a,b) +#endif + /* ** This routine translates a standard POSIX errno code into something ** useful to the clients of the sqlite3 functions. Specifically, it is ** intended to translate a variety of "try again" errors into SQLITE_BUSY @@ -733,10 +746,79 @@ /* ** A lists of all unixInodeInfo objects. */ static unixInodeInfo *inodeList = 0; + +/* +** +** This function - unixLogError_x(), is only ever called via the macro +** unixLogError(). +** +** It is invoked after an error occurs in an OS function and errno has been +** set. It logs a message using sqlite3_log() containing the current value of +** errno and, if possible, the human-readable equivalent from strerror() or +** strerror_r(). +** +** The first argument passed to the macro should be the error code that +** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). +** The two subsequent arguments should be the name of the OS function that +** failed (e.g. "unlink", "open") and the associated file-system path, +** if any.
+*/ +#define unixLogError(a,b,c) unixLogError_x(a,b,c,__LINE__) +static int unixLogError_x( + int errcode, /* SQLite error code */ + const char *zFunc, /* Name of OS function that failed */ + const char *zPath, /* File path associated with error */ + int iLine /* Source line number where error occurred */ +){ + char *zErr; /* Message from strerror() or equivalent */ + + /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use + ** the strerror() function to obtain the human-readable error message + ** equivalent to errno. Otherwise, use strerror_r(). + */ +#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) + char aErr[80]; + memset(aErr, 0, sizeof(aErr)); + zErr = aErr; + + /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, + ** assume that the system provides the GNU version of strerror_r() that + ** returns a pointer to a buffer containing the error message. That pointer + ** may point to aErr[], or it may point to some static storage somewhere. + ** Otherwise, assume that the system provides the POSIX version of + ** strerror_r(), which always writes an error message into aErr[]. + ** + ** If the code incorrectly assumes that it is the POSIX version that is + ** available, the error message will often be an empty string. Not a + ** huge problem. Incorrectly concluding that the GNU version is available + ** could lead to a segfault though. + */ +#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) + zErr = +# endif + strerror_r(errno, aErr, sizeof(aErr)-1); + +#elif SQLITE_THREADSAFE + /* This is a threadsafe build, but strerror_r() is not available. */ + zErr = ""; +#else + /* Non-threadsafe build, use strerror(). */ + zErr = strerror(errno); +#endif + + assert( errcode!=SQLITE_OK ); + sqlite3_log(errcode, + "os_unix.c: %s() at line %d - \"%s\" errno=%d path=%s", + zFunc, iLine, zErr, errno, (zPath ? zPath : "n/a") + ); + + return errcode; +} + /* ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list.
** If all such file descriptors are closed without error, the list is ** cleared and SQLITE_OK returned. @@ -753,11 +835,11 @@ UnixUnusedFd *pNext; for(p=pInode->pUnused; p; p=pNext){ pNext = p->pNext; if( close(p->fd) ){ pFile->lastErrno = errno; - rc = SQLITE_IOERR_CLOSE; + rc = unixLogError(SQLITE_IOERR_CLOSE, "close", pFile->zPath); p->pNext = pError; pError = p; }else{ sqlite3_free(p); } @@ -841,11 +923,11 @@ ** in the header of every SQLite database. In this way, if there ** is a race condition such that another thread has already populated ** the first page of the database, no damage is done. */ if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ - rc = write(fd, "S", 1); + do{ rc = write(fd, "S", 1); }while( rc<0 && errno==EINTR ); if( rc!=1 ){ pFile->lastErrno = errno; return SQLITE_IOERR; } rc = fstat(fd, &statbuf); @@ -1406,20 +1488,20 @@ if( pFile ){ if( pFile->dirfd>=0 ){ int err = close(pFile->dirfd); if( err ){ pFile->lastErrno = errno; - return SQLITE_IOERR_DIR_CLOSE; + return unixLogError(SQLITE_IOERR_DIR_CLOSE, "close", pFile->zPath); }else{ pFile->dirfd=-1; } } if( pFile->h>=0 ){ int err = close(pFile->h); if( err ){ pFile->lastErrno = errno; - return SQLITE_IOERR_CLOSE; + return unixLogError(SQLITE_IOERR_CLOSE, "close", pFile->zPath); } } #if OS_VXWORKS if( pFile->pId ){ if( pFile->isDelete ){ @@ -1717,10 +1799,24 @@ ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if ** compiling for VXWORKS. */ #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS +/* +** Retry flock() calls that fail with EINTR +*/ +#ifdef EINTR +static int robust_flock(int fd, int op){ + int rc; + do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); + return rc; +} +#else +# define robust_flock(a,b) flock(a,b) +#endif + + /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. 
The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. @@ -1740,14 +1836,14 @@ } /* Otherwise see if some other process holds it. */ if( !reserved ){ /* attempt to get the lock */ - int lrc = flock(pFile->h, LOCK_EX | LOCK_NB); + int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); if( !lrc ){ /* got the lock, unlock it */ - lrc = flock(pFile->h, LOCK_UN); + lrc = robust_flock(pFile->h, LOCK_UN); if ( lrc ) { int tErrno = errno; /* unlock failed with an error */ lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(lrc) ){ @@ -1820,11 +1916,11 @@ return SQLITE_OK; } /* grab an exclusive lock */ - if (flock(pFile->h, LOCK_EX | LOCK_NB)) { + if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { int tErrno = errno; /* didn't get, must be busy */ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1869,11 +1965,11 @@ pFile->eFileLock = eFileLock; return SQLITE_OK; } /* no, really, unlock. 
*/ - int rc = flock(pFile->h, LOCK_UN); + int rc = robust_flock(pFile->h, LOCK_UN); if (rc) { int r, tErrno = errno; r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(r) ){ pFile->lastErrno = tErrno; @@ -2606,14 +2702,14 @@ #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; #if defined(USE_PREAD) - got = pread(id->h, pBuf, cnt, offset); + do{ got = pread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); SimulateIOError( got = -1 ); #elif defined(USE_PREAD64) - got = pread64(id->h, pBuf, cnt, offset); + do{ got = pread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); SimulateIOError( got = -1 ); #else newOffset = lseek(id->h, offset, SEEK_SET); SimulateIOError( newOffset-- ); if( newOffset!=offset ){ @@ -2622,11 +2718,11 @@ }else{ ((unixFile*)id)->lastErrno = 0; } return -1; } - got = read(id->h, pBuf, cnt); + do{ got = read(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); #endif TIMER_END; if( got<0 ){ ((unixFile*)id)->lastErrno = errno; } @@ -2684,13 +2780,13 @@ #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; #if defined(USE_PREAD) - got = pwrite(id->h, pBuf, cnt, offset); + do{ got = pwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); #elif defined(USE_PREAD64) - got = pwrite64(id->h, pBuf, cnt, offset); + do{ got = pwrite64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); #else newOffset = lseek(id->h, offset, SEEK_SET); if( newOffset!=offset ){ if( newOffset == -1 ){ ((unixFile*)id)->lastErrno = errno; @@ -2697,11 +2793,11 @@ }else{ ((unixFile*)id)->lastErrno = 0; } return -1; } - got = write(id->h, pBuf, cnt); + do{ got = write(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); #endif TIMER_END; if( got<0 ){ ((unixFile*)id)->lastErrno = errno; } @@ -2937,11 +3033,11 @@ OSTRACE(("SYNC %-3d\n", pFile->h)); rc = full_fsync(pFile->h, isFullsync, isDataOnly); SimulateIOError( rc=1 ); if( rc ){ pFile->lastErrno = 
errno; - return SQLITE_IOERR_FSYNC; + return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); } if( pFile->dirfd>=0 ){ int err; OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd, HAVE_FULLFSYNC, isFullsync)); @@ -2964,11 +3060,11 @@ err = close(pFile->dirfd); /* Only need to sync once, so close the */ if( err==0 ){ /* directory when we are done */ pFile->dirfd = -1; }else{ pFile->lastErrno = errno; - rc = SQLITE_IOERR_DIR_CLOSE; + rc = unixLogError(SQLITE_IOERR_DIR_CLOSE, "close", pFile->zPath); } } return rc; } @@ -2988,14 +3084,14 @@ */ if( pFile->szChunk ){ nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; } - rc = ftruncate(pFile->h, (off_t)nByte); + rc = robust_ftruncate(pFile->h, (off_t)nByte); if( rc ){ pFile->lastErrno = errno; - return SQLITE_IOERR_TRUNCATE; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); }else{ #ifndef NDEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) and we truncate the file to zero length, @@ -3063,13 +3159,15 @@ if( fstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT; nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; if( nSize>(i64)buf.st_size ){ #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE - if( posix_fallocate(pFile->h, buf.st_size, nSize-buf.st_size) ){ - return SQLITE_IOERR_WRITE; - } + int rc; /* Error number returned by posix_fallocate() */ + do{ + rc = posix_fallocate(pFile->h, buf.st_size, nSize-buf.st_size); + }while( rc==EINTR ); /* Error is the return value; errno is not set */ + if( rc ) return SQLITE_IOERR_WRITE; #else /* If the OS does not have posix_fallocate(), fake it. First use ** ftruncate() to set the file size, then write a single byte to ** the last byte in each block within the extended region.
This ** is the same technique used by glibc to implement posix_fallocate() @@ -3077,13 +3175,13 @@ */ int nBlk = buf.st_blksize; /* File-system block size */ i64 iWrite; /* Next offset to write to */ int nWrite; /* Return value from seekAndWrite() */ - if( ftruncate(pFile->h, nSize) ){ + if( robust_ftruncate(pFile->h, nSize) ){ pFile->lastErrno = errno; - return SQLITE_IOERR_TRUNCATE; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); } iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1; do { nWrite = seekAndWrite(pFile, iWrite, "", 1); iWrite += nBlk; @@ -3425,21 +3523,21 @@ goto shm_open_err; } pShmNode->h = open(zShmFilename, O_RDWR|O_CREAT, (sStat.st_mode & 0777)); if( pShmNode->h<0 ){ - rc = SQLITE_CANTOPEN_BKPT; + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); goto shm_open_err; } /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ - if( ftruncate(pShmNode->h, 0) ){ - rc = SQLITE_IOERR_SHMOPEN; + if( robust_ftruncate(pShmNode->h, 0) ){ + rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); } } if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); } @@ -3540,12 +3638,12 @@ ** ** Alternatively, if bExtend is true, use ftruncate() to allocate ** the requested memory region. */ if( !bExtend ) goto shmpage_out; - if( ftruncate(pShmNode->h, nByte) ){ - rc = SQLITE_IOERR_SHMSIZE; + if( robust_ftruncate(pShmNode->h, nByte) ){ + rc = unixLogError(SQLITE_IOERR_SHMSIZE,"ftruncate",pShmNode->zFilename); goto shmpage_out; } } /* Map the requested memory region into this processes address space. 
*/ @@ -4260,11 +4358,11 @@ #endif OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); } } *pFd = fd; - return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN_BKPT); + return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); } /* ** Return the name of a directory in which to put temporary files. ** If no suitable temporary file directory can be found, return NULL. @@ -4601,11 +4699,11 @@ flags |= SQLITE_OPEN_READONLY; openFlags |= O_RDONLY; fd = open(zName, openFlags, openMode); } if( fd<0 ){ - rc = SQLITE_CANTOPEN_BKPT; + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); goto open_finished; } } assert( fd>=0 ); if( pOutFlags ){ @@ -4733,11 +4831,11 @@ ){ int rc = SQLITE_OK; UNUSED_PARAMETER(NotUsed); SimulateIOError(return SQLITE_IOERR_DELETE); if( unlink(zPath)==(-1) && errno!=ENOENT ){ - return SQLITE_IOERR_DELETE; + return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); } #ifndef SQLITE_DISABLE_DIRSYNC if( dirSync ){ int fd; rc = openDirectory(zPath, &fd); @@ -4746,14 +4844,14 @@ if( fsync(fd)==-1 ) #else if( fsync(fd) ) #endif { - rc = SQLITE_IOERR_DIR_FSYNC; + rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); } if( close(fd)&&!rc ){ - rc = SQLITE_IOERR_DIR_CLOSE; + rc = unixLogError(SQLITE_IOERR_DIR_CLOSE, "close", zPath); } } } #endif return rc; @@ -4833,11 +4931,11 @@ if( zPath[0]=='/' ){ sqlite3_snprintf(nOut, zOut, "%s", zPath); }else{ int nCwd; if( getcwd(zOut, nOut-1)==0 ){ - return SQLITE_CANTOPEN_BKPT; + return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); } nCwd = (int)strlen(zOut); sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath); } return SQLITE_OK; @@ -4937,11 +5035,13 @@ pid = getpid(); memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid)); assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf ); nBuf = sizeof(t) + sizeof(pid); }else{ - nBuf = read(fd, zBuf, nBuf); + int got; /* read() result; nBuf must survive an EINTR retry */ + do{ got = read(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); + nBuf = got; close(fd); } } #endif return nBuf; @@ -5697,27 +5797,31 @@ strlcpy(&writeBuffer[PROXY_PATHINDEX],
pCtx->lockProxyPath, MAXPATHLEN); }else{ strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); } writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); - ftruncate(conchFile->h, writeSize); + robust_ftruncate(conchFile->h, writeSize); rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); fsync(conchFile->h); /* If we created a new conch file (not just updated the contents of a ** valid conch file), try to match the permissions of the database */ if( rc==SQLITE_OK && createConch ){ struct stat buf; + int rc; int err = fstat(pFile->h, &buf); if( err==0 ){ mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH); /* try to match the database file R/W permissions, ignore failure */ #ifndef SQLITE_PROXY_DEBUG fchmod(conchFile->h, cmode); #else - if( fchmod(conchFile->h, cmode)!=0 ){ + do{ + rc = fchmod(conchFile->h, cmode); + }while( rc==(-1) && errno==EINTR ); + if( rc!=0 ){ int code = errno; fprintf(stderr, "fchmod %o FAILED with %d %s\n", cmode, code, strerror(code)); } else { fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -2913,26 +2913,63 @@ } return rc; } + +/* +** Update the value of the change-counter at offsets 24 and 92 in +** the header and the sqlite version number at offset 96. +** +** This is an unconditional update. See also the pager_incr_changecounter() +** routine which only updates the change-counter if the update is actually +** needed, as determined by the pPager->changeCountDone state variable. +*/ +static void pager_write_changecounter(PgHdr *pPg){ + u32 change_counter; + + /* Increment the value just read and write it back to byte 24. 
*/ + change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1; + put32bits(((char*)pPg->pData)+24, change_counter); + + /* Also store the SQLite version number in bytes 96..99 and in + ** bytes 92..95 store the change counter for which the version number + ** is valid. */ + put32bits(((char*)pPg->pData)+92, change_counter); + put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER); +} + /* ** This function is a wrapper around sqlite3WalFrames(). As well as logging ** the contents of the list of pages headed by pList (connected by pDirty), ** this function notifies any active backup processes that the pages have -** changed. +** changed. +** +** The list of pages passed into this routine is always sorted by page number. +** Hence, if page 1 appears anywhere on the list, it will be the first page. */ static int pagerWalFrames( Pager *pPager, /* Pager object */ PgHdr *pList, /* List of frames to log */ Pgno nTruncate, /* Database size after this commit */ int isCommit, /* True if this is a commit */ int syncFlags /* Flags to pass to OsSync() (or 0) */ ){ int rc; /* Return code */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES) + PgHdr *p; /* For looping over pages */ +#endif assert( pPager->pWal ); +#ifdef SQLITE_DEBUG + /* Verify that the page list is in ascending order */ + for(p=pList; p && p->pDirty; p=p->pDirty){ + assert( p->pgno < p->pDirty->pgno ); + } +#endif + + if( pList->pgno==1 ) pager_write_changecounter(pList); rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, syncFlags ); if( rc==SQLITE_OK && pPager->pBackup ){ PgHdr *p; @@ -2940,13 +2977,12 @@ sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); } } #ifdef SQLITE_CHECK_PAGES - { - PgHdr *p; - for(p=pList; p; p=p->pDirty) pager_set_pagehash(p); + for(p=pList; p; p=p->pDirty){ + pager_set_pagehash(p); } #endif return rc; } @@ -3967,10 +4003,11 @@ if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ i64 offset =
(pgno-1)*(i64)pPager->pageSize; /* Offset to write */ char *pData; /* Data to write */ assert( (pList->flags&PGHDR_NEED_SYNC)==0 ); + if( pList->pgno==1 ) pager_write_changecounter(pList); /* Encode the database */ CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData); /* Write out the page data. */ @@ -5487,11 +5524,17 @@ } /* ** This routine is called to increment the value of the database file ** change-counter, stored as a 4-byte big-endian integer starting at -** byte offset 24 of the pager file. +** byte offset 24 of the pager file. The secondary change counter at +** 92 is also updated, as is the SQLite version number at offset 96. +** +** But this only happens if the pPager->changeCountDone flag is false. +** To avoid excess churning of page 1, the update only happens once. +** See also the pager_write_changecounter() routine that does an +** unconditional update of the change counters. ** ** If the isDirectMode flag is zero, then this is done by calling ** sqlite3PagerWrite() on page 1, then modifying the contents of the ** page data. In this case the file will be updated when the current ** transaction is committed. @@ -5528,11 +5571,10 @@ # define DIRECT_MODE isDirectMode #endif if( !pPager->changeCountDone && pPager->dbSize>0 ){ PgHdr *pPgHdr; /* Reference to page 1 */ - u32 change_counter; /* Initial value of change-counter field */ assert( !pPager->tempFile && isOpen(pPager->fd) ); /* Open page 1 of the file for writing. */ rc = sqlite3PagerGet(pPager, 1, &pPgHdr); @@ -5541,25 +5583,17 @@ /* If page one was fetched successfully, and this function is not ** operating in direct-mode, make page 1 writable. When not in ** direct mode, page 1 is always held in cache and hence the PagerGet() ** above is always successful - hence the ALWAYS on rc==SQLITE_OK. 
*/ if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){ rc = sqlite3PagerWrite(pPgHdr); } if( rc==SQLITE_OK ){ - /* Increment the value just read and write it back to byte 24. */ - change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers); - change_counter++; - put32bits(((char*)pPgHdr->pData)+24, change_counter); - - /* Also store the SQLite version number in bytes 96..99 and in - ** bytes 92..95 store the change counter for which the version number - ** is valid. */ - put32bits(((char*)pPgHdr->pData)+92, change_counter); - put32bits(((char*)pPgHdr->pData)+96, SQLITE_VERSION_NUMBER); + /* Actually do the update of the change counter */ + pager_write_changecounter(pPgHdr); /* If running in direct mode, write the contents of page 1 to the file. */ if( DIRECT_MODE ){ const void *zBuf; assert( pPager->dbFileSize>0 ); Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -1571,11 +1571,12 @@ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); testcase( szPage<=32768 ); testcase( szPage>=65536 ); - if( pWal->hdr.mxFrame==0 ) return SQLITE_OK; + pInfo = walCkptInfo(pWal); + if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK; /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK ){ return rc; @@ -1593,11 +1594,10 @@ ** overwrite database pages that are in use by active readers and thus ** cannot be backfilled from the WAL. */ mxSafeFrame = pWal->hdr.mxFrame; mxPage = pWal->hdr.nPage; - pInfo = walCkptInfo(pWal); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; if( mxSafeFrame>=y ){ assert( y<=pWal->hdr.mxFrame ); rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); @@ -1914,14 +1914,35 @@ int i; /* Loop counter */ int rc = SQLITE_OK; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ - /* Take steps to avoid spinning forever if there is a protocol error.
*/ + /* Take steps to avoid spinning forever if there is a protocol error. + ** + ** Circumstances that cause a RETRY should only last for the briefest + ** instances of time. No I/O or other system calls are done while the + ** locks are held, so the locks should not be held for very long. But + ** if we are unlucky, another process that is holding a lock might get + ** paged out or take a page-fault that is time-consuming to resolve, + ** during the few nanoseconds that it is holding the lock. In that case, + ** it might take longer than normal for the lock to free. + ** + ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few + ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this + ** is more of a scheduler yield than an actual delay. But on the 10th + ** and subsequent retries, the delays start becoming longer and longer, + ** so that on the 100th (and last) RETRY we delay for 21 milliseconds. + ** The total delay time before giving up is less than 1 second. + */ if( cnt>5 ){ - if( cnt>100 ) return SQLITE_PROTOCOL; - sqlite3OsSleep(pWal->pVfs, 1); + int nDelay = 1; /* Pause time in microseconds */ + if( cnt>100 ){ + VVA_ONLY( pWal->lockError = 1; ) + return SQLITE_PROTOCOL; + } + if( cnt>=10 ) nDelay = (cnt-9)*238; /* Max delay 21ms. Total delay 996ms */ + sqlite3OsSleep(pWal->pVfs, nDelay); } if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); if( rc==SQLITE_BUSY ){ @@ -1999,26 +2020,13 @@ assert( thisMark!=READMARK_NOT_USED ); mxReadMark = thisMark; mxI = i; } } - if( mxI==0 ){ - /* If we get here, it means that all of the aReadMark[] entries between - ** 1 and WAL_NREADER-1 are zero. Try to initialize aReadMark[1] to - ** be mxFrame, then retry.
- */ - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1); - if( rc==SQLITE_OK ){ - pInfo->aReadMark[1] = pWal->hdr.mxFrame; - walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1); - rc = WAL_RETRY; - }else if( rc==SQLITE_BUSY ){ - rc = WAL_RETRY; - } - return rc; - }else{ - if( mxReadMark < pWal->hdr.mxFrame ){ + /* There was once an "if" here. The extra "{" is to preserve indentation. */ + { + if( mxReadMark < pWal->hdr.mxFrame || mxI==0 ){ for(i=1; i<WAL_NREADER; i++){ rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; mxI = i; @@ -2027,10 +2035,14 @@ }else if( rc!=SQLITE_BUSY ){ return rc; } } } + if( mxI==0 ){ + assert( rc==SQLITE_BUSY ); + return WAL_RETRY; + } rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); if( rc ){ return rc==SQLITE_BUSY ? WAL_RETRY : rc; } @@ -2087,10 +2099,14 @@ int cnt = 0; /* Number of TryBeginRead attempts */ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); + testcase( (rc&0xff)==SQLITE_BUSY ); + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); return rc; } /* ** Finish with a read transaction. All this does is release the @@ -2404,10 +2420,12 @@ if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ + u32 salt1; + sqlite3_randomness(4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL), then the transactions ** frames will overwrite the start of the existing log.
Update the @@ -2421,11 +2439,11 @@ int i; /* Loop counter */ u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ pWal->nCkpt++; pWal->hdr.mxFrame = 0; sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); - sqlite3_randomness(4, &aSalt[1]); + aSalt[1] = salt1; walIndexWriteHdr(pWal); pInfo->nBackfill = 0; for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; assert( pInfo->aReadMark[0]==0 ); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); @@ -2438,10 +2456,14 @@ cnt = 0; do{ int notUsed; rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); }while( rc==WAL_RETRY ); + assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); } return rc; } /* Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -2313,14 +2313,13 @@ Parse *pParse, Expr *pExpr, u8 aff, sqlite3_value **pp ){ - /* The evalConstExpr() function will have already converted any TK_VARIABLE - ** expression involved in an comparison into a TK_REGISTER.
*/ - assert( pExpr->op!=TK_VARIABLE ); - if( pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE ){ + if( pExpr->op==TK_VARIABLE + || (pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE) + ){ int iVar = pExpr->iColumn; sqlite3VdbeSetVarmask(pParse->pVdbe, iVar); /* IMP: R-23257-02778 */ *pp = sqlite3VdbeGetValue(pParse->pReprepare, iVar, aff); return SQLITE_OK; } Index: test/exclusive2.test ================================================================== --- test/exclusive2.test +++ test/exclusive2.test @@ -297,15 +297,15 @@ execsql { PRAGMA locking_mode = normal; INSERT INTO t1 VALUES(randstr(10, 400)); } readPagerChangeCounter test.db -} {4} +} {5} do_test exclusive2-3.6 { execsql { INSERT INTO t1 VALUES(randstr(10, 400)); } readPagerChangeCounter test.db -} {5} +} {6} sqlite3_soft_heap_limit $cmdlinearg(soft-heap-limit) finish_test Index: test/filefmt.test ================================================================== --- test/filefmt.test +++ test/filefmt.test @@ -190,7 +190,27 @@ integrity_check filefmt-2.2.5 do_execsql_test filefmt-2.2.6 { COMMIT } {} db close sqlite3 db test.db integrity_check filefmt-2.2.7 + +#-------------------------------------------------------------------------- +# Check that ticket 89b8c9ac54 is fixed. Before the fix, the SELECT +# statement would return SQLITE_CORRUPT. The database file was not actually +# corrupted, but SQLite was reporting that it was. +# +db close +forcedelete test.db +sqlite3 db test.db +do_execsql_test filefmt-3.1 { + PRAGMA auto_vacuum = 1; + CREATE TABLE t1(a, b); +} {} +do_test filefmt-3.2 { + sql36231 { DROP TABLE t1 } +} {} +do_execsql_test filefmt-3.3 { + SELECT * FROM sqlite_master; + PRAGMA integrity_check; +} {ok} finish_test ADDED test/oserror.test Index: test/oserror.test ================================================================== --- /dev/null +++ test/oserror.test @@ -0,0 +1,112 @@ +# 2011 February 19 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing that error messages are logged via the +# sqlite3_log() mechanism when certain errors are encountered in the +# default unix or windows VFS modules. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +if {$::tcl_platform(platform)!="unix"} { finish_test ; return } +set ::testprefix oserror + +db close +sqlite3_shutdown +test_sqlite3_log xLog +proc xLog {error_code msg} { + if {[string match os_* $msg]} { + lappend ::log $msg + } +} + +proc do_re_test {tn script expression} { + uplevel do_test $tn [list [subst -nocommands { + set res [eval { $script }] + if {[regexp {$expression} [set res]]} { + set {} {$expression} + } else { + set res + } + }]] [list $expression] + +} + +#-------------------------------------------------------------------------- +# Tests oserror-1.* test failures in the open() system call. +# + +# Test a failure in open() due to too many files. +# +do_test 1.1.1 { + set ::log [list] + list [catch { + for {set i 0} {$i < 2000} {incr i} { sqlite3 dbh_$i test.db -readonly 1 } + } msg] $msg +} {1 {unable to open database file}} +do_test 1.1.2 { + catch { for {set i 0} {$i < 2000} {incr i} { dbh_$i close } } +} {1} + +do_re_test 1.1.3 { lindex $::log 0 } {^os_unix.c: open.*test.db$} + + +# Test a failure in open() due to the path being a directory. +# +do_test 1.2.1 { + file mkdir dir.db + set ::log [list] + list [catch { sqlite3 dbh dir.db } msg] $msg +} {1 {unable to open database file}} + +do_re_test 1.2.2 { lindex $::log 0 } {^os_unix.c: open.*dir.db$} + +# Test a failure in open() due to the path not existing. 
+# +do_test 1.3.1 { + set ::log [list] + list [catch { sqlite3 dbh /x/y/z/test.db } msg] $msg +} {1 {unable to open database file}} + +do_re_test 1.3.2 { lindex $::log 0 } {^os_unix.c: open.*test.db$} + +# Test a failure in open() due to the path not being accessible (/root). +# +do_test 1.4.1 { + set ::log [list] + list [catch { sqlite3 dbh /root/test.db } msg] $msg +} {1 {unable to open database file}} + +do_re_test 1.4.2 { lindex $::log 0 } {^os_unix.c: open.*test.db$} + +#-------------------------------------------------------------------------- +# Tests oserror-2.* test failures in the unlink() system call. +# +do_test 2.1.1 { + set ::log [list] + file mkdir test.db-wal + forcedelete test.db + sqlite3 dbh test.db + catchsql { SELECT * FROM sqlite_master } dbh +} {1 {disk I/O error}} + +do_re_test 2.1.2 { lindex $::log 0 } {^os_unix.c: unlink.*test.db-wal$} +do_test 2.1.3 { + dbh close + forcedelete test.db-wal +} {} + + +sqlite3_shutdown +test_sqlite3_log +sqlite3_initialize +finish_test + Index: tool/mksqlite3h.tcl ================================================================== --- tool/mksqlite3h.tcl +++ tool/mksqlite3h.tcl @@ -51,11 +51,11 @@ # set in [open $TOP/manifest] set zDate {} while {![eof $in]} { set line [gets $in] - if {[regexp {^D (2.*[0-9])} $line all date]} { + if {[regexp {^D (2[-0-9T:]+)} $line all date]} { set zDate [string map {T { }} $date] break } } close $in