Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -426,10 +426,11 @@ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ + u32 minFrame; /* Ignore wal frames before this one */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ #endif @@ -2294,16 +2295,31 @@ ** copied into the database by a checkpointer. If either of these things ** happened, then reading the database with the current value of ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry ** instead. ** - ** This does not guarantee that the copy of the wal-index header is up to - ** date before proceeding. That would not be possible without somehow - ** blocking writers. It only guarantees that a dangerous checkpoint or - ** log-wrap (either of which would require an exclusive lock on - ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid. + ** Before checking that the live wal-index header has not changed + ** since it was read, set Wal.minFrame to the first frame in the wal + ** file that has not yet been checkpointed. This client will not need + ** to read any frames earlier than minFrame from the wal file - they + ** can be safely read directly from the database file. + ** + ** Because a ShmBarrier() call is made between taking the copy of + ** nBackfill and checking that the wal-header in shared-memory still + ** matches the one cached in pWal->hdr, it is guaranteed that the + ** checkpointer that set nBackfill was not working with a wal-index + ** header newer than that cached in pWal->hdr. If it were, that could + ** cause a problem. The checkpointer could omit to checkpoint + ** a version of page X that lies before pWal->minFrame (call that version + ** A) on the basis that there is a newer version (version B) of the same + ** page later in the wal file. But if version B happens to like past + ** frame pWal->hdr.mxFrame - then the client would incorrectly assume + ** that it can read version A from the database file. However, since + ** we can guarantee that the checkpointer that set nBackfill could not + ** see any pages past pWal->hdr.mxFrame, this problem does not come up. */ + pWal->minFrame = pInfo->nBackfill+1; walShmBarrier(pWal); if( pInfo->aReadMark[mxI]!=mxReadMark || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); @@ -2370,10 +2386,11 @@ u32 *piRead /* OUT: Frame number (or zero) */ ){ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ + int iMinHash; /* This routine is only be called from within a read transaction. */ assert( pWal->readLock>=0 || pWal->lockError ); /* If the "last page" field of the wal-index header snapshot is 0, then @@ -2410,11 +2427,12 @@ ** ** (iFrame<=iLast): ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. */ - for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){ + iMinHash = walFramePage(pWal->minFrame); + for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){ volatile ht_slot *aHash; /* Pointer to hash table */ volatile u32 *aPgno; /* Pointer to array of page numbers */ u32 iZero; /* Frame number corresponding to aPgno[0] */ int iKey; /* Hash slot index */ int nCollide; /* Number of hash collisions remaining */ @@ -2425,11 +2443,11 @@ return rc; } nCollide = HASHTABLE_NSLOT; for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; - if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ + if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){ assert( iFrame>iRead || CORRUPT_DB ); iRead = iFrame; } if( (nCollide--)==0 ){ return SQLITE_CORRUPT_BKPT; Index: test/wal6.test ================================================================== --- test/wal6.test +++ test/wal6.test @@ -190,8 +190,50 @@ db eval {SELECT test4('3.3.2')} do_test 3.x { db2 close } {} + +#------------------------------------------------------------------------- +# Check that if a wal file has been partially checkpointed, no frames are +# read from the checkpointed part. +# +reset_db +do_execsql_test 4.1 { + PRAGMA page_size = 1024; + PRAGMA journal_mode = wal; + CREATE TABLE t1(a, b); + CREATE TABLE t2(a, b); + PRAGMA wal_checkpoint = truncate; +} {wal 0 0 0} + +do_test 4.2 { + execsql { INSERT INTO t1 VALUES(1, 2) } + file size test.db-wal +} [wal_file_size 1 1024] + +do_test 4.3 { + sqlite3 db2 test.db + execsql { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + } + execsql { PRAGMA wal_checkpoint = passive } db2 +} {0 1 1} + +do_test 4.3 { + execsql { COMMIT } + db2 close + hexio_write test.db-wal 0 [string repeat 00 2000] + sqlite3 db2 test.db +} {} + +do_test 4.4.1 { + catchsql { SELECT * FROM t1 } db2 +} {0 {1 2}} +do_test 4.4.2 { + catchsql { SELECT * FROM t2 } db2 +} {1 {database disk image is malformed}} + finish_test