Index: src/pcache.h ================================================================== --- src/pcache.h +++ src/pcache.h @@ -52,10 +52,12 @@ #define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before ** writing this page to the database */ #define PGHDR_NEED_READ 0x010 /* Content is unread */ #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ #define PGHDR_MMAP 0x040 /* This is an mmap page object */ + +#define PGHDR_WAL_APPEND 0x080 /* Appended to wal file */ /* Initialize and shutdown the page cache subsystem */ int sqlite3PcacheInitialize(void); void sqlite3PcacheShutdown(void); Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -443,10 +443,11 @@ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ + u32 iReCksum; /* On commit, recalculate checksums from here */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ #endif @@ -696,18 +697,20 @@ int nativeCksum; /* True for native byte-order checksums */ u32 *aCksum = pWal->hdr.aFrameCksum; assert( WAL_FRAME_HDRSIZE==24 ); sqlite3Put4byte(&aFrame[0], iPage); sqlite3Put4byte(&aFrame[4], nTruncate); - memcpy(&aFrame[8], pWal->hdr.aSalt, 8); - - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - - sqlite3Put4byte(&aFrame[16], aCksum[0]); - sqlite3Put4byte(&aFrame[20], aCksum[1]); + if( pWal->iReCksum==0 ){ + memcpy(&aFrame[8], pWal->hdr.aSalt, 8); + + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + + sqlite3Put4byte(&aFrame[16], aCksum[0]); + sqlite3Put4byte(&aFrame[20], aCksum[1]); + } } /* ** Check to see if the frame with header in aFrame[] and content ** in aData[] is valid. If it is a valid frame, fill *piPage and @@ -2630,10 +2633,11 @@ int rc; /* Cannot start a write transaction without first holding a read ** transaction. */ assert( pWal->readLock>=0 ); + assert( pWal->writeLock==0 && pWal->iReCksum==0 ); if( pWal->readOnly ){ return SQLITE_READONLY; } @@ -2665,10 +2669,11 @@ */ int sqlite3WalEndWriteTransaction(Wal *pWal){ if( pWal->writeLock ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; + pWal->iReCksum = 0; pWal->truncateOnCommit = 0; } return SQLITE_OK; } @@ -2882,10 +2887,63 @@ if( rc ) return rc; /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); return rc; } + +/* +** This function is called as part of committing a transaction within which +** one or more frames have been overwritten. It updates the checksums for +** all frames written to the wal file by the current transaction starting +** with the earliest to have been overwritten. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int walRewriteChecksums(Wal *pWal, u32 iLast){ + const int szPage = pWal->szPage;/* Database page size */ + int rc = SQLITE_OK; /* Return code */ + u8 *aBuf; /* Buffer to load data from wal file into */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ + u32 iRead; /* Next frame to read from wal file */ + i64 iCksumOff; + + aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); + if( aBuf==0 ) return SQLITE_NOMEM; + + /* Find the checksum values to use as input for the recalculating the + ** first checksum. If the first frame is frame 1 (implying that the current + ** transaction restarted the wal file), these values must be read from the + ** wal-file header. Otherwise, read them from the frame header of the + ** previous frame. */ + assert( pWal->iReCksum>0 ); + if( pWal->iReCksum==1 ){ + iCksumOff = 24; + }else{ + iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; + } + rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); + pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); + pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); + + iRead = pWal->iReCksum; + pWal->iReCksum = 0; + for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ + i64 iOff = walFrameOffset(iRead, szPage); + rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); + if( rc==SQLITE_OK ){ + u32 iPgno, nDbSize; + iPgno = sqlite3Get4byte(aBuf); + nDbSize = sqlite3Get4byte(&aBuf[4]); + + walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); + rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); + } + } + + sqlite3_free(aBuf); + return rc; +} /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). */ @@ -2903,10 +2961,12 @@ PgHdr *pLast = 0; /* Last frame in list */ int nExtra = 0; /* Number of extra copies of last page */ int szFrame; /* The size of a single frame */ i64 iOffset; /* Next byte to write in WAL file */ WalWriter w; /* The writer */ + u32 iFirst = 0; /* First frame that may be overwritten */ + WalIndexHdr *pLive; /* Pointer to shared header */ assert( pList ); assert( pWal->writeLock ); /* If this frame set completes a transaction, then nTruncate>0. If @@ -2917,10 +2977,15 @@ { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); } #endif + + pLive = (WalIndexHdr*)walIndexHdr(pWal); + if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){ + iFirst = pLive->mxFrame+1; + } /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ @@ -2982,17 +3047,45 @@ szFrame = szPage + WAL_FRAME_HDRSIZE; /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ + + /* Check if this page has already been written into the wal file by + ** the current transaction. If so, overwrite the existing frame and + ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that + ** checksums must be recomputed when the transaction is committed. */ + if( iFirst && (p->pDirty || isCommit==0) ){ + u32 iWrite = 0; + VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite); + assert( rc==SQLITE_OK || iWrite==0 ); + if( iWrite>=iFirst ){ + i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; + if( pWal->iReCksum==0 || iWriteiReCksum ){ + pWal->iReCksum = iWrite; + } + rc = sqlite3OsWrite(pWal->pWalFd, p->pData, szPage, iOff); + if( rc ) return rc; + p->flags &= ~PGHDR_WAL_APPEND; + continue; + } + } + iFrame++; assert( iOffset==walFrameOffset(iFrame, szPage) ); nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; rc = walWriteOneFrame(&w, p, nDbSize, iOffset); if( rc ) return rc; pLast = p; iOffset += szFrame; + p->flags |= PGHDR_WAL_APPEND; + } + + /* Recalculate checksums within the wal file if required. */ + if( isCommit && pWal->iReCksum ){ + rc = walRewriteChecksums(pWal, iFrame); + if( rc ) return rc; } /* If this is the end of a transaction, then we might need to pad ** the transaction and/or sync the WAL file. ** @@ -3040,10 +3133,11 @@ ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. */ iFrame = pWal->hdr.mxFrame; for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ + if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } while( rc==SQLITE_OK && nExtra>0 ){ iFrame++; @@ -3152,10 +3246,11 @@ } } /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); } Index: test/wal.test ================================================================== --- test/wal.test +++ test/wal.test @@ -710,29 +710,36 @@ } } {16 ok} do_test wal-11.6 { execsql COMMIT list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 3 [wal_file_size 41 1024]] +} [list 3 [wal_file_size 40 1024]] do_test wal-11.7 { execsql { SELECT count(*) FROM t1; PRAGMA integrity_check; } } {16 ok} do_test wal-11.8 { execsql { PRAGMA wal_checkpoint } list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 37 [wal_file_size 41 1024]] +} [list 37 [wal_file_size 40 1024]] do_test wal-11.9 { db close list [expr [file size test.db]/1024] [log_deleted test.db-wal] } {37 1} sqlite3_wal db test.db -set nWal 39 -if {[permutation]!="mmap"} {set nWal 37} -ifcapable !mmap {set nWal 37} + +# After adding the capability of WAL to overwrite prior uncommitted +# frame in the WAL-file with revised content, the size of the WAL file +# following cache-spill is smaller. +# +#set nWal 39 +#if {[permutation]!="mmap"} {set nWal 37} +#ifcapable !mmap {set nWal 37} +set nWal 34 + do_test wal-11.10 { execsql { PRAGMA cache_size = 10; BEGIN; INSERT INTO t1 SELECT blob(900) FROM t1; -- 32 ADDED test/waloverwrite.test Index: test/waloverwrite.test ================================================================== --- /dev/null +++ test/waloverwrite.test @@ -0,0 +1,164 @@ +# 2010 May 5 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/wal_common.tcl +set testprefix waloverwrite + +ifcapable !wal {finish_test ; return } + +# Simple test: +# +# Test cases *.1 - *.6: +# +# + Create a database of blobs roughly 50 pages in size. +# +# + Set the db cache size to something much smaller than this (5 pages) +# +# + Within a transaction, loop through the set of blobs 5 times. Update +# each blob as it is visited. +# +# + Test that the wal file is roughly 50 pages in size - even though many +# database pages have been written to it multiple times. +# +# + Take a copy of the database and wal file. Test that recovery can +# be run on it. +# +# Test cases *.7 - *.9: +# +# + Same thing, but before committing the statement transaction open +# a SAVEPOINT, update the blobs another 5 times, then roll it back. +# +# + Check that if recovery is run on the resulting wal file, the rolled +# back changes from within the SAVEPOINT are not present in the db. +# +# The above is run twice - once where the wal file is empty at the start of +# step 3 (tn==1) and once where it already contains a transaction (tn==2). +# +foreach {tn xtra} { + 1 {} + 2 { UPDATE t1 SET y = randomblob(799) WHERE x=4 } +} { + reset_db + do_execsql_test 1.$tn.0 { + CREATE TABLE t1(x, y); + CREATE TABLE t2(x, y); + CREATE INDEX i1y ON t1(y); + + WITH cnt(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<20 + ) + INSERT INTO t1 SELECT i, randomblob(800) FROM cnt; + } {} + + do_test 1.$tn.1 { + set nPg [db one { PRAGMA page_count } ] + expr $nPg>40 && $nPg<50 + } {1} + + do_test 1.$tn.2 { + db close + sqlite3 db test.db + + execsql {PRAGMA journal_mode = wal} + execsql {PRAGMA cache_size = 5} + execsql $xtra + + db transaction { + for {set i 0} {$i < 5} {incr i} { + foreach x [db eval {SELECT x FROM t1}] { + execsql { UPDATE t1 SET y = randomblob(799) WHERE x=$x } + } + } + } + + set nPg [wal_frame_count test.db-wal 1024] + expr $nPg>40 && $nPg<60 + } {1} + + do_execsql_test 1.$tn.3 { PRAGMA integrity_check } ok + + do_test 1.$tn.4 { + forcedelete test.db2 test.db2-wal + forcecopy test.db test.db2 + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*800] + + do_test 1.$tn.5 { + db2 close + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*799] + + do_test 1.$tn.6 { + execsql { PRAGMA integrity_check } db2 + } ok + db2 close + + do_test 1.$tn.7 { + execsql { PRAGMA wal_checkpoint } + db transaction { + for {set i 0} {$i < 1} {incr i} { + foreach x [db eval {SELECT x FROM t1}] { + execsql { UPDATE t1 SET y = randomblob(798) WHERE x=$x } + } + } + + execsql { + WITH cnt(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<20) + INSERT INTO t2 SELECT i, randomblob(800) FROM cnt; + } + + execsql {SAVEPOINT abc} + for {set i 0} {$i < 5} {incr i} { + foreach x [db eval {SELECT x FROM t1}] { + execsql { UPDATE t1 SET y = randomblob(797) WHERE x=$x } + } + } + breakpoint + execsql {ROLLBACK TO abc} + + } + + set nPg [wal_frame_count test.db-wal 1024] + expr $nPg>55 && $nPg<75 + } {1} + + do_test 1.$tn.8 { + forcedelete test.db2 test.db2-wal + forcecopy test.db test.db2 + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*799] + + do_test 1.$tn.9 { + db2 close + forcecopy test.db-wal test.db2-wal + sqlite3 db2 test.db2 + execsql { SELECT sum(length(y)) FROM t1 } db2 + } [expr 20*798] + + do_test 1.$tn.9 { + execsql { PRAGMA integrity_check } db2 + } ok + db2 close +} + +finish_test +