Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -250,10 +250,25 @@ int sqlite3WalTrace = 0; # define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X #else # define WALTRACE(X) #endif + +/* +** WAL tracing logic added to search for an SQLITE_PROTOCOL error. +*/ +static void walTrace(const char *zFormat, ...){ + va_list ap; + char zMsg[100]; + va_start(ap, zFormat); + sqlite3_vsnprintf(sizeof(zMsg), zMsg, zFormat, ap); + va_end(ap); +#ifdef SQLITE_WAL_TRACE + fprintf(stdout, "WALTRACE: [%s]\n", zMsg); fflush(stdout); +#endif + sqlite3_log(99, "%s", zMsg); +} /* ** The maximum (and only) versions of the wal and wal-index formats ** that may be interpreted by this version of SQLite. ** @@ -509,10 +524,11 @@ int nByte = sizeof(u32*)*(iPage+1); volatile u32 **apNew; apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte); if( !apNew ){ *ppPage = 0; + walTrace("realloc(%d) in walIndexPage()", nByte); return SQLITE_NOMEM; } memset((void*)&apNew[pWal->nWiData], 0, sizeof(u32*)*(iPage+1-pWal->nWiData)); pWal->apWiData = apNew; @@ -527,10 +543,11 @@ }else{ rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] ); } + if( rc ) walTrace("xShmMap():%d in walIndexPage(iPage=%d)",rc,iPage); } *ppPage = pWal->apWiData[iPage]; assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); return rc; @@ -1858,24 +1875,28 @@ ** with a writer. So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ pWal->writeLock = 1; + walTrace("trying walIndexTryHdr w/lock"); if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding ** a WRITE lock, it can only mean that the header is corrupted and ** needs to be reconstructed. So run recovery to do exactly that. */ + walTrace("walIndexTryHdr() failed w/lock"); rc = walIndexRecover(pWal); + if( rc ) walTrace("walIndexRecover():%d", rc); *pChanged = 1; } } pWal->writeLock = 0; walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); } + else if(badHdr) walTrace("walLockExcl():%d in walIndexReadHdr()", rc); /* If the header is read successfully, check the version number to make ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ @@ -1949,14 +1970,33 @@ int i; /* Loop counter */ int rc = SQLITE_OK; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ - /* Take steps to avoid spinning forever if there is a protocol error. */ + /* Take steps to avoid spinning forever if there is a protocol error. + ** + ** Circumstances that cause a RETRY should only last for the briefest + ** instances of time. No I/O or other system calls are done while the + ** locks are held, so the locks should not be held for very long. But + ** if we are unlucky, another process that is holding a lock might get + ** paged out or take a page-fault that is time-consuming to resolve, + ** during the few nanoseconds that it is holding the lock. In that case, + ** it might take longer than normal for the lock to free. + ** + ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few + ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this + ** is more of a scheduler yield than an actual delay. But on the 10th + ** an subsequent retries, the delays start becoming longer and longer, + ** so that on the 100th (and last) RETRY we delay for 21 milliseconds. + ** The total delay time before giving up is less than 1 second. + */ if( cnt>5 ){ + int nDelay = 1; /* Pause time in microseconds */ + walTrace("cnt=%d",cnt); if( cnt>100 ) return SQLITE_PROTOCOL; - sqlite3OsSleep(pWal->pVfs, 1); + if( cnt>=10 ) nDelay = (cnt-9)*238; /* Max delay 21ms. Total delay 996ms */ + sqlite3OsSleep(pWal->pVfs, nDelay); } if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); if( rc==SQLITE_BUSY ){ @@ -2009,16 +2049,18 @@ ** is wrapped and written for that matter) before the READ_LOCK(0) ** is obtained, that is not necessarily true. A checkpointer may ** have started to backfill the appended frames but crashed before ** it finished. Leaving a corrupt image in the database file. */ + walTrace("wal read/write race - writer won"); walUnlockShared(pWal, WAL_READ_LOCK(0)); return WAL_RETRY; } pWal->readLock = 0; return SQLITE_OK; }else if( rc!=SQLITE_BUSY ){ + walTrace("walLockShared(0):%d in walTryBeginRead", rc); return rc; } } /* If we get this far, it means that the reader will want to use @@ -2034,41 +2076,35 @@ assert( thisMark!=READMARK_NOT_USED ); mxReadMark = thisMark; mxI = i; } } - if( mxI==0 ){ - /* If we get here, it means that all of the aReadMark[] entries between - ** 1 and WAL_NREADER-1 are zero. Try to initialize aReadMark[1] to - ** be mxFrame, then retry. - */ - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1); - if( rc==SQLITE_OK ){ - pInfo->aReadMark[1] = pWal->hdr.mxFrame; - walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1); - rc = WAL_RETRY; - }else if( rc==SQLITE_BUSY ){ - rc = WAL_RETRY; - } - return rc; - }else{ - if( mxReadMark < pWal->hdr.mxFrame ){ + /* There was once an "if" here. The extra "{" is to preserve indentation. */ + { + if( mxReadMark < pWal->hdr.mxFrame || mxI==0 ){ for(i=1; iaReadMark[i] = pWal->hdr.mxFrame; mxI = i; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); break; }else if( rc!=SQLITE_BUSY ){ + walTrace("walLockExclusive(%d):%d", i, rc); return rc; } } } + if( mxI==0 ){ + assert( rc==SQLITE_BUSY ); + walTrace("all readlocks busy: cannot set read mark"); + return WAL_RETRY; + } rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); if( rc ){ + walTrace("walLockShared(mxI=%d):%d", mxI, rc); return rc==SQLITE_BUSY ? WAL_RETRY : rc; } /* Now that the read-lock has been obtained, check that neither the ** value in the aReadMark[] array or the contents of the wal-index ** header have changed. @@ -2439,10 +2475,12 @@ if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ + u32 salt1; + sqlite3_randomness(4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL), then the transactions ** frames will overwrite the start of the existing log. Update the @@ -2456,11 +2494,11 @@ int i; /* Loop counter */ u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ pWal->nCkpt++; pWal->hdr.mxFrame = 0; sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); - sqlite3_randomness(4, &aSalt[1]); + aSalt[1] = salt1; walIndexWriteHdr(pWal); pInfo->nBackfill = 0; for(i=1; iaReadMark[i] = READMARK_NOT_USED; assert( pInfo->aReadMark[0]==0 ); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);