Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -67,10 +67,11 @@ memjournal.o \ mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \ notify.o opcodes.o os.o os_unix.o os_win.o \ pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \ random.o resolve.o rowset.o rtree.o select.o sqlite3rbu.o status.o \ + server.o \ table.o threads.o tokenize.o treeview.o trigger.o \ update.o userauth.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \ utf.o vtab.o @@ -142,10 +143,12 @@ $(TOP)/src/printf.c \ $(TOP)/src/random.c \ $(TOP)/src/resolve.c \ $(TOP)/src/rowset.c \ $(TOP)/src/select.c \ + $(TOP)/src/server.c \ + $(TOP)/src/server.h \ $(TOP)/src/status.c \ $(TOP)/src/shell.c \ $(TOP)/src/sqlite.h.in \ $(TOP)/src/sqlite3ext.h \ $(TOP)/src/sqliteInt.h \ Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -5614,10 +5614,259 @@ } pCur->ix--; return SQLITE_OK; } +#ifdef SQLITE_SERVER_EDITION + +#define SERVER_DEFAULT_FREELISTS 16 +#define SERVER_DEFAULT_FREELIST_SIZE 128 + +/* +** Allocate the free-node and the first SERVER_DEFAULT_FREELISTS +** trunk pages. 
+*/ +static int allocateServerFreenode(BtShared *pBt){ + int rc; + MemPage *pPage1 = pBt->pPage1; + + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc==SQLITE_OK ){ + Pgno pgnoNode = (++pBt->nPage); + MemPage *pNode = 0; + int i; + + put4byte(&pPage1->aData[32], pgnoNode); + rc = btreeGetUnusedPage(pBt, pgnoNode, &pNode, PAGER_GET_NOCONTENT); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pNode->pDbPage); + } + if( rc==SQLITE_OK ){ + put4byte(&pNode->aData[0], 0); + put4byte(&pNode->aData[4], SERVER_DEFAULT_FREELISTS); + } + for(i=0; rc==SQLITE_OK && inPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + pgnoTrunk = pBt->nPage; + + rc = btreeGetUnusedPage(pBt, pgnoTrunk, &pTrunk, PAGER_GET_NOCONTENT); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pTrunk->pDbPage); + } + if( rc==SQLITE_OK ){ + memset(pTrunk->aData, 0, 8); + put4byte(&pNode->aData[8+i*4], pgnoTrunk); + } + releasePage(pTrunk); + } + releasePage(pNode); + } + + return rc; +} + +/* +** Return a reference to the first trunk page in one of the database free-lists. +** Allocate the database free-lists if required. +*/ +static int findServerTrunk(BtShared *pBt, int bAlloc, MemPage **ppTrunk){ + MemPage *pPage1 = pBt->pPage1; + MemPage *pNode = 0; /* The node page */ + MemPage *pTrunk = 0; /* The returned page */ + Pgno iNode; /* Page number of node page */ + int rc = SQLITE_OK; + + /* If the node page and free-list trunks have not yet been allocated, allocate + ** them now. */ + pPage1 = pBt->pPage1; + iNode = get4byte(&pPage1->aData[32]); + if( iNode==0 ){ + rc = allocateServerFreenode(pBt); + iNode = get4byte(&pPage1->aData[32]); + } + + /* Grab the node page */ + if( rc==SQLITE_OK ){ + rc = btreeGetUnusedPage(pBt, iNode, &pNode, 0); + } + if( rc==SQLITE_OK ){ + int nList; /* Number of free-lists in this db */ + int i; + + /* Try to lock a free-list trunk. If bAlloc is true, it has to be a + ** free-list trunk with at least one entry in the free-list. 
*/ + nList = (int)get4byte(&pNode->aData[4]); + for(i=0; iaData[8+i*4]); + if( SQLITE_OK==sqlite3PagerPagelock(pBt->pPager, iTrunk, 1) ){ + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); + if( rc==SQLITE_OK && bAlloc ){ + if( !get4byte(&pTrunk->aData[0]) && !get4byte(&pTrunk->aData[4]) ){ + releasePage(pTrunk); + pTrunk = 0; + } + } + if( rc!=SQLITE_OK || pTrunk ) break; + } + } + + /* No free pages in any free-list. Or perhaps we were locked out. In + ** either case, try to allocate more from the end of the file now. */ + if( i==nList ){ + assert( rc==SQLITE_OK && pTrunk==0 ); + rc = sqlite3PagerWrite(pPage1->pDbPage); + for(i=0; rc==SQLITE_OK && iaData[8+i*4]); + rc = btreeGetUnusedPage(pBt, iTrunk, &pT, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pT->pDbPage); + } + if( rc==SQLITE_OK ){ + int iPg = get4byte(&pT->aData[4]); + for(/*no-op*/; iPgnPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + put4byte(&pT->aData[8+iPg*4], pBt->nPage); + } + put4byte(&pT->aData[4], iPg); + if( pTrunk==0 ){ + pTrunk = pT; + pT = 0; + } + } + releasePage(pT); + } + if( rc==SQLITE_OK ){ + MemPage *pLast = 0; + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pLast, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pLast->pDbPage); + releasePage(pLast); + put4byte(28 + (u8*)pPage1->aData, pBt->nPage); + } + } + } + } + + releasePage(pNode); + if( rc==SQLITE_OK ){ + assert( pTrunk ); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + } + if( rc!=SQLITE_OK ){ + releasePage(pTrunk); + pTrunk = 0; + } + *ppTrunk = pTrunk; + return rc; +} + +static int allocateServerPage( + BtShared *pBt, /* The btree */ + MemPage **ppPage, /* Store pointer to the allocated page here */ + Pgno *pPgno, /* Store the page number here */ + Pgno nearby, /* Search for a page near this one */ + u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ +){ + int rc; /* Return code */ + MemPage *pTrunk = 0; /* The node page */ + Pgno pgnoNew = 0; + +#ifdef SQLITE_DEBUG + int nRef = sqlite3PagerRefcount(pBt->pPager); 
+#endif + + assert( eMode==BTALLOC_ANY ); + assert( sqlite3_mutex_held(pBt->mutex) ); + + *ppPage = 0; + rc = findServerTrunk(pBt, 1, &pTrunk); + if( rc==SQLITE_OK ){ + int nFree; /* Number of free pages on this trunk page */ + nFree = (int)get4byte(&pTrunk->aData[4]); + if( nFree==0 ){ + pgnoNew = get4byte(&pTrunk->aData[0]); + assert( pgnoNew ); + }else{ + nFree--; + pgnoNew = get4byte(&pTrunk->aData[8+4*nFree]); + put4byte(&pTrunk->aData[4], (u32)nFree); + releasePage(pTrunk); + pTrunk = 0; + } + } + + if( rc==SQLITE_OK ){ + MemPage *pNew = 0; + int flags = pTrunk ? 0 : PAGER_GET_NOCONTENT; + rc = btreeGetUnusedPage(pBt, pgnoNew, &pNew, flags); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pNew->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pNew); + pNew = 0; + } + } + if( rc==SQLITE_OK && pTrunk ){ + memcpy(pTrunk->aData, pNew->aData, pBt->usableSize); + } + *ppPage = pNew; + *pPgno = pgnoNew; + } + + releasePage(pTrunk); + assert( (rc==SQLITE_OK)==(*ppPage!=0) ); + assert( sqlite3PagerRefcount(pBt->pPager)==(nRef+(*ppPage!=0)) ); + return rc; +} + +static int freeServerPage2(BtShared *pBt, MemPage *pPage, Pgno iPage){ + int rc; /* Return code */ + MemPage *pTrunk = 0; /* The node page */ +#ifdef SQLITE_DEBUG + int nRef = sqlite3PagerRefcount(pBt->pPager); +#endif + + assert( sqlite3_mutex_held(pBt->mutex) ); + rc = findServerTrunk(pBt, 0, &pTrunk); + if( rc==SQLITE_OK ){ + int nFree; /* Number of free pages on this trunk page */ + nFree = (int)get4byte(&pTrunk->aData[4]); + if( nFree>=((pBt->usableSize / 4) - 2) ){ + if( pPage==0 ){ + rc = btreeGetUnusedPage(pBt, iPage, &pPage, 0); + }else{ + sqlite3PagerRef(pPage->pDbPage); + } + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc==SQLITE_OK ){ + memcpy(pPage->aData, pTrunk->aData, pBt->usableSize); + put4byte(&pTrunk->aData[0], iPage); + put4byte(&pTrunk->aData[4], 0); + } + releasePage(pPage); + }else{ + put4byte(&pTrunk->aData[8+nFree*4], iPage); + put4byte(&pTrunk->aData[4], (u32)nFree+1); + } + 
releasePage(pTrunk); + } + + assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); + return rc; +} + +#else +# define allocateServerPage(v, w, x, y, z) SQLITE_OK +# define freeServerPage2(x, y, z) SQLITE_OK +#endif /* SQLITE_SERVER_EDITION */ + /* ** Allocate a new page from the database file. ** ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() ** has already been called on the new page.) The new page has also @@ -5650,10 +5899,14 @@ u32 n; /* Number of pages on the freelist */ u32 k; /* Number of leaves on the trunk of the freelist */ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ + + if( sqlite3PagerIsServer(pBt->pPager) ){ + return allocateServerPage(pBt, ppPage, pPgno, nearby, eMode); + } assert( sqlite3_mutex_held(pBt->mutex) ); assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); @@ -5978,16 +6231,10 @@ sqlite3PagerRef(pPage->pDbPage); }else{ pPage = btreePageLookup(pBt, iPage); } - /* Increment the free page count on pPage1 */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) goto freepage_out; - nFree = get4byte(&pPage1->aData[36]); - put4byte(&pPage1->aData[36], nFree+1); - if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) @@ -5995,10 +6242,21 @@ ){ goto freepage_out; } memset(pPage->aData, 0, pPage->pBt->pageSize); } + + if( sqlite3PagerIsServer(pBt->pPager) ){ + rc = freeServerPage2(pBt, pPage, iPage); + goto freepage_out; + } + + /* Increment the free page count on pPage1 */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) goto freepage_out; + nFree = get4byte(&pPage1->aData[36]); + put4byte(&pPage1->aData[36], nFree+1); /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. 
*/ if( ISAUTOVACUUM ){ @@ -9442,10 +9700,53 @@ return depth+1; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK + +#if !defined(SQLITE_OMIT_INTEGRITY_CHECK) && defined(SQLITE_SERVER_EDITION) +static void checkServerList(IntegrityCk *pCheck){ + u32 pgnoNode = get4byte(&pCheck->pBt->pPage1->aData[32]); + if( pgnoNode ){ + DbPage *pNode = 0; + u8 *aNodeData; + u32 nList; /* Number of free-lists */ + int i; + + checkRef(pCheck, pgnoNode); + if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoNode, &pNode, 0) ){ + checkAppendMsg(pCheck, "failed to get node page %d", pgnoNode); + return; + } + aNodeData = sqlite3PagerGetData(pNode); + nList = get4byte(&aNodeData[4]); + for(i=0; ipPager, (Pgno)pgnoTrunk, &pTrunk, 0) ){ + checkAppendMsg(pCheck, "failed to get page %d", pgnoTrunk); + pgnoTrunk = 0; + }else{ + u8 *aTrunkData = sqlite3PagerGetData(pTrunk); + int nLeaf = (int)get4byte(&aTrunkData[4]); + int iLeaf; + for(iLeaf=0; iLeafpPage1->aData[32]), - get4byte(&pBt->pPage1->aData[36])); +#ifdef SQLITE_SERVER_EDITION + if( sqlite3PagerIsServer(pBt->pPager) ){ + checkServerList(&sCheck); + }else +#endif + { + checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), + get4byte(&pBt->pPage1->aData[36])); + } sCheck.zPfx = 0; /* Check all the tables. */ testcase( pBt->db->flags & SQLITE_CellSizeCk ); Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -704,10 +704,13 @@ PCache *pPCache; /* Pointer to page cache object */ #ifndef SQLITE_OMIT_WAL Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ char *zWal; /* File name for write-ahead log */ #endif +#ifdef SQLITE_SERVER_EDITION + Server *pServer; +#endif }; /* ** Indexes for use with Pager.aStat[]. 
The Pager.aStat[] array contains ** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS @@ -833,10 +836,16 @@ # define pagerRollbackWal(x) 0 # define pagerWalFrames(v,w,x,y) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK # define pagerBeginReadTransaction(z) SQLITE_OK #endif + +#ifdef SQLITE_SERVER_EDITION +# define pagerIsServer(x) ((x)->pServer!=0) +#else +# define pagerIsServer(x) 0 +#endif #ifndef NDEBUG /* ** Usage: ** @@ -1130,10 +1139,11 @@ int rc = SQLITE_OK; assert( !pPager->exclusiveMode || pPager->eLock==eLock ); assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); + assert( eLock!=NO_LOCK || pagerIsServer(pPager)==0 ); if( isOpen(pPager->fd) ){ assert( pPager->eLock>=eLock ); rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); if( pPager->eLock!=UNKNOWN_LOCK ){ pPager->eLock = (u8)eLock; @@ -1805,10 +1815,16 @@ sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; releaseAllSavepoints(pPager); +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + sqlite3ServerEnd(pPager->pServer); + pPager->eState = PAGER_OPEN; + }else +#endif if( pagerUseWal(pPager) ){ assert( !isOpen(pPager->jfd) ); sqlite3WalEndReadTransaction(pPager->pWal); pPager->eState = PAGER_OPEN; }else if( !pPager->exclusiveMode ){ @@ -2103,10 +2119,15 @@ if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){ rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0); if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; } +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + rc2 = sqlite3ServerReleaseWriteLocks(pPager->pServer); + }else +#endif if( !pPager->exclusiveMode && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) ){ rc2 = pagerUnlockDb(pPager, SHARED_LOCK); pPager->changeCountDone = 0; @@ -4093,14 +4114,28 @@ ** database and close the journal file without attempting to roll it ** back or finalize it. 
The next database user will have to do hot-journal ** rollback before accessing the database file. */ if( isOpen(pPager->jfd) ){ + if( pagerIsServer(pPager) ){ + assert( pPager->journalMode==PAGER_JOURNALMODE_PERSIST ); + pPager->journalMode = PAGER_JOURNALMODE_DELETE; + /* If necessary, change the pager state so that the journal file + ** is deleted by the call to pagerUnlockAndRollback() below. */ + if( pPager->eState==PAGER_OPEN ) pPager->eState = PAGER_READER; + } pager_error(pPager, pagerSyncHotJournal(pPager)); } pagerUnlockAndRollback(pPager); } +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + sqlite3ServerDisconnect(pPager->pServer, pPager->fd); + pPager->pServer = 0; + sqlite3_free(pPager->zJournal); + } +#endif sqlite3EndBenignMalloc(); enable_simulated_io_errors(); PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); IOTRACE(("CLOSE %p\n", pPager)) sqlite3OsClose(pPager->jfd); @@ -5048,10 +5083,84 @@ } } return rc; } + +#ifdef SQLITE_SERVER_EDITION +static int pagerServerConnect(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->tempFile==0 ){ + int iClient = 0; + pPager->noLock = 1; + pPager->journalMode = PAGER_JOURNALMODE_PERSIST; + rc = sqlite3ServerConnect(pPager, &pPager->pServer, &iClient); + if( rc==SQLITE_OK ){ + pPager->zJournal = sqlite3_mprintf( + "%s-journal%d", pPager->zFilename, iClient + ); + if( pPager->zJournal==0 ){ + rc = SQLITE_NOMEM_BKPT; + } + } + } + return rc; +} + +int sqlite3PagerRollbackJournal(Pager *pPager, int iClient){ + int rc; + char *zJrnl = sqlite3_mprintf("%s-journal%d", pPager->zFilename, iClient); + + if( zJrnl ){ + int bExists = 0; + sqlite3_file *jfd = 0; + sqlite3_vfs * const pVfs = pPager->pVfs; + + rc = sqlite3OsAccess(pVfs, zJrnl, SQLITE_ACCESS_EXISTS, &bExists); + if( rc==SQLITE_OK && bExists ){ + int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; + rc = sqlite3OsOpenMalloc(pVfs, zJrnl, &jfd, flags, &flags); + } + assert( rc==SQLITE_OK || jfd==0 ); + if( jfd ){ + sqlite3_file *saved_jfd = 
pPager->jfd; + u8 saved_eState = pPager->eState; + u8 saved_eLock = pPager->eLock; + i64 saved_journalOff = pPager->journalOff; + i64 saved_journalHdr = pPager->journalHdr; + char *saved_zJournal = pPager->zJournal; + + pPager->eLock = EXCLUSIVE_LOCK; + pPager->eState = PAGER_WRITER_DBMOD; + pPager->jfd = jfd; + rc = pagerSyncHotJournal(pPager); + if( rc==SQLITE_OK ) rc = pager_playback(pPager, 1); + + pPager->jfd = saved_jfd; + pPager->eState = saved_eState; + pPager->eLock = saved_eLock; + pPager->journalOff = saved_journalOff; + pPager->journalHdr = saved_journalHdr; + pPager->zJournal = saved_zJournal; + + sqlite3OsCloseFree(jfd); + if( rc==SQLITE_OK ){ + rc = sqlite3OsDelete(pVfs, zJrnl, 0); + } + } + sqlite3_free(zJrnl); + }else{ + rc = SQLITE_NOMEM_BKPT; + } + + return rc; +} + +#else +# define pagerServerConnect(pPager) SQLITE_OK +#endif + /* ** This function is called to obtain a shared lock on the database file. ** It is illegal to call sqlite3PagerGet() until after this function ** has been successfully called. If a shared-lock is already held when @@ -5088,11 +5197,13 @@ assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); assert( assert_pager_state(pPager) ); assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); assert( pPager->errCode==SQLITE_OK ); - if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ + if( !pagerUseWal(pPager) + && !pagerIsServer(pPager) + && pPager->eState==PAGER_OPEN ){ int bHotJournal = 1; /* True if there exists a hot journal-file */ assert( !MEMDB ); assert( pPager->tempFile==0 || pPager->eLock==EXCLUSIVE_LOCK ); @@ -5259,21 +5370,32 @@ if( USEFETCH(pPager) ){ sqlite3OsUnfetch(pPager->fd, 0, 0); } } } + + rc = pagerServerConnect(pPager); /* If there is a WAL file in the file-system, open this database in WAL ** mode. Otherwise, the following function call is a no-op. 
*/ - rc = pagerOpenWalIfPresent(pPager); + if( rc==SQLITE_OK ){ + rc = pagerOpenWalIfPresent(pPager); + } #ifndef SQLITE_OMIT_WAL assert( pPager->pWal==0 || rc==SQLITE_OK ); #endif } - if( pagerUseWal(pPager) ){ +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + assert( rc==SQLITE_OK ); + pager_reset(pPager); + rc = sqlite3ServerBegin(pPager->pServer); + } +#endif + if( rc==SQLITE_OK && pagerUseWal(pPager) ){ assert( rc==SQLITE_OK ); rc = pagerBeginReadTransaction(pPager); } if( pPager->tempFile==0 && pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){ @@ -5562,10 +5684,16 @@ Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ int flags /* PAGER_GET_XXX flags */ ){ +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + int rc = sqlite3ServerLock(pPager->pServer, pgno, 0, 0); + if( rc!=SQLITE_OK ) return rc; + } +#endif return pPager->xGet(pPager, pgno, ppPage, flags); } /* ** Acquire a page if it is already in the in-memory cache. Do @@ -5863,10 +5991,17 @@ ); assert( assert_pager_state(pPager) ); assert( pPager->errCode==0 ); assert( pPager->readOnly==0 ); CHECK_PAGE(pPg); + +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + rc = sqlite3ServerLock(pPager->pServer, pPg->pgno, 1, 0); + if( rc!=SQLITE_OK ) return rc; + } +#endif /* The journal file needs to be opened. Higher level routines have already ** obtained the necessary locks to begin the write-transaction, but the ** rollback journal might not yet be open. Open it now if this is the case. ** @@ -6142,11 +6277,14 @@ UNUSED_PARAMETER(isDirectMode); #else # define DIRECT_MODE isDirectMode #endif - if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ + if( 0==pagerIsServer(pPager) + && !pPager->changeCountDone + && ALWAYS(pPager->dbSize>0) + ){ PgHdr *pPgHdr; /* Reference to page 1 */ assert( !pPager->tempFile && isOpen(pPager->fd) ); /* Open page 1 of the file for writing. 
*/ @@ -6301,10 +6439,14 @@ /* If this is an in-memory db, or no pages have been written to, or this ** function has already been called, it is mostly a no-op. However, any ** backup in progress needs to be restarted. */ sqlite3BackupRestart(pPager->pBackup); }else{ + /* If this connection is in server mode, ignore any master journal. */ + if( pagerIsServer(pPager) ){ + zMaster = 0; + } if( pagerUseWal(pPager) ){ PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); PgHdr *pPageOne = 0; if( pList==0 ){ /* Must have at least one page for the WAL commit flag. @@ -7311,11 +7453,11 @@ ** Return true if the underlying VFS for the given pager supports the ** primitives necessary for write-ahead logging. */ int sqlite3PagerWalSupported(Pager *pPager){ const sqlite3_io_methods *pMethods = pPager->fd->pMethods; - if( pPager->noLock ) return 0; + if( pPager->noLock && !pagerIsServer(pPager) ) return 0; return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap); } /* ** Attempt to take an exclusive lock on the database file. 
If a PENDING lock @@ -7406,10 +7548,13 @@ rc = pagerOpenWal(pPager); if( rc==SQLITE_OK ){ pPager->journalMode = PAGER_JOURNALMODE_WAL; pPager->eState = PAGER_OPEN; +#ifdef SQLITE_SERVER_EDITION + sqlite3WalServer(pPager->pWal, pPager->pServer); +#endif } }else{ *pbOpen = 1; } @@ -7518,7 +7663,16 @@ int sqlite3PagerWalFramesize(Pager *pPager){ assert( pPager->eState>=PAGER_READER ); return sqlite3WalFramesize(pPager->pWal); } #endif + +#ifdef SQLITE_SERVER_EDITION +int sqlite3PagerIsServer(Pager *pPager){ + return pagerIsServer(pPager); +} +int sqlite3PagerPagelock(Pager *pPager, Pgno pgno, int bWrite){ + return sqlite3ServerLock(pPager->pServer, pgno, bWrite, 0); +} +#endif #endif /* SQLITE_OMIT_DISKIO */ Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -233,7 +233,13 @@ void enable_simulated_io_errors(void); #else # define disable_simulated_io_errors() # define enable_simulated_io_errors() #endif + +#ifdef SQLITE_SERVER_EDITION + int sqlite3PagerRollbackJournal(Pager*, int); + int sqlite3PagerIsServer(Pager *pPager); + int sqlite3PagerPagelock(Pager *pPager, Pgno, int); +#endif #endif /* SQLITE_PAGER_H */ ADDED src/server.c Index: src/server.c ================================================================== --- /dev/null +++ src/server.c @@ -0,0 +1,601 @@ +/* +** 2017 April 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#include "sqliteInt.h" + +/* +** HMA file layout: +** +** 4 bytes - DMS slot. All connections read-lock this slot. +** +** 16*4 bytes - locking slots. 
Connections hold a read-lock on a locking slot +** when they are connected, a write lock when they have an open +** transaction. +** +** N*4 bytes - Page locking slots. N is HMA_PAGELOCK_SLOTS. +** +** Page-locking slot format: +** +** Each page-locking slot provides SHARED/RESERVED/EXCLUSIVE locks on a +** single page. A RESERVED lock is similar to a RESERVED lock in SQLite's +** rollback mode - existing SHARED locks may continue but new SHARED locks +** may not be established. As in rollback mode, EXCLUSIVE and RESERVED +** locks are mutually exclusive. +** +** Each 32-bit locking slot is divided into two sections - a bitmask for +** read-locks and a single integer field for the write lock. The bitmask +** occupies the least-significant 27 bits of the slot. The integer field +** occupies the remaining 5 bits (so that it can store values from 0-31). +** +** Each client has a unique integer client id. Currently these range from +** 0-15 (maximum of 16 concurrent connections). The page-locking slot format +** allows this to be increased to 0-26 (maximum of 27 connections). 
To +** take a SHARED lock, the corresponding bit is set in the locking slot +** bitmask: +** +** slot = slot | (1 << iClient); +** +** To take an EXCLUSIVE or RESERVED lock, the integer part of the locking +** slot is set to the client-id of the locker plus one (a value of zero +** indicates that no connection holds a RESERVED or EXCLUSIVE lock): +** +** slot = slot | ((iClient+1) << 27) +*/ + +#ifdef SQLITE_SERVER_EDITION + +#define HMA_CLIENT_SLOTS 16 +#define HMA_PAGELOCK_SLOTS (256*1024) + +#define HMA_FILE_SIZE (4 + 4*HMA_CLIENT_SLOTS + 4*HMA_PAGELOCK_SLOTS) + +#include "unistd.h" +#include "fcntl.h" +#include "sys/mman.h" +#include "sys/types.h" +#include "sys/stat.h" +#include "errno.h" + +typedef struct ServerHMA ServerHMA; + +struct ServerGlobal { + ServerHMA *pHma; /* Linked list of all ServerHMA objects */ +}; +static struct ServerGlobal g_server; + +/* +** There is one instance of the following structure for each distinct +** HMA file opened by clients within this process. +*/ +struct ServerHMA { + char *zName; /* hma file path */ + int fd; /* Fd open on hma file */ + int nClient; /* Current number of clients */ + Server *aClient[HMA_CLIENT_SLOTS]; /* Local (this process) clients */ + u32 *aMap; /* MMapped hma file */ + ServerHMA *pNext; /* Next HMA in this process */ + + dev_t st_dev; + ino_t st_ino; +}; + +struct Server { + ServerHMA *pHma; /* Hma file object */ + int iClient; /* Client id */ + Pager *pPager; /* Associated pager object */ + i64 nUsWrite; /* Cumulative us holding WRITER lock */ + i64 iUsWrite; /* Time WRITER lock was taken */ + int nAlloc; /* Allocated size of aLock[] array */ + int nLock; /* Number of entries in aLock[] */ + u32 *aLock; /* Mapped lock file */ +}; + +#define SERVER_WRITE_LOCK 3 +#define SERVER_READ_LOCK 2 +#define SERVER_NO_LOCK 1 + +/* +** Global mutex functions used by code in this file. 
+*/ +static void serverEnterMutex(void){ + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1)); +} +static void serverLeaveMutex(void){ + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1)); +} +static void serverAssertMutexHeld(void){ + assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1)) ); +} + +static int posixLock(int fd, int iSlot, int eLock, int bBlock){ + int res; + struct flock l; + short aType[4] = {0, F_UNLCK, F_RDLCK, F_WRLCK}; + assert( eLock==SERVER_WRITE_LOCK + || eLock==SERVER_READ_LOCK + || eLock==SERVER_NO_LOCK + ); + memset(&l, 0, sizeof(l)); + l.l_type = aType[eLock]; + l.l_whence = SEEK_SET; + l.l_start = iSlot*sizeof(u32); + l.l_len = 1; + + res = fcntl(fd, (bBlock ? F_SETLKW : F_SETLK), &l); + if( res && bBlock && errno==EDEADLK ){ + return SQLITE_BUSY_DEADLOCK; + } + return (res==0 ? SQLITE_OK : SQLITE_BUSY); +} + +static int serverMapFile(ServerHMA *p){ + assert( p->aMap==0 ); + p->aMap = mmap(0, HMA_FILE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0); + if( p->aMap==0 ){ + return SQLITE_ERROR; + } + return SQLITE_OK; +} + + +static void serverDecrHmaRefcount(ServerHMA *pHma){ + if( pHma ){ + pHma->nClient--; + if( pHma->nClient<=0 ){ + ServerHMA **pp; + if( pHma->aMap ) munmap(pHma->aMap, HMA_FILE_SIZE); + if( pHma->fd>=0 ) close(pHma->fd); + for(pp=&g_server.pHma; *pp!=pHma; pp=&(*pp)->pNext); + *pp = pHma->pNext; + sqlite3_free(pHma); + } + } +} + + +static int serverOpenHma(Pager *pPager, const char *zPath, ServerHMA **ppHma){ + struct stat sStat; /* Structure populated by stat() */ + int res; /* result of stat() */ + int rc = SQLITE_OK; /* Return code */ + ServerHMA *pHma = 0; + + serverAssertMutexHeld(); + + res = stat(zPath, &sStat); + if( res!=0 ){ + sqlite3_log(SQLITE_CANTOPEN, "Failed to stat(%s)", zPath); + rc = SQLITE_ERROR; + }else{ + for(pHma=g_server.pHma; pHma; pHma=pHma->pNext){ + if( sStat.st_dev==pHma->st_dev && sStat.st_ino==pHma->st_ino ) break; + } + if( pHma==0 ){ + int 
nPath = strlen(zPath); + int nByte = sizeof(ServerHMA) + nPath+1 + 4; + + pHma = (ServerHMA*)sqlite3_malloc(nByte); + if( pHma==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + memset(pHma, 0, nByte); + pHma->zName = (char*)&pHma[1]; + pHma->nClient = 1; + pHma->st_dev = sStat.st_dev; + pHma->st_ino = sStat.st_ino; + pHma->pNext = g_server.pHma; + g_server.pHma = pHma; + + memcpy(pHma->zName, zPath, nPath); + memcpy(&pHma->zName[nPath], "-hma", 5); + + pHma->fd = open(pHma->zName, O_RDWR|O_CREAT, 0644); + if( pHma->fd<0 ){ + sqlite3_log(SQLITE_CANTOPEN, "Failed to open(%s)", pHma->zName); + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK ){ + /* Write-lock the DMS slot. If successful, initialize the hma file. */ + rc = posixLock(pHma->fd, 0, SERVER_WRITE_LOCK, 0); + if( rc==SQLITE_OK ){ + res = ftruncate(pHma->fd, HMA_FILE_SIZE); + if( res!=0 ){ + sqlite3_log(SQLITE_CANTOPEN, + "Failed to ftruncate(%s)", pHma->zName + ); + rc = SQLITE_ERROR; + } + if( rc==SQLITE_OK ){ + rc = serverMapFile(pHma); + } + if( rc==SQLITE_OK ){ + memset(pHma->aMap, 0, HMA_FILE_SIZE); + }else{ + rc = SQLITE_ERROR; + } + for(i=0; rc==SQLITE_OK && ifd, 0, SERVER_READ_LOCK, 1); + } + } + + if( rc!=SQLITE_OK ){ + serverDecrHmaRefcount(pHma); + pHma = 0; + } + } + }else{ + pHma->nClient++; + } + } + + *ppHma = pHma; + return rc; +} + +static u32 *serverPageLockSlot(Server *p, Pgno pgno){ + int iSlot = pgno % HMA_PAGELOCK_SLOTS; + return &p->pHma->aMap[1 + HMA_CLIENT_SLOTS + iSlot]; +} +static u32 *serverClientSlot(Server *p, int iClient){ + return &p->pHma->aMap[1 + iClient]; +} + +/* +** Close the "connection" and *-hma file. This deletes the object passed +** as the first argument. 
+*/ +void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd){ + if( p->pHma ){ + ServerHMA *pHma = p->pHma; + serverEnterMutex(); + if( p->iClient>=0 ){ + u32 *pSlot = serverClientSlot(p, p->iClient); + *pSlot = 0; + assert( pHma->aClient[p->iClient]==p ); + pHma->aClient[p->iClient] = 0; + posixLock(pHma->fd, p->iClient+1, SERVER_NO_LOCK, 0); + } + if( dbfd + && pHma->nClient==1 + && SQLITE_OK==sqlite3OsLock(dbfd, SQLITE_LOCK_EXCLUSIVE) + ){ + unlink(pHma->zName); + } + serverDecrHmaRefcount(pHma); + serverLeaveMutex(); + } + sqlite3_free(p->aLock); + sqlite3_free(p); +} + +static int serverRollbackClient(Server *p, int iBlock){ + int rc; + + sqlite3_log(SQLITE_NOTICE, "Rolling back failed client %d", iBlock); + + /* Roll back any journal file for client iBlock. */ + rc = sqlite3PagerRollbackJournal(p->pPager, iBlock); + + /* Clear any locks held by client iBlock from the HMA file. */ + if( rc==SQLITE_OK ){ + int i; + for(i=0; i>HMA_CLIENT_SLOTS)==iBlock+1 ){ + n = n & ((1<iClient = -1; + p->pPager = pPager; + + serverEnterMutex(); + rc = serverOpenHma(pPager, zPath, &p->pHma); + + /* File is now mapped. Find a free client slot. 
*/ + if( rc==SQLITE_OK ){ + int i; + Server **aClient = p->pHma->aClient; + int fd = p->pHma->fd; + for(i=0; iHMA_CLIENT_SLOTS ){ + rc = SQLITE_BUSY; + }else{ + u32 *pSlot = serverClientSlot(p, i); + *piClient = p->iClient = i; + aClient[i] = p; + *pSlot = 1; + } + } + } + + serverLeaveMutex(); + } + + if( rc!=SQLITE_OK ){ + sqlite3ServerDisconnect(p, 0); + p = 0; + } + *ppOut = p; + return rc; +} + +static int serverOvercomeLock( + Server *p, /* Server connection */ + int bWrite, /* True for a write-lock */ + int bBlock, /* If true, block for this lock */ + u32 v, /* Value of blocking page locking slot */ + int *pbRetry /* OUT: True if caller should retry lock */ +){ + int rc = SQLITE_OK; + int iBlock = ((int)(v>>HMA_CLIENT_SLOTS))-1; + + if( iBlock<0 || iBlock==p->iClient ){ + for(iBlock=0; iBlockiClient && (v & (1<pHma->aClient[iBlock] ){ + rc = posixLock(p->pHma->fd, iBlock+1, SERVER_WRITE_LOCK, 0); + if( rc==SQLITE_OK ){ + rc = serverRollbackClient(p, iBlock); + + /* Release the lock on slot iBlock */ + posixLock(p->pHma->fd, iBlock+1, SERVER_NO_LOCK, 0); + if( rc==SQLITE_OK ){ + *pbRetry = 1; + } + }else if( rc==SQLITE_BUSY ){ + if( bBlock ){ + rc = posixLock(p->pHma->fd, iBlock+1, SERVER_READ_LOCK, 1); + if( rc==SQLITE_OK ){ + posixLock(p->pHma->fd, iBlock+1, SERVER_NO_LOCK, 0); + *pbRetry = 1; + } + } + + if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + } + } + + serverLeaveMutex(); + + return rc; +} + +/* +** Begin a transaction. +*/ +int sqlite3ServerBegin(Server *p){ +#if 1 + int rc = posixLock(p->pHma->fd, p->iClient+1, SERVER_WRITE_LOCK, 1); + if( rc ) return rc; +#endif + return sqlite3ServerLock(p, 1, 0, 1); +} + +/* +** End a transaction (and release all locks). 
+*/ +int sqlite3ServerEnd(Server *p){ + int i; + for(i=0; inLock; i++){ + u32 *pSlot = serverPageLockSlot(p, p->aLock[i]); + while( 1 ){ + u32 v = *pSlot; + u32 n = v; + if( (v>>HMA_CLIENT_SLOTS)==p->iClient+1 ){ + n = n & ((1 << HMA_CLIENT_SLOTS)-1); + } + n = n & ~(1 << p->iClient); + if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; + } + if( p->aLock[i]==0 ){ + struct timeval t2; + i64 nUs; + gettimeofday(&t2, 0); + nUs = (i64)t2.tv_sec * 1000000 + t2.tv_usec - p->iUsWrite; + p->nUsWrite += nUs; + if( (p->nUsWrite / 1000000)!=((p->nUsWrite + nUs)/1000000) ){ + sqlite3_log(SQLITE_WARNING, + "Cumulative WRITER time: %lldms\n", p->nUsWrite/1000 + ); + } + } + } + p->nLock = 0; +#if 1 + return posixLock(p->pHma->fd, p->iClient+1, SERVER_READ_LOCK, 0); +#endif + return SQLITE_OK; +} + +/* +** Release all write-locks. +*/ +int sqlite3ServerReleaseWriteLocks(Server *p){ + int rc = SQLITE_OK; + return rc; +} + +/* +** Return the client id of the client that currently holds the EXCLUSIVE +** or RESERVED lock according to page-locking slot value v. Or -1 if no +** client holds such a lock. +*/ +int serverWriteLocker(u32 v){ + return ((int)(v >> HMA_CLIENT_SLOTS)) - 1; +} + +/* +** Lock page pgno for reading (bWrite==0) or writing (bWrite==1). +** +** If parameter bBlock is non-zero, then make this a blocking lock if +** possible. +*/ +int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock){ + int rc = SQLITE_OK; + int bReserved = 0; + u32 *pSlot = serverPageLockSlot(p, pgno); + + /* Grow the aLock[] array, if required */ + if( p->nLock==p->nAlloc ){ + int nNew = p->nAlloc ? p->nAlloc*2 : 128; + u32 *aNew; + aNew = (u32*)sqlite3_realloc(p->aLock, sizeof(u32)*nNew); + if( aNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + p->aLock = aNew; + p->nAlloc = nNew; + } + } + if( rc==SQLITE_OK ){ + u32 v = *pSlot; + + /* Check if the required lock is already held. If so, exit this function + ** early. 
Otherwise, add an entry to the aLock[] array to record the fact + ** that the lock may need to be released. */ + if( bWrite ){ + int iLock = ((int)(v>>HMA_CLIENT_SLOTS)) - 1; + if( iLock==p->iClient ) goto server_lock_out; + }else{ + if( v & (1<iClient) ) goto server_lock_out; + } + p->aLock[p->nLock++] = pgno; + + while( 1 ){ + u32 n; + int w; + u32 mask = (bWrite ? (((1<iClient)) : 0); + + while( ((w = serverWriteLocker(v))>=0 && w!=p->iClient) || (v & mask) ){ + int bRetry = 0; + + if( w<0 && bWrite && bBlock ){ + /* Attempt a RESERVED lock before anything else */ + n = v | ((p->iClient+1) << HMA_CLIENT_SLOTS); + assert( serverWriteLocker(n)==p->iClient ); + if( __sync_val_compare_and_swap(pSlot, v, n)!=v ){ + v = *pSlot; + continue; + } + v = n; + bReserved = 1; + } + + rc = serverOvercomeLock(p, bWrite, bBlock, v, &bRetry); + if( rc!=SQLITE_OK ) goto server_lock_out; + if( bRetry==0 ){ + /* There is a conflicting lock. Cannot obtain this lock. */ + sqlite3_log(SQLITE_BUSY_DEADLOCK, "Conflict at page %d", (int)pgno); + rc = SQLITE_BUSY_DEADLOCK; + goto server_lock_out; + } + + v = *pSlot; + } + + n = v | (1 << p->iClient); + if( bWrite ){ + n = n | ((p->iClient+1) << HMA_CLIENT_SLOTS); + } + if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; + v = *pSlot; + } + } + +server_lock_out: + if( rc!=SQLITE_OK && bReserved ){ + u32 n; + u32 v; + do{ + v = *pSlot; + assert( serverWriteLocker(v)==p->iClient ); + n = v & ((1<iUsWrite = ((i64)t1.tv_sec * 1000000) + (i64)t1.tv_usec; + } + assert( rc!=SQLITE_OK || sqlite3ServerHasLock(p, pgno, bWrite) ); + return rc; +} + +int sqlite3ServerHasLock(Server *p, Pgno pgno, int bWrite){ + u32 v = *serverPageLockSlot(p, pgno); + if( bWrite ){ + return (v>>HMA_CLIENT_SLOTS)==(p->iClient+1); + } + return (v & (1 << p->iClient))!=0; +} + +#endif /* ifdef SQLITE_SERVER_EDITION */ ADDED src/server.h Index: src/server.h ================================================================== --- /dev/null +++ src/server.h @@ -0,0 +1,36 
@@ +/* +** 2017 April 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#ifdef SQLITE_SERVER_EDITION + +#ifndef SQLITE_SERVER_H +#define SQLITE_SERVER_H + + +typedef struct Server Server; + +int sqlite3ServerConnect(Pager *pPager, Server **ppOut, int *piClient); + +void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd); + +int sqlite3ServerBegin(Server *p); +int sqlite3ServerEnd(Server *p); +int sqlite3ServerReleaseWriteLocks(Server *p); + +int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock); + +int sqlite3ServerHasLock(Server *p, Pgno pgno, int bWrite); + +#endif /* SQLITE_SERVER_H */ + +#endif /* SQLITE_SERVER_EDITION */ Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -495,10 +495,11 @@ #define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8)) #define SQLITE_IOERR_AUTH (SQLITE_IOERR | (28<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) +#define SQLITE_BUSY_DEADLOCK (SQLITE_BUSY | (3<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) #define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) #define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8)) #define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8)) #define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8)) Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1099,10 +1099,11 @@ #include "vdbe.h" #include "pager.h" #include "pcache.h" #include "os.h" #include "mutex.h" +#include "server.h" /* The 
SQLITE_EXTRA_DURABLE compile-time option used to set the default ** synchronous setting to EXTRA. It is no longer supported. */ #ifdef SQLITE_EXTRA_DURABLE Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -2637,25 +2637,26 @@ sqlite3VdbeEnter(p); /* Check for one of the special errors */ mrc = p->rc & 0xff; isSpecialError = mrc==SQLITE_NOMEM || mrc==SQLITE_IOERR - || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL; + || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL + || p->rc==SQLITE_BUSY_DEADLOCK; if( isSpecialError ){ - /* If the query was read-only and the error code is SQLITE_INTERRUPT, - ** no rollback is necessary. Otherwise, at least a savepoint - ** transaction must be rolled back to restore the database to a - ** consistent state. + /* If the query was read-only and the error code is SQLITE_INTERRUPT + ** or SQLITE_BUSY_DEADLOCK, no rollback is necessary. Otherwise, at + ** least a savepoint transaction must be rolled back to restore the + ** database to a consistent state. ** ** Even if the statement is read-only, it is important to perform ** a statement or transaction rollback operation. If the error ** occurred while writing to the journal, sub-journal or database ** file as part of an effort to free up cache space (see function ** pagerStress() in pager.c), the rollback is required to restore ** the pager to a consistent state. */ - if( !p->readOnly || mrc!=SQLITE_INTERRUPT ){ + if( !p->readOnly || (mrc!=SQLITE_INTERRUPT && mrc!=SQLITE_BUSY) ){ if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){ eStatementOp = SAVEPOINT_ROLLBACK; }else{ /* We are forced to roll back the active transaction. Before doing ** so, abort any other statements this handle currently has active. 
Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -452,11 +452,20 @@ u8 lockError; /* True if a locking error has occurred */ #endif #ifdef SQLITE_ENABLE_SNAPSHOT WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ #endif +#ifdef SQLITE_SERVER_EDITION + Server *pServer; +#endif }; + +#ifdef SQLITE_SERVER_EDITION +# define walIsServer(p) ((p)->pServer!=0) +#else +# define walIsServer(p) 0 +#endif /* ** Candidate values for Wal.exclusiveMode. */ #define WAL_NORMAL_MODE 0 @@ -1259,10 +1268,18 @@ }else{ sqlite3OsShmUnmap(pWal->pDbFd, isDelete); } } +#ifdef SQLITE_SERVER_EDITION +int sqlite3WalServer(Wal *pWal, Server *pServer){ + assert( pWal->pServer==0 ); + pWal->pServer = pServer; + return SQLITE_OK; +} +#endif + /* ** Open a connection to the WAL file zWalName. The database file must ** already be opened on connection pDbFd. The buffer that zWalName points ** to must remain valid for the lifetime of the returned Wal* handle. ** @@ -1890,10 +1907,13 @@ ** the wal-index header do not match the contents of the ** file-system. To avoid this, update the wal-index header to ** indicate that the log file contains zero valid frames. */ walRestartHdr(pWal, salt1); rc = sqlite3OsTruncate(pWal->pWalFd, 0); + }else if( walIsServer(pWal) ){ + assert( eMode==SQLITE_CHECKPOINT_RESTART ); + walRestartHdr(pWal, salt1); } walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } } @@ -2048,10 +2068,18 @@ } /* The header was successfully read. Return zero. */ return 0; } + +static int walIndexWriteLock(Wal *pWal){ + if( walIsServer(pWal) ){ + return sqlite3ServerLock(pWal->pServer, 0, 1, 0); + }else{ + return walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + } +} /* ** Read the wal-index header from the wal-index and into pWal->hdr. ** If the wal-header appears to be corrupt, try to reconstruct the ** wal-index from the WAL before returning. @@ -2089,15 +2117,16 @@ ** with a writer. 
So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ if( pWal->readOnly & WAL_SHM_RDONLY ){ + assert( walIsServer(pWal)==0 ); if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; } - }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ + }else if( SQLITE_OK==(rc = walIndexWriteLock(pWal)) ){ pWal->writeLock = 1; if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding @@ -2107,11 +2136,13 @@ rc = walIndexRecover(pWal); *pChanged = 1; } } pWal->writeLock = 0; - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( walIsServer(pWal)==0 ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + } } } /* If the header is read successfully, check the version number to make ** sure the wal-index was not constructed with some future format that @@ -2247,10 +2278,13 @@ if( rc!=SQLITE_OK ){ return rc; } } + assert( rc==SQLITE_OK ); + if( walIsServer(pWal) ) return SQLITE_OK; + pInfo = walCkptInfo(pWal); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame #ifdef SQLITE_ENABLE_SNAPSHOT && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0 || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr))) @@ -2586,11 +2620,19 @@ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ int iMinHash; /* This routine is only be called from within a read transaction. */ - assert( pWal->readLock>=0 || pWal->lockError ); + assert( walIsServer(pWal) || pWal->readLock>=0 || pWal->lockError ); + + assert( walIsServer(pWal)==0 || pWal->writeLock==0 + || sqlite3ServerHasLock(pWal->pServer, 0, 1) + ); + if( walIsServer(pWal) && pWal->writeLock==0 ){ + /* A server mode connection must read from the most recent snapshot. 
*/ + iLast = walIndexHdr(pWal)->mxFrame; + } /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case as an optimization. Likewise, if pWal->readLock==0, ** then the WAL is ignored by the reader so return early, as if the @@ -2698,11 +2740,11 @@ /* ** Return the size of the database in pages (or zero, if unknown). */ Pgno sqlite3WalDbsize(Wal *pWal){ - if( pWal && ALWAYS(pWal->readLock>=0) ){ + if( pWal && (walIsServer(pWal) || ALWAYS(pWal->readLock>=0)) ){ return pWal->hdr.nPage; } return 0; } @@ -2723,16 +2765,21 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){ int rc; /* Cannot start a write transaction without first holding a read ** transaction. */ - assert( pWal->readLock>=0 ); + assert( walIsServer(pWal) || pWal->readLock>=0 ); assert( pWal->writeLock==0 && pWal->iReCksum==0 ); if( pWal->readOnly ){ return SQLITE_READONLY; } + + /* For a server connection, do nothing at this point. */ + if( walIsServer(pWal) ){ + return SQLITE_OK; + } /* Only one writer allowed at a time. Get the write lock. Return ** SQLITE_BUSY if unable. */ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); @@ -2780,11 +2827,11 @@ ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; - if( ALWAYS(pWal->writeLock) ){ + if( pWal->writeLock ){ Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; /* Restore the clients cache of the wal-index header to the state it ** was in before the client began writing to the database. @@ -2870,15 +2917,17 @@ ** SQLITE_OK is returned if no error is encountered (regardless of whether ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned ** if an error occurs. 
*/ static int walRestartLog(Wal *pWal){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); int rc = SQLITE_OK; int cnt; - if( pWal->readLock==0 ){ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + if( pWal->readLock==0 + || (walIsServer(pWal) && pInfo->nBackfill==pWal->hdr.mxFrame) + ){ assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ u32 salt1; sqlite3_randomness(4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); @@ -2896,10 +2945,11 @@ walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); }else if( rc!=SQLITE_BUSY ){ return rc; } } + if( walIsServer(pWal) ) return rc; walUnlockShared(pWal, WAL_READ_LOCK(0)); pWal->readLock = -1; cnt = 0; do{ int notUsed; @@ -3056,11 +3106,27 @@ WalWriter w; /* The writer */ u32 iFirst = 0; /* First frame that may be overwritten */ WalIndexHdr *pLive; /* Pointer to shared header */ assert( pList ); - assert( pWal->writeLock ); + assert( pWal->writeLock || walIsServer(pWal) ); + if( pWal->writeLock==0 ){ + int bDummy = 0; +#if 0 + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); +#endif + rc = sqlite3ServerLock(pWal->pServer, 0, 1, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + rc = walIndexTryHdr(pWal, &bDummy); + } + if( rc!=SQLITE_OK ){ + return rc; + } + assert( sqlite3ServerHasLock(pWal->pServer, 0, 1) ); + } + assert( walIsServer(pWal)==0 || sqlite3ServerHasLock(pWal->pServer, 0, 1) ); /* If this frame set completes a transaction, then nTruncate>0. If ** nTruncate==0 then this frame set does not complete the transaction. */ assert( (isCommit!=0)==(nTruncate!=0) ); @@ -3077,11 +3143,11 @@ } /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ - if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ + if( walIsServer(pWal)==0 && SQLITE_OK!=(rc = walRestartLog(pWal)) ){ return rc; } /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. 
See comments at the top of @@ -3329,11 +3395,23 @@ ** immediately, and a busy-handler is configured, it is invoked and the ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); + if( walIsServer(pWal) ){ + rc = sqlite3ServerBegin(pWal->pServer); + if( rc!=SQLITE_OK ) goto ckpt_out; + if( eMode>=SQLITE_CHECKPOINT_RESTART ){ + /* Exclusive lock on page 1. This is exclusive access to the db. */ + rc = sqlite3ServerLock(pWal->pServer, 1, 1, 1); + }else{ + /* Take the server write-lock ("page" 0) */ + rc = sqlite3ServerLock(pWal->pServer, 0, 1, 1); + } + }else{ + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); + } if( rc==SQLITE_OK ){ pWal->writeLock = 1; }else if( rc==SQLITE_BUSY ){ eMode2 = SQLITE_CHECKPOINT_PASSIVE; xBusy2 = 0; @@ -3374,14 +3452,16 @@ */ memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } /* Release the locks. */ + ckpt_out: sqlite3WalEndWriteTransaction(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); pWal->ckptLock = 0; WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); + if( walIsServer(pWal) ) sqlite3ServerEnd(pWal->pServer); return (rc==SQLITE_OK && eMode!=eMode2 ? 
SQLITE_BUSY : rc); } /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since Index: src/wal.h ================================================================== --- src/wal.h +++ src/wal.h @@ -141,8 +141,12 @@ int sqlite3WalFramesize(Wal *pWal); #endif /* Return the sqlite3_file object for the WAL file */ sqlite3_file *sqlite3WalFile(Wal *pWal); + +#ifdef SQLITE_SERVER_EDITION +int sqlite3WalServer(Wal *pWal, Server *pServer); +#endif #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -270,10 +270,19 @@ } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] + +test_suite "server" -prefix "" -description { + All server-edition tests. +} -files [ + test_set \ + select1.test \ + [glob -nocomplain $::testdir/server*.test] \ + -exclude *server1.test +] test_suite "fts5-light" -prefix "" -description { All FTS5 tests. } -files [ test_set \ ADDED test/server2.test Index: test/server2.test ================================================================== --- /dev/null +++ test/server2.test @@ -0,0 +1,117 @@ +# 2017 April 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is testing the server mode of SQLite. 
+# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix server2 + +#------------------------------------------------------------------------- +# Check that the *-hma file is deleted correctly. +# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b); +} {} +do_test 1.1 { + file exists test.db-hma +} {1} +do_test 1.2 { + db close + file exists test.db-hma +} {0} +do_test 1.3 { + sqlite3 db test.db + db eval { CREATE TABLE t2(a, b) } + sqlite3 db2 test.db + db2 eval { CREATE TABLE t3(a, b) } + file exists test.db-hma +} {1} +do_test 1.4 { + db2 close + file exists test.db-hma +} {1} +integrity_check 1.5 +do_test 1.6 { + db close + file exists test.db-hma +} {0} + + +#------------------------------------------------------------------------- +# +reset_db +sqlite3 db2 test.db + +do_execsql_test 2.0 { + CREATE TABLE t1(a, b); + CREATE TABLE t2(c, d); +} + +# Two concurrent transactions committed. +# +do_test 2.1 { + db eval { + BEGIN; + INSERT INTO t1 VALUES(1, 2); + } + db2 eval { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + } +} {} +do_test 2.2 { + lsort [glob test.db*] +} {test.db test.db-hma test.db-journal0 test.db-journal1} +do_test 2.3.1 { db eval COMMIT } {} +do_test 2.3.2 { db2 eval COMMIT } {} +do_execsql_test 2.4 {SELECT * FROM t1, t2} {1 2 3 4} +do_test 2.5 { + lsort [glob test.db*] +} {test.db test.db-hma test.db-journal0 test.db-journal1} + +do_test 2.6 { + execsql {BEGIN} + execsql {INSERT INTO t1 VALUES(5, 6)} + + execsql {BEGIN} db2 + catchsql {INSERT INTO t1 VALUES(7, 8)} db2 +} {1 {database is locked}} +do_test 2.7 { + # Transaction is automatically rolled back in this case. 
+ sqlite3_get_autocommit db2 +} {1} +do_test 2.8 { + execsql COMMIT + execsql { SELECT * FROM t1 } db2 +} {1 2 5 6} +db2 close + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE t1(a, b); +} + +do_test 3.1 { + glob test.db* +} {test.db-journal0 test.db test.db-hma} + +do_test 3.2 { + db close + glob test.db* +} {test.db} + +finish_test + ADDED test/server3.test Index: test/server3.test ================================================================== --- /dev/null +++ test/server3.test @@ -0,0 +1,45 @@ +# 2017 April 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is testing the server mode of SQLite. +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set testprefix server3 + +db close + +do_multiclient_test tn { + do_test $tn.1 { + sql1 { CREATE TABLE t1(a, b) } + sql2 { CREATE TABLE t2(a, b) } + } {} + + do_test $tn.2 { + sql1 { + INSERT INTO t2 VALUES(1, 2); + BEGIN; + INSERT INTO t1 VALUES(1, 2); + } + } {} + + do_test $tn.3 { csql2 { SELECT * FROM t1 } } {1 {database is locked}} + do_test $tn.4 { csql2 { SELECT * FROM t1 } } {1 {database is locked}} + do_test $tn.5 { sql2 { SELECT * FROM t2 } } {1 2} + + +} + +finish_test + ADDED test/servercrash.test Index: test/servercrash.test ================================================================== --- /dev/null +++ test/servercrash.test @@ -0,0 +1,68 @@ +# 2017 April 27 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix servercrash + +ifcapable !crashtest { + finish_test + return +} +do_not_use_codec + +do_execsql_test 1.0 { + PRAGMA page_siBlockze = 4096; + PRAGMA auto_vacuum = OFF; + CREATE TABLE t1(a, b); + CREATE TABLE t2(c, d); + + INSERT INTO t1 VALUES(1, 2), (3, 4); + INSERT INTO t2 VALUES(1, 2), (3, 4); +} + +for {set i 0} {$i < 10} {incr i} { + do_test 1.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + do_execsql_test 1.$i.2 { + SELECT * FROM t1 + } {1 2 3 4} +} + +for {set i 0} {$i < 10} {incr i} { + do_test 2.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + do_test 2.$i.2 { + sqlite3 dbX test.db + execsql { SELECT * FROM t1 } dbX + } {1 2 3 4} + dbX close +} + +db close +for {set i 0} {$i < 10} {incr i} { + do_test 3.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + sqlite3 db test.db + do_execsql_test 3.$i.2 { SELECT * FROM t1 } {1 2 3 4} + db close +} + +finish_test + ADDED test/serverwal.test Index: test/serverwal.test ================================================================== --- /dev/null +++ test/serverwal.test @@ -0,0 +1,138 @@ +# 2017 April 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is testing the server mode of SQLite. +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix serverwal + +# Check files are created and deleted as expected. +# +do_execsql_test 1.0 { + PRAGMA journal_mode = wal; +} {wal} +do_execsql_test 1.1 { + CREATE TABLE t1(a, b); +} +do_execsql_test 1.2 { + SELECT * FROM t1; +} {} +do_test 1.3 { + lsort [glob test.db*] +} {test.db test.db-hma test.db-shm test.db-wal} +do_test 1.4 { + db close + glob test.db* +} {test.db} + +#------------------------------------------------------------------------- +# Two concurrent transactions. +# +do_test 2.0 { + sqlite3 db test.db + sqlite3 db2 test.db + db eval { + CREATE TABLE t2(a, b); + } +} {} +do_test 2.1 { + execsql { + BEGIN; + INSERT INTO t1 VALUES(1, 2); + } db + execsql { + BEGIN; + INSERT INTO t2 VALUES(1, 2); + } db2 +} {} +do_test 2.2 { + execsql COMMIT db + execsql COMMIT db2 +} {} +db close +db2 close + +#------------------------------------------------------------------------- +# That the wal file can be wrapped around. +# +reset_db +do_execsql_test 3.0 { + PRAGMA journal_mode = wal; + CREATE TABLE ttt(a, b); + INSERT INTO ttt VALUES(1, 2); + INSERT INTO ttt VALUES(3, 4); + INSERT INTO ttt VALUES(5, 6); + INSERT INTO ttt VALUES(7, 8); + INSERT INTO ttt VALUES(9, 10); +} {wal} + +do_test 3.1 { + set N [file size test.db-wal] + execsql { + PRAGMA wal_checkpoint = restart; + INSERT INTO ttt VALUES(11, 12); + INSERT INTO ttt VALUES(13, 14); + } + expr {$N == [file size test.db-wal]} +} {1} + +#------------------------------------------------------------------------- +# That ROLLBACK appears to work. 
+# +reset_db +do_execsql_test 4.0 { + PRAGMA cache_size = 10; + CREATE TABLE ttt(a, b); + CREATE INDEX yyy ON ttt(b, a); + PRAGMA journal_mode = wal; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO ttt SELECT randomblob(100), randomblob(100) FROM s; +} {wal} + +do_execsql_test 4.1 { + PRAGMA integrity_check; + BEGIN; + UPDATE ttt SET b=a; + ROLLBACK; + PRAGMA integrity_check; +} {ok ok} + +reset_db +do_execsql_test 5.1 { + CREATE TABLE xyz(a); + PRAGMA journal_mode = wal; + INSERT INTO xyz VALUES(1); + INSERT INTO xyz VALUES(2); + INSERT INTO xyz VALUES(3); +} {wal} + +breakpoint + +do_test 5.2 { + sqlite3 db2 test.db + execsql { SELECT * FROM xyz } db2 +} {1 2 3} + +do_execsql_test 5.3 { + PRAGMA wal_checkpoint = restart +} {0 0 0} + +do_test 5.4 { + execsql { SELECT * FROM xyz } db2 +} {1 2 3} + +finish_test + Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -584,10 +584,14 @@ proc reset_db {} { catch {db close} forcedelete test.db forcedelete test.db-journal forcedelete test.db-wal + for {set i 0} {$i < 16} {incr i} { + forcedelete test.db-journal$i + } + sqlite3 db ./test.db set ::DB [sqlite3_connection_pointer db] if {[info exists ::SETUP_SQL]} { db eval $::SETUP_SQL } Index: tool/mksqlite3c.tcl ================================================================== --- tool/mksqlite3c.tcl +++ tool/mksqlite3c.tcl @@ -112,10 +112,11 @@ pager.h parse.h pcache.h pragma.h rtree.h + server.h sqlite3session.h sqlite3.h sqlite3ext.h sqlite3rbu.h sqliteicu.h @@ -317,10 +318,11 @@ pcache.c pcache1.c rowset.c pager.c wal.c + server.c btmutex.c btree.c backup.c ADDED tool/tserver.c Index: tool/tserver.c ================================================================== --- /dev/null +++ tool/tserver.c @@ -0,0 +1,442 @@ +/* +** 2017 June 7 +** +** The author disclaims copyright to this source code. 
In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** Simple multi-threaded server used for informal testing of concurrency +** between connections in different threads. Listens for tcp/ip connections +** on port 9999 of the 127.0.0.1 interface only. To build: +** +** gcc -g $(TOP)/tool/tserver.c sqlite3.o -lpthread -o tserver +** +** To run using "x.db" as the db file: +** +** ./tserver x.db +** +** To connect, open a client socket on port 9999 and start sending commands. +** Commands are either SQL - which must be terminated by a semi-colon, or +** dot-commands, which must be terminated by a newline. If an SQL statement +** is seen, it is prepared and added to an internal list. +** +** Dot-commands are: +** +** .list Display all SQL statements in the list. +** .quit Disconnect. +** .run Run all SQL statements in the list. +** .repeats N Configure the number of repeats per ".run". +** .seconds N Configure the number of seconds to ".run" for. 
+** +** Example input: +** +** BEGIN; +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** COMMIT; +** .repeats 100000 +** .run +** +*/ +#define TSERVER_PORTNUMBER 9999 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sqlite3.h" + +/* Database used by this server */ +static char *zDatabaseName = 0; + +typedef struct ClientCtx ClientCtx; +struct ClientCtx { + sqlite3 *db; /* Database handle for this client */ + int fd; /* Client fd */ + int nRepeat; /* Number of times to repeat SQL */ + int nSecond; /* Number of seconds to run for */ + sqlite3_stmt **apPrepare; /* Array of prepared statements */ + int nPrepare; /* Valid size of apPrepare[] */ + int nAlloc; /* Allocated size of apPrepare[] */ +}; + +static int is_eol(int i){ + return (i=='\n' || i=='\r'); +} +static int is_whitespace(int i){ + return (i==' ' || i=='\t' || is_eol(i)); +} + +static void trim_string(const char **pzStr, int *pnStr){ + const char *zStr = *pzStr; + int nStr = *pnStr; + + while( nStr>0 && is_whitespace(zStr[0]) ){ + zStr++; + nStr--; + } + while( nStr>0 && is_whitespace(zStr[nStr-1]) ){ + nStr--; + } + + *pzStr = zStr; + *pnStr = nStr; +} + +static int send_message(ClientCtx *p, const char *zFmt, ...){ + char *zMsg; + va_list ap; /* Vararg list */ + va_start(ap, zFmt); + int res = -1; + + zMsg = sqlite3_vmprintf(zFmt, ap); + if( zMsg ){ + res = write(p->fd, zMsg, strlen(zMsg)); + } + sqlite3_free(zMsg); + va_end(ap); + + return (res<0); +} + +static int handle_some_sql(ClientCtx *p, const char *zSql, int nSql){ + const char *zTail = zSql; + int nTail = nSql; + int rc = SQLITE_OK; + + while( rc==SQLITE_OK ){ + if( p->nPrepare>=p->nAlloc ){ + int nByte = (p->nPrepare+32) * sizeof(sqlite3_stmt*); + sqlite3_stmt **apNew = sqlite3_realloc(p->apPrepare, nByte); + if( apNew ){ + p->apPrepare = 
apNew; + p->nAlloc = p->nPrepare+32; + }else{ + rc = SQLITE_NOMEM; + break; + } + } + rc = sqlite3_prepare_v2( + p->db, zTail, nTail, &p->apPrepare[p->nPrepare], &zTail + ); + if( rc!=SQLITE_OK ){ + send_message(p, "error - %s\n", sqlite3_errmsg(p->db)); + rc = 1; + break; + } + if( p->apPrepare[p->nPrepare]==0 ){ + break; + } + p->nPrepare++; + nTail = nSql - (zTail-zSql); + rc = send_message(p, "ok (%d SQL statements)\n", p->nPrepare); + } + + return rc; +} + +static sqlite3_int64 get_timer(void){ + struct timeval t; + gettimeofday(&t, 0); + return ((sqlite3_int64)t.tv_usec / 1000) + ((sqlite3_int64)t.tv_sec * 1000); +} + +static void clear_sql(ClientCtx *p){ + int j; + for(j=0; jnPrepare; j++){ + sqlite3_finalize(p->apPrepare[j]); + } + p->nPrepare = 0; +} + +static int handle_dot_command(ClientCtx *p, const char *zCmd, int nCmd){ + assert( zCmd[0]=='.' ); + int n; + int rc = 0; + const char *z = &zCmd[1]; + const char *zArg; + int nArg; + + for(n=0; n<(nCmd-1); n++){ + if( is_whitespace(z[n]) ) break; + } + + zArg = &z[n]; + nArg = nCmd-n; + trim_string(&zArg, &nArg); + + if( n>=1 && n<=4 && 0==strncmp(z, "list", n) ){ + int i; + for(i=0; rc==0 && inPrepare; i++){ + const char *zSql = sqlite3_sql(p->apPrepare[i]); + int nSql = strlen(zSql); + trim_string(&zSql, &nSql); + rc = send_message(p, "%d: %.*s\n", i, nSql, zSql); + } + } + + else if( n>=1 && n<=4 && 0==strncmp(z, "quit", n) ){ + rc = 1; + } + + else if( n>=2 && n<=7 && 0==strncmp(z, "repeats", n) ){ + if( nArg ){ + p->nRepeat = strtol(zArg, 0, 0); + if( p->nRepeat>0 ) p->nSecond = 0; + } + rc = send_message(p, "ok (repeat=%d)\n", p->nRepeat); + } + + else if( n>=2 && n<=3 && 0==strncmp(z, "run", n) ){ + int i, j; + int nBusy = 0; + sqlite3_int64 t0 = get_timer(); + sqlite3_int64 t1 = t0; + int nT1 = 0; + int nTBusy1 = 0; + + for(j=0; (p->nRepeat<=0 || jnRepeat) && rc==SQLITE_OK; j++){ + sqlite3_int64 t2; + + for(i=0; inPrepare && rc==SQLITE_OK; i++){ + sqlite3_stmt *pStmt = p->apPrepare[i]; + + /* 
Execute the statement */ + while( sqlite3_step(pStmt)==SQLITE_ROW ); + rc = sqlite3_reset(pStmt); + + if( (rc & 0xFF)==SQLITE_BUSY ){ + if( sqlite3_get_autocommit(p->db)==0 ){ + sqlite3_exec(p->db, "ROLLBACK", 0, 0, 0); + } + nBusy++; + rc = SQLITE_OK; + break; + } + else if( rc!=SQLITE_OK ){ + send_message(p, "error - %s\n", sqlite3_errmsg(p->db)); + } + } + + t2 = get_timer(); + if( t2>=(t1+1000) ){ + int nMs = (t2 - t1); + int nDone = (j+1 - nBusy - nT1); + + rc = send_message( + p, "(%d done @ %d per second, %d busy)\n", + nDone, (1000*nDone + nMs/2) / nMs, nBusy - nTBusy1 + ); + t1 = t2; + nT1 = j+1 - nBusy; + nTBusy1 = nBusy; + if( p->nSecond>0 && (p->nSecond*1000)<=t1-t0 ) break; + } + } + + if( rc==SQLITE_OK ){ + send_message(p, "ok (%d/%d SQLITE_BUSY)\n", nBusy, j); + } + clear_sql(p); + } + + else if( n>=1 && n<=7 && 0==strncmp(z, "seconds", n) ){ + if( nArg ){ + p->nSecond = strtol(zArg, 0, 0); + if( p->nSecond>0 ) p->nRepeat = 0; + } + rc = send_message(p, "ok (repeat=%d)\n", p->nRepeat); + } + + else{ + send_message(p, + "unrecognized dot command: %.*s\n" + "should be \"list\", \"run\", \"repeats\", or \"seconds\"\n", n, z + ); + rc = 1; + } + + return rc; +} + +static void *handle_client(void *pArg){ + char zCmd[32*1024]; /* Read buffer */ + int nCmd = 0; /* Valid bytes in zCmd[] */ + int res; /* Result of read() call */ + int rc = SQLITE_OK; + int j; + + ClientCtx ctx; + memset(&ctx, 0, sizeof(ClientCtx)); + + ctx.fd = (int)(intptr_t)pArg; + ctx.nRepeat = 1; + rc = sqlite3_open(zDatabaseName, &ctx.db); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(ctx.db)); + return 0; + } + + while( rc==SQLITE_OK ){ + int i; + int iStart; + int nConsume; + res = read(ctx.fd, &zCmd[nCmd], sizeof(zCmd)-nCmd-1); + if( res<=0 ) break; + nCmd += res; + if( nCmd>=sizeof(zCmd)-1 ){ + fprintf(stderr, "oversized (>32KiB) message\n"); + res = 0; + break; + } + zCmd[nCmd] = '\0'; + + do { + nConsume = 0; + + /* Gobble up any whitespace */ + 
iStart = 0; + while( is_whitespace(zCmd[iStart]) ) iStart++; + + if( zCmd[iStart]=='.' ){ + /* This is a dot-command. Search for end-of-line. */ + for(i=iStart; i0 ){ + nCmd = nCmd-nConsume; + if( nCmd>0 ){ + memmove(zCmd, &zCmd[nConsume], nCmd); + } + } + }while( rc==SQLITE_OK && nConsume>0 ); + } + + fprintf(stdout, "Client %d disconnects\n", ctx.fd); + close(ctx.fd); + clear_sql(&ctx); + sqlite3_free(ctx.apPrepare); + sqlite3_close(ctx.db); + return 0; +} + +int main(int argc, char *argv[]) { + sqlite3 *db; + int sfd; + int rc; + int yes = 1; + struct sockaddr_in server; + + /* Ignore SIGPIPE. Otherwise the server exits if a client disconnects + ** abruptly. */ + signal(SIGPIPE, SIG_IGN); + + if( argc!=2 ){ + fprintf(stderr, "Usage: %s DATABASE\n", argv[0]); + return 1; + } + zDatabaseName = argv[1]; + + rc = sqlite3_open(zDatabaseName, &db); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(db)); + return 1; + } + + rc = sqlite3_exec(db, "SELECT * FROM sqlite_master", 0, 0, 0); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_exec(): %s\n", sqlite3_errmsg(db)); + return 1; + } + + sfd = socket(AF_INET, SOCK_STREAM, 0); + if( sfd<0 ){ + fprintf(stderr, "socket() failed\n"); + return 1; + } + + rc = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); + if( rc<0 ){ + perror("setsockopt"); + return 1; + } + + memset(&server, 0, sizeof(server)); + server.sin_family = AF_INET; + server.sin_addr.s_addr = inet_addr("127.0.0.1"); + server.sin_port = htons(TSERVER_PORTNUMBER); + + rc = bind(sfd, (struct sockaddr *)&server, sizeof(struct sockaddr)); + if( rc<0 ){ + fprintf(stderr, "bind() failed\n"); + return 1; + } + + rc = listen(sfd, 8); + if( rc<0 ){ + fprintf(stderr, "listen() failed\n"); + return 1; + } + + while( 1 ){ + pthread_t tid; + int cfd = accept(sfd, NULL, NULL); + if( cfd<0 ){ + perror("accept()"); + return 1; + } + + fprintf(stdout, "Client %d connects\n", cfd); + rc = pthread_create(&tid, NULL, handle_client, 
(void*)(intptr_t)cfd); + if( rc!=0 ){ + perror("pthread_create()"); + return 1; + } + + pthread_detach(tid); + } + + return 0; +}