Index: src/btree.c
==================================================================
--- src/btree.c
+++ src/btree.c
@@ -725,10 +725,30 @@
   }else{
     *pHasMoved = 0;
   }
   return SQLITE_OK;
 }
+
+/*
+** Re-read a MemPage's data pointer from the pager and recompute the pointers derived from it.
+*/
+static u8 *btreeGetData(MemPage *pPage){
+  pPage->aData = sqlite3PagerGetData(pPage->pDbPage);   /* buffer the pager reports now */
+  pPage->aDataEnd = &pPage->aData[pPage->pBt->usableSize];   /* aData + usableSize */
+  pPage->aCellIdx = &pPage->aData[pPage->cellOffset];        /* aData + cellOffset */
+  return pPage->aData;                  /* returned so callers can index it directly */
+}
+
+/*
+** Make a btree page writable. Returns the result code of sqlite3PagerWrite().
+*/
+static int btreeMakePageWriteable(MemPage *pPage){
+  int rc;
+  rc = sqlite3PagerWrite(pPage->pDbPage);
+  btreeGetData(pPage);   /* pointers are refreshed whether or not the write call succeeded */
+  return rc;
+}
 #ifndef SQLITE_OMIT_AUTOVACUUM
 /*
 ** Given a page number of a regular database page, return the page
 ** number for the pointer-map page that contains the entry for the
@@ -795,10 +815,11 @@
   pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
   if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
     TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
     *pRC= rc = sqlite3PagerWrite(pDbPage);
+    pPtrmap = sqlite3PagerGetData(pDbPage);   /* re-fetch: pointer read before the write call */
     if( rc==SQLITE_OK ){
       pPtrmap[offset] = eType;
       put4byte(&pPtrmap[offset+1], parent);
     }
   }
@@ -1543,11 +1564,11 @@
 ** MemPage.aData elements if needed.
 **
 ** If the noContent flag is set, it means that we do not care about
 ** the content of the page at this time. So do not go to the disk
 ** to fetch the content. Just fill in the content with zeros for now.
-** If in the future we call sqlite3PagerWrite() on this page, that
+** If in the future we call btreeMakePageWriteable() on this page, that
 ** means we have started to be concerned about content and the disk
 ** read should occur at that point.
*/ static int btreeGetPage( BtShared *pBt, /* The btree */ @@ -1633,11 +1654,11 @@ static void releasePage(MemPage *pPage){ if( pPage ){ assert( pPage->aData ); assert( pPage->pBt ); assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + /* assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); sqlite3PagerUnref(pPage->pDbPage); } } @@ -2308,10 +2329,11 @@ int rc; /* Result code from subfunctions */ MemPage *pPage1; /* Page 1 of the database file */ int nPage; /* Number of pages in the database */ int nPageFile = 0; /* Number of pages in the database file */ int nPageHeader; /* Number of pages in the database according to hdr */ + u8 *page1; /* Content of page 1 */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; @@ -2319,19 +2341,19 @@ if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is ** a valid database file. 
*/ - nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData); + page1 = btreeGetData(pPage1); + nPage = nPageHeader = get4byte(&page1[28]); sqlite3PagerPagecount(pBt->pPager, &nPageFile); - if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ + if( nPage==0 || memcmp(24+page1, 92+page1, 4)!=0 ){ nPage = nPageFile; } if( nPage>0 ){ u32 pageSize; u32 usableSize; - u8 *page1 = pPage1->aData; rc = SQLITE_NOTADB; if( memcmp(page1, zMagicHeader, 16)!=0 ){ goto page1_init_failed; } @@ -2480,13 +2502,13 @@ if( pBt->nPage>0 ){ return SQLITE_OK; } pP1 = pBt->pPage1; assert( pP1!=0 ); - rc = sqlite3PagerWrite(pP1->pDbPage); + rc = btreeMakePageWriteable(pP1); if( rc ) return rc; - data = pP1->aData; + data = btreeGetData(pP1); memcpy(data, zMagicHeader, sizeof(zMagicHeader)); assert( sizeof(zMagicHeader)==16 ); data[16] = (u8)((pBt->pageSize>>8)&0xff); data[17] = (u8)((pBt->pageSize>>16)&0xff); data[18] = 1; @@ -2652,12 +2674,13 @@ ** client has been writing the database file), update it now. Doing ** this sooner rather than later means the database size can safely ** re-read the database size from page 1 if a savepoint or transaction ** rollback occurs within the transaction. 
*/ + btreeGetData(pPage1); if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){ - rc = sqlite3PagerWrite(pPage1->pDbPage); + rc = btreeMakePageWriteable(pPage1); if( rc==SQLITE_OK ){ put4byte(&pPage1->aData[28], pBt->nPage); } } } @@ -2852,11 +2875,11 @@ if( eType!=PTRMAP_ROOTPAGE ){ rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); if( rc!=SQLITE_OK ){ return rc; } - rc = sqlite3PagerWrite(pPtrPage->pDbPage); + rc = btreeMakePageWriteable(pPtrPage); if( rc!=SQLITE_OK ){ releasePage(pPtrPage); return rc; } rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); @@ -2898,10 +2921,11 @@ if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ u8 eType; Pgno iPtrPage; + btreeGetData(pBt->pPage1); nFreeList = get4byte(&pBt->pPage1->aData[36]); if( nFreeList==0 ){ return SQLITE_DONE; } @@ -2954,11 +2978,11 @@ } releasePage(pFreePg); }while( nFin!=0 && iFreePg>nFin ); assert( iFreePgpDbPage); + rc = btreeMakePageWriteable(pLastPg); if( rc==SQLITE_OK ){ rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); } releasePage(pLastPg); if( rc!=SQLITE_OK ){ @@ -2974,11 +2998,11 @@ MemPage *pPg; rc = btreeGetPage(pBt, iLastPg, &pPg, 0); if( rc!=SQLITE_OK ){ return rc; } - rc = sqlite3PagerWrite(pPg->pDbPage); + rc = btreeMakePageWriteable(pPg); releasePage(pPg); if( rc!=SQLITE_OK ){ return rc; } } @@ -3008,11 +3032,11 @@ rc = SQLITE_DONE; }else{ invalidateAllOverflowCache(pBt); rc = incrVacuumStep(pBt, 0, btreePagecount(pBt)); if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + rc = btreeMakePageWriteable(pBt->pPage1); put4byte(&pBt->pPage1->aData[28], pBt->nPage); } } sqlite3BtreeLeave(p); return rc; @@ -3050,10 +3074,11 @@ ** is encountered, this indicates corruption. 
*/ return SQLITE_CORRUPT_BKPT; } + btreeGetData(pBt->pPage1); nFree = get4byte(&pBt->pPage1->aData[36]); nEntry = pBt->usableSize/5; nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; nFin = nOrig - nFree - nPtrmap; if( nOrig>PENDING_BYTE_PAGE(pBt) && nFinnFin && rc==SQLITE_OK; iFree--){ rc = incrVacuumStep(pBt, nFin, iFree); } if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + rc = btreeMakePageWriteable(pBt->pPage1); put4byte(&pBt->pPage1->aData[32], 0); put4byte(&pBt->pPage1->aData[36], 0); put4byte(&pBt->pPage1->aData[28], nFin); sqlite3PagerTruncateImage(pBt->pPager, nFin); pBt->nPage = nFin; @@ -3409,11 +3434,11 @@ sqlite3BtreeEnter(p); rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); if( rc==SQLITE_OK ){ if( iSavepoint<0 && pBt->initiallyEmpty ) pBt->nPage = 0; rc = newDatabase(pBt); - pBt->nPage = get4byte(28 + pBt->pPage1->aData); + pBt->nPage = get4byte(28 + btreeGetData(pBt->pPage1)); /* The database size was written into the offset 28 of the header ** when the transaction started, so we know that the value at offset ** 28 is nonzero. 
*/ assert( pBt->nPage>0 ); @@ -3846,11 +3871,11 @@ int a = amt; if( a+offset>pCur->info.nLocal ){ a = pCur->info.nLocal - offset; } if( eOp ){ - if( (rc = sqlite3PagerWrite(pPage->pDbPage))!=SQLITE_OK ) return rc; + if( (rc = btreeMakePageWriteable(pPage))!=SQLITE_OK ) return rc; getCellInfo(pCur); aPayload = pCur->info.pCell + pCur->info.nHeader; memcpy(aPayload+offset, pBuf, a); }else{ memcpy(pBuf, aPayload+offset, a); @@ -3948,11 +3973,11 @@ */ if( eOp==0 /* (1) */ && offset==0 /* (2) */ && pBt->inTransaction==TRANS_READ /* (4) */ && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ - && pBt->pPage1->aData[19]==0x01 /* (5) */ + && btreeGetData(pBt->pPage1)[19]==0x01 /* (5) */ ){ u8 aSave[4]; u8 *aWrite = &pBuf[-4]; memcpy(aSave, aWrite, 4); rc = sqlite3OsRead(fd, aWrite, a+4, pBt->pageSize * (nextPage-1)); @@ -4774,11 +4799,11 @@ } /* ** Allocate a new page from the database file. ** -** The new page is marked as dirty. (In other words, sqlite3PagerWrite() +** The new page is marked as dirty. (In other words, btreeMakePageWriteable() ** has already been called on the new page.) The new page has also ** been referenced and the calling routine is responsible for calling ** sqlite3PagerUnref() on the new page when it is done. ** ** SQLITE_OK is returned on success. Any other return value indicates @@ -4810,11 +4835,11 @@ Pgno mxPage; /* Total size of the database file */ assert( sqlite3_mutex_held(pBt->mutex) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); - n = get4byte(&pPage1->aData[36]); + n = get4byte(&btreeGetData(pPage1)[36]); testcase( n==mxPage-1 ); if( n>=mxPage ){ return SQLITE_CORRUPT_BKPT; } if( n>0 ){ @@ -4841,11 +4866,11 @@ #endif /* Decrement the free-list count by 1. Set iTrunk to the index of the ** first free-list trunk page. iPrevTrunk is initially 1. 
*/ - rc = sqlite3PagerWrite(pPage1->pDbPage); + rc = btreeMakePageWriteable(pPage1); if( rc ) return rc; put4byte(&pPage1->aData[36], n-1); /* The code within this loop is run only once if the 'searchList' variable ** is not true. Otherwise, it runs once for each trunk-page on the @@ -4875,11 +4900,11 @@ if( k==0 && !searchList ){ /* The trunk has no leaves and the list is not being searched. ** So extract the trunk page itself and use it as the newly ** allocated page */ assert( pPrevTrunk==0 ); - rc = sqlite3PagerWrite(pTrunk->pDbPage); + rc = btreeMakePageWriteable(pTrunk); if( rc ){ goto end_allocate_page; } *pPgno = iTrunk; memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); @@ -4896,19 +4921,19 @@ ** to allocate, regardless of whether it has leaves. */ assert( *pPgno==iTrunk ); *ppPage = pTrunk; searchList = 0; - rc = sqlite3PagerWrite(pTrunk->pDbPage); + rc = btreeMakePageWriteable(pTrunk); if( rc ){ goto end_allocate_page; } if( k==0 ){ if( !pPrevTrunk ){ memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); }else{ - rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + rc = btreeMakePageWriteable(pPrevTrunk); if( rc!=SQLITE_OK ){ goto end_allocate_page; } memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); } @@ -4926,11 +4951,11 @@ testcase( iNewTrunk==mxPage ); rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } - rc = sqlite3PagerWrite(pNewTrunk->pDbPage); + rc = btreeMakePageWriteable(pNewTrunk); if( rc!=SQLITE_OK ){ releasePage(pNewTrunk); goto end_allocate_page; } memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); @@ -4939,11 +4964,11 @@ releasePage(pNewTrunk); if( !pPrevTrunk ){ assert( sqlite3PagerIswriteable(pPage1->pDbPage) ); put4byte(&pPage1->aData[32], iNewTrunk); }else{ - rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + rc = btreeMakePageWriteable(pPrevTrunk); if( rc ){ goto end_allocate_page; } put4byte(&pPrevTrunk->aData[0], iNewTrunk); } @@ -4983,20 +5008,21 @@ int noContent; *pPgno = iPage; TRACE(("ALLOCATE: 
%d was leaf %d of %d on trunk %d" ": %d more free pages\n", *pPgno, closest+1, k, pTrunk->pgno, n-1)); - rc = sqlite3PagerWrite(pTrunk->pDbPage); + rc = btreeMakePageWriteable(pTrunk); if( rc ) goto end_allocate_page; + aData = pTrunk->aData; if( closestpDbPage); + rc = btreeMakePageWriteable(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); } } searchList = 0; @@ -5006,11 +5032,11 @@ pPrevTrunk = 0; }while( searchList ); }else{ /* There are no pages on the freelist, so create a new page at the ** end of the file */ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + rc = btreeMakePageWriteable(pBt->pPage1); if( rc ) return rc; pBt->nPage++; if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; #ifndef SQLITE_OMIT_AUTOVACUUM @@ -5022,11 +5048,11 @@ MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); rc = btreeGetPage(pBt, pBt->nPage, &pPg, 1); if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pPg->pDbPage); + rc = btreeMakePageWriteable(pPg); releasePage(pPg); } if( rc ) return rc; pBt->nPage++; if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } @@ -5036,11 +5062,11 @@ *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); rc = btreeGetPage(pBt, *pPgno, ppPage, 1); if( rc ) return rc; - rc = sqlite3PagerWrite((*ppPage)->pDbPage); + rc = btreeMakePageWriteable(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); } TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); } @@ -5093,21 +5119,21 @@ }else{ pPage = btreePageLookup(pBt, iPage); } /* Increment the free page count on pPage1 */ - rc = sqlite3PagerWrite(pPage1->pDbPage); + rc = btreeMakePageWriteable(pPage1); if( rc ) goto freepage_out; nFree = get4byte(&pPage1->aData[36]); put4byte(&pPage1->aData[36], nFree+1); if( pBt->secureDelete ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. 
*/ if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) - || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) + || ((rc = btreeMakePageWriteable(pPage))!=0) ){ goto freepage_out; } memset(pPage->aData, 0, pPage->pBt->pageSize); } @@ -5155,11 +5181,11 @@ ** we will continue to restrict the number of entries to usableSize/4 - 8 ** for now. At some point in the future (once everyone has upgraded ** to 3.6.0 or later) we should consider fixing the conditional above ** to read "usableSize/4-2" instead of "usableSize/4-8". */ - rc = sqlite3PagerWrite(pTrunk->pDbPage); + rc = btreeMakePageWriteable(pTrunk); if( rc==SQLITE_OK ){ put4byte(&pTrunk->aData[4], nLeaf+1); put4byte(&pTrunk->aData[8+nLeaf*4], iPage); if( pPage && !pBt->secureDelete ){ sqlite3PagerDontWrite(pPage->pDbPage); @@ -5178,11 +5204,11 @@ ** will become the new first trunk page in the free-list. */ if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ goto freepage_out; } - rc = sqlite3PagerWrite(pPage->pDbPage); + rc = btreeMakePageWriteable(pPage); if( rc!=SQLITE_OK ){ goto freepage_out; } put4byte(pPage->aData, iTrunk); put4byte(&pPage->aData[4], 0); @@ -5538,11 +5564,11 @@ j = pPage->nOverflow++; assert( j<(int)(sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0])) ); pPage->aOvfl[j].pCell = pCell; pPage->aOvfl[j].idx = (u16)i; }else{ - int rc = sqlite3PagerWrite(pPage->pDbPage); + int rc = btreeMakePageWriteable(pPage); if( rc!=SQLITE_OK ){ *pRC = rc; return; } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); @@ -6207,11 +6233,11 @@ for(i=0; ipDbPage); + rc = btreeMakePageWriteable(pNew); nNew++; if( rc ) goto balance_cleanup; }else{ assert( i>0 ); rc = allocateBtreePage(pBt, &pNew, &pgno, pgno, 0); @@ -6536,11 +6562,11 @@ /* Make pRoot, the root page of the b-tree, writable. Allocate a new ** page that will become the new right-child of pPage. Copy the contents ** of the node stored on pRoot into the new child page. 
*/ - rc = sqlite3PagerWrite(pRoot->pDbPage); + rc = btreeMakePageWriteable(pRoot); if( rc==SQLITE_OK ){ rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); copyNodeContent(pRoot, pChild, &rc); if( ISAUTOVACUUM ){ ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); @@ -6614,11 +6640,11 @@ break; }else{ MemPage * const pParent = pCur->apPage[iPage-1]; int const iIdx = pCur->aiIdx[iPage-1]; - rc = sqlite3PagerWrite(pParent->pDbPage); + rc = btreeMakePageWriteable(pParent); if( rc==SQLITE_OK ){ #ifndef SQLITE_OMIT_QUICKBALANCE if( pPage->hasData && pPage->nOverflow==1 && pPage->aOvfl[0].idx==pPage->nCell @@ -6791,11 +6817,11 @@ assert( szNew <= MX_CELL_SIZE(pBt) ); idx = pCur->aiIdx[pCur->iPage]; if( loc==0 ){ u16 szOld; assert( idxnCell ); - rc = sqlite3PagerWrite(pPage->pDbPage); + rc = btreeMakePageWriteable(pPage); if( rc ){ goto end_insert; } oldCell = findCell(pPage, idx); if( !pPage->leaf ){ @@ -6907,11 +6933,11 @@ ** deleted writable. Then free any overflow pages associated with the ** entry and finally remove the cell itself from within the page. 
*/ rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; - rc = sqlite3PagerWrite(pPage->pDbPage); + rc = btreeMakePageWriteable(pPage); if( rc ) return rc; rc = clearCell(pPage, pCell); dropCell(pPage, iCellIdx, cellSizePtr(pPage, pCell), &rc); if( rc ) return rc; @@ -6931,11 +6957,11 @@ assert( MX_CELL_SIZE(pBt) >= nCell ); allocateTempSpace(pBt); pTmp = pBt->pTmpSpace; - rc = sqlite3PagerWrite(pLeaf->pDbPage); + rc = btreeMakePageWriteable(pLeaf); insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); if( rc ) return rc; } @@ -7068,11 +7094,11 @@ } rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); if( rc!=SQLITE_OK ){ return rc; } - rc = sqlite3PagerWrite(pRoot->pDbPage); + rc = btreeMakePageWriteable(pRoot); if( rc!=SQLITE_OK ){ releasePage(pRoot); return rc; } }else{ @@ -7160,11 +7186,11 @@ assert( pPage->intKey ); *pnChange += pPage->nCell; } if( freePageFlag ){ freePage(pPage, &rc); - }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ + }else if( (rc = btreeMakePageWriteable(pPage))==0 ){ zeroPage(pPage, pPage->aData[0] | PTF_LEAF); } cleardatabasepage_out: releasePage(pPage); @@ -7355,11 +7381,11 @@ assert( p->inTrans>TRANS_NONE ); assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) ); assert( pBt->pPage1 ); assert( idx>=0 && idx<=15 ); - *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); + *pMeta = get4byte(&btreeGetData(pBt->pPage1)[36 + idx*4]); /* If auto-vacuum is disabled in this build and this is an auto-vacuum ** database, mark the database as read-only. 
*/ #ifdef SQLITE_OMIT_AUTOVACUUM if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ) pBt->readOnly = 1; @@ -7377,11 +7403,11 @@ int rc; assert( idx>=1 && idx<=15 ); sqlite3BtreeEnter(p); assert( p->inTrans==TRANS_WRITE ); assert( pBt->pPage1!=0 ); - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + rc = btreeMakePageWriteable(pBt->pPage1); if( rc==SQLITE_OK ){ put4byte(&pBt->pPage1->aData[36 + idx*4], iMeta); #ifndef SQLITE_OMIT_AUTOVACUUM if( idx==BTREE_INCR_VACUUM ){ assert( pBt->autoVacuum || iMeta==0 ); @@ -7915,10 +7941,11 @@ sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000); sCheck.errMsg.useMalloc = 2; /* Check the integrity of the freelist */ + btreeGetData(pBt->pPage1); checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), get4byte(&pBt->pPage1->aData[36]), "Main freelist: "); /* Check all the tables. */ @@ -8202,17 +8229,17 @@ */ pBt->doNotUseWAL = (u8)(iVersion==1); rc = sqlite3BtreeBeginTrans(pBtree, 0); if( rc==SQLITE_OK ){ - u8 *aData = pBt->pPage1->aData; + u8 *aData = btreeGetData(pBt->pPage1); if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ rc = sqlite3BtreeBeginTrans(pBtree, 2); if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + rc = btreeMakePageWriteable(pBt->pPage1); if( rc==SQLITE_OK ){ - aData = pBt->pPage1->aData; + aData = btreeGetData(pBt->pPage1); aData[18] = (u8)iVersion; aData[19] = (u8)iVersion; } } } Index: src/os.c ================================================================== --- src/os.c +++ src/os.c @@ -196,10 +196,56 @@ rc = pVfs->xCurrentTime(pVfs, &r); *pTimeOut = (sqlite3_int64)(r*86400000.0); } return rc; } + +/* Attempt to map all or part of a file into memory. VFSes are not +** required to implement this. The VFS might be an older version (less then +** 3) that does not have an xMap pointer. Or the xMap pointer might be NULL. 
+*/
+int sqlite3OsMap(
+  sqlite3_file *pFile,            /* The file to be mapped into memory */
+  sqlite3_int64 ofst,             /* Index of the first byte to map */
+  sqlite3_int64 len,              /* Number of bytes to be mapped */
+  int mmapFlags,                  /* Map control flags */
+  void **ppMemObj,                /* Write a mapping object here */
+  void **ppMem                    /* Write the start of the mapped file here */
+){
+  int rc;
+
+  /* The current implementation only does read-only mmap. This could change
+  ** in the future. */
+  assert( mmapFlags==SQLITE_OPEN_READONLY );
+
+  /* The current implementation only maps the whole file. This
+  ** could change in the future. */
+  assert( ofst==0 );
+
+  /* Start from a well-defined "nothing mapped" result. */
+  *ppMemObj = 0;
+  *ppMem = 0;
+  if( pFile->pMethods==0 || pFile->pMethods->iVersion<3
+   || pFile->pMethods->xMap==0 ) return SQLITE_CANTOPEN;
+  rc = pFile->pMethods->xMap(pFile, ofst, len, mmapFlags, ppMemObj, ppMem);
+  /* Callers may test *ppMem rather than rc, so scrub the outputs on failure. */
+  if( rc!=SQLITE_OK ){ *ppMemObj = 0; *ppMem = 0; }
+  return rc;
+}
+
+/* Undo a mapping.
+**
+** The pMemObj parameter will have been obtained by a prior call to
+** sqlite3OsMap(). So if pMemObj is not NULL, we know that the current
+** VFS does support xMap and xUnmap.
+*/
+int sqlite3OsUnmap(sqlite3_file *pFile, void *pMemObj){
+  int rc = SQLITE_OK;            /* Unmapping "nothing" (NULL) is a successful no-op */
+  if( pMemObj ) rc = pFile->pMethods->xUnmap(pFile, pMemObj);
+  return rc;
+}
+
 int sqlite3OsOpenMalloc(
   sqlite3_vfs *pVfs,
   const char *zFile,
   sqlite3_file **ppFile,
Index: src/os.h
==================================================================
--- src/os.h
+++ src/os.h
@@ -249,10 +249,12 @@
 int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
 int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
 int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
 void sqlite3OsShmBarrier(sqlite3_file *id);
 int sqlite3OsShmUnmap(sqlite3_file *id, int);
+int sqlite3OsMap(sqlite3_file*,sqlite3_int64,sqlite3_int64,int,void**,void**);
+int sqlite3OsUnmap(sqlite3_file*,void*);
 /*
 ** Functions for accessing sqlite3_vfs methods
 */
 int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);
Index: src/os_unix.c
==================================================================
--- src/os_unix.c
+++ src/os_unix.c
@@ -117,13 +117,11 @@
 #include
 #include
 #include
 #include
 #include
-#ifndef SQLITE_OMIT_WAL
 #include
-#endif
 #if SQLITE_ENABLE_LOCKING_STYLE
 # include
 # if OS_VXWORKS
 # include
@@ -410,10 +408,18 @@
   { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
 #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
   { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
 #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
+
+  { "mmap", (sqlite3_syscall_ptr)mmap, 0 },   /* mmap() returns void*, not int */
+#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[20].pCurrent)
+
+  { "munmap", (sqlite3_syscall_ptr)munmap, 0 },
+#define osMunmap ((int(*)(void*,size_t))aSyscall[21].pCurrent)
+
+
 }; /* End of the overrideable system calls */
 /*
 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the
@@ -4226,10 +4232,67 @@
 # define unixShmMap 0
 # define unixShmLock 0
 # define unixShmBarrier 0
 # define unixShmUnmap 0
 #endif /* #ifndef SQLITE_OMIT_WAL */
+
+/*
+** An object used to record enough
information about a file mapping to
+** undo that mapping.
+*/
+struct unixMapping {
+  sqlite3_int64 len;          /* Number of bytes that were mapped */
+  void *p;                    /* Start of the mapping, as returned by mmap() */
+};
+
+/*
+** Try to map len bytes of a file, starting at offset ofst, read-only.
+** On success *ppMem is the mapped memory and *ppMapObj the handle to pass to
+** unixUnmap(). On any failure both are NULL and SQLITE_CANTOPEN is returned.
+*/
+static int unixMap(
+  sqlite3_file *pFile,          /* File to be mapped */
+  sqlite3_int64 ofst,           /* Offset of start of section to be mapped */
+  sqlite3_int64 len,            /* Length of the section to be mapped */
+  int mmapFlags,                /* Flags controlling the mapping */
+  void **ppMapObj,              /* Write here an object to undo the mapping */
+  void **ppMem                  /* Write here a pointer to the mapped file */
+){
+  struct unixMapping *pNew;
+  unixFile *pUFile = (unixFile*)pFile;
+
+  assert( mmapFlags==SQLITE_OPEN_READONLY );
+  *ppMapObj = *ppMem = 0;       /* defined result on every error path */
+  /* Reject empty mappings and lengths that do not fit in a size_t. */
+  if( len<=0 || (sqlite3_int64)(size_t)len!=len ) return SQLITE_CANTOPEN;
+  sqlite3BeginBenignMalloc();
+  pNew = sqlite3_malloc( sizeof(*pNew) );
+  sqlite3EndBenignMalloc();
+  if( pNew==0 ) return SQLITE_CANTOPEN;
+  pNew->len = len;
+  pNew->p = mmap(0, (size_t)len, PROT_READ, MAP_SHARED, pUFile->h, (off_t)ofst);
+  if( pNew->p==MAP_FAILED ){    /* mmap() signals failure with MAP_FAILED, not NULL */
+    sqlite3_free(pNew);
+    return SQLITE_CANTOPEN;
+  }
+  *ppMapObj = pNew;
+  *ppMem = pNew->p;
+  return SQLITE_OK;
+}
+
+/*
+** Undo a prior memory mapping. pMapObj must be the non-NULL object that
+** unixMap() wrote to *ppMapObj; it is freed here and must not be reused.
+*/
+static int unixUnmap(sqlite3_file *pFile, void *pMapObj){
+  struct unixMapping *pMap = (struct unixMapping*)pMapObj;
+  UNUSED_PARAMETER(pFile);
+  assert( pMap!=0 );
+  munmap(pMap->p, (size_t)pMap->len);
+  sqlite3_free(pMap);
+  return SQLITE_OK;
+}
 /*
 ** Here ends the implementation of all sqlite3_file methods.
 **
 ********************** End sqlite3_file Methods *******************************
@@ -4267,13 +4330,13 @@
 ** methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
 **
 ** * An I/O method finder function called FINDER that returns a pointer
 ** to the METHOD object in the previous bullet.
*/ -#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK) \ +#define IOMETHODS(FINDER, METHOD, CLOSE, LOCK, UNLOCK, CKLOCK, SHMMAP) \ static const sqlite3_io_methods METHOD = { \ - VERSION, /* iVersion */ \ + 3, /* iVersion */ \ CLOSE, /* xClose */ \ unixRead, /* xRead */ \ unixWrite, /* xWrite */ \ unixTruncate, /* xTruncate */ \ unixSync, /* xSync */ \ @@ -4282,14 +4345,16 @@ UNLOCK, /* xUnlock */ \ CKLOCK, /* xCheckReservedLock */ \ unixFileControl, /* xFileControl */ \ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics, /* xDeviceCapabilities */ \ - unixShmMap, /* xShmMap */ \ + SHMMAP, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap /* xShmUnmap */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixMap, /* xMap */ \ + unixUnmap /* xUnmap */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ return &METHOD; \ } \ @@ -4302,68 +4367,68 @@ ** are also created. 
*/ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ - 2, /* shared memory is enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ - unixCheckReservedLock /* xCheckReservedLock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + unixShmMap /* Shared memory enabled */ ) IOMETHODS( nolockIoFinder, /* Finder function name */ nolockIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ nolockClose, /* xClose method */ nolockLock, /* xLock method */ nolockUnlock, /* xUnlock method */ - nolockCheckReservedLock /* xCheckReservedLock method */ + nolockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) IOMETHODS( dotlockIoFinder, /* Finder function name */ dotlockIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ dotlockClose, /* xClose method */ dotlockLock, /* xLock method */ dotlockUnlock, /* xUnlock method */ - dotlockCheckReservedLock /* xCheckReservedLock method */ + dotlockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS IOMETHODS( flockIoFinder, /* Finder function name */ flockIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ flockClose, /* xClose method */ flockLock, /* xLock method */ flockUnlock, /* xUnlock method */ - flockCheckReservedLock /* xCheckReservedLock method */ + flockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) #endif #if OS_VXWORKS IOMETHODS( semIoFinder, /* Finder function name */ semIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ semClose, /* xClose method */ semLock, /* xLock method */ semUnlock, /* xUnlock method */ - semCheckReservedLock /* xCheckReservedLock method */ + semCheckReservedLock, /* xCheckReservedLock method */ + 0 /* 
Shared memory disabled */ ) #endif #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE IOMETHODS( afpIoFinder, /* Finder function name */ afpIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ afpClose, /* xClose method */ afpLock, /* xLock method */ afpUnlock, /* xUnlock method */ - afpCheckReservedLock /* xCheckReservedLock method */ + afpCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) #endif /* ** The proxy locking method is a "super-method" in the sense that it @@ -4380,28 +4445,28 @@ static int proxyUnlock(sqlite3_file*, int); static int proxyCheckReservedLock(sqlite3_file*, int*); IOMETHODS( proxyIoFinder, /* Finder function name */ proxyIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ proxyClose, /* xClose method */ proxyLock, /* xLock method */ proxyUnlock, /* xUnlock method */ - proxyCheckReservedLock /* xCheckReservedLock method */ + proxyCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) #endif /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE IOMETHODS( nfsIoFinder, /* Finder function name */ nfsIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ nfsUnlock, /* xUnlock method */ - unixCheckReservedLock /* xCheckReservedLock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + 0 /* Shared memory disabled */ ) #endif #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* @@ -6769,11 +6834,11 @@ }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. 
See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==20 ); + assert( ArraySize(aSyscall)==22 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -653,10 +653,13 @@ i64 journalHdr; /* Byte offset to previous journal header */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ char dbFileVers[16]; /* Changes whenever database file changes */ + u8 *aFileContent; /* File mapped into memory */ + sqlite3_int64 nFileContent; /* Bytes of memory mapped into aFileContent */ + void *pMapObject; /* Used to unmap the file */ /* ** End of the routinely-changing class members ***************************************************************************/ u16 nExtra; /* Add this many bytes to each in-memory page */ @@ -1721,10 +1724,11 @@ ** or not, any journal file left in the file-system will be treated ** as a hot-journal and rolled back the next time a read-transaction ** is opened (by this or by any other connection). 
*/ static void pager_unlock(Pager *pPager){ + PgHdr *pPg; assert( pPager->eState==PAGER_READER || pPager->eState==PAGER_OPEN || pPager->eState==PAGER_ERROR ); @@ -1791,10 +1795,24 @@ } pPager->journalOff = 0; pPager->journalHdr = 0; pPager->setMaster = 0; + + pPg = 0; + sqlite3PcacheFetch(pPager->pPCache, 1, 0, &pPg); + if( pPg ){ + /* assert( sqlite3PcachePagecount(pPager->pPCache)==1 ); */ + pPg->pData = pPg->pBuf; + sqlite3PcacheRelease(pPg); + }else{ + /*assert( sqlite3PcachePagecount(pPager->pPCache)==0 );*/ + } + sqlite3OsUnmap(pPager->fd, pPager->pMapObject); + pPager->pMapObject = 0; + pPager->aFileContent = 0; + pPager->nFileContent = 0; } /* ** This function is called whenever an IOERR or FULL error that requires ** the pager to transition into the ERROR state may ahve occurred. @@ -2798,26 +2816,25 @@ int isInWal = 0; /* True if page is in log file */ int pgsz = pPager->pageSize; /* Number of bytes to read */ assert( pPager->eState>=PAGER_READER && !MEMDB ); assert( isOpen(pPager->fd) ); - - if( NEVER(!isOpen(pPager->fd)) ){ - assert( pPager->tempFile ); - memset(pPg->pData, 0, pPager->pageSize); - return SQLITE_OK; - } + assert( pPg->pBuf==pPg->pData ); if( pagerUseWal(pPager) ){ /* Try to pull the page from the write-ahead log. 
*/
     rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData);
   }
   if( rc==SQLITE_OK && !isInWal ){
-    i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
-    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
-    if( rc==SQLITE_IOERR_SHORT_READ ){
-      rc = SQLITE_OK;
+    i64 iOffset = (pgno-1)*(i64)pgsz;
+    if( iOffset+pgsz <= pPager->nFileContent ){
+      pPg->pData = &pPager->aFileContent[iOffset];
+    }else{
+      rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
+      if( rc==SQLITE_IOERR_SHORT_READ ){
+        rc = SQLITE_OK;
+      }
     }
   }
   if( pgno==1 ){
     if( rc ){
@@ -4890,10 +4907,23 @@
   if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
     rc = pagerPagecount(pPager, &pPager->dbSize);
   }
+  /* Mapping the file is only an optimization. It is skipped after an earlier
+  ** error and for an empty database. If it cannot be set up, leave the pager
+  ** with no mapping so that pages are fetched with sqlite3OsRead() instead. */
+  assert( pPager->aFileContent==0 );
+  pPager->nFileContent = pPager->dbSize*(sqlite3_int64)pPager->pageSize;
+  if( rc!=SQLITE_OK || pPager->nFileContent<=0
+   || sqlite3OsMap(pPager->fd, 0, pPager->nFileContent, SQLITE_OPEN_READONLY,
+          (void**)&pPager->pMapObject, (void**)&pPager->aFileContent)!=SQLITE_OK
+   || pPager->aFileContent==0
+  ){
+    pPager->aFileContent = 0;
+    pPager->nFileContent = 0;
+  }
  failed:
   if( rc!=SQLITE_OK ){
     assert( !MEMDB );
     pager_unlock(pPager);
     assert( pPager->eState==PAGER_OPEN );
@@ -5274,10 +5304,26 @@
   }
   PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
   return rc;
 }
+
+/*
+** If pPg->pData does not point at the page's own buffer pPg->pBuf (for
+** example because it points into the memory-mapped file), copy one page of
+** content into pPg->pBuf and repoint pPg->pData at that copy. This is a
+** no-op when pPg->pData already equals pPg->pBuf.
+**
+** NOTE(review): this helper has external linkage but no sqlite3 prefix and
+** no visible prototype; it looks like it was meant to be static - confirm.
+*/
+void makePageWriteable(Pager *pPager, PgHdr *pPg){
+  if( pPg->pData!=pPg->pBuf ){
+    memcpy(pPg->pBuf, pPg->pData, pPager->pageSize);
+    pPg->pData = pPg->pBuf;
+  }
+}
 /*
 ** Mark a single data page as writeable. The page is written into the
 ** main journal or sub-journal as required. If the page is written into
 ** one of the journals, the corresponding bit is set in the
@@ -5322,10 +5368,14 @@
     rc = pager_open_journal(pPager);
     if( rc!=SQLITE_OK ) return rc;
   }
   assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
   assert( assert_pager_state(pPager) );
+
+  /* Make sure page content is held in malloced memory */
+  makePageWriteable(pPager, pPg);
+  pData = pPg->pData;
   /* Mark the page as dirty.
If the page has already been written ** to the journal then we can return right away. */ sqlite3PcacheMakeDirty(pPg); @@ -5523,15 +5561,13 @@ /* ** Return TRUE if the page given in the argument was previously passed ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok ** to change the content of the page. */ -#ifndef NDEBUG int sqlite3PagerIswriteable(DbPage *pPg){ return pPg->flags&PGHDR_DIRTY; } -#endif /* ** A call to this routine tells the pager that it is not necessary to ** write the information on page pPg back to the disk, even though ** that page might be marked as dirty. This happens, for example, when @@ -6417,10 +6453,11 @@ sqlite3PcacheDrop(pPgOld); } } origPgno = pPg->pgno; + makePageWriteable(pPager, pPg); sqlite3PcacheMove(pPg, pgno); sqlite3PcacheMakeDirty(pPg); /* For an in-memory database, make sure the original page continues ** to exist, in case the transaction needs to roll back. Use pPgOld @@ -6466,13 +6503,13 @@ #endif /* ** Return a pointer to the data for the specified page. */ -void *sqlite3PagerGetData(DbPage *pPg){ +u8 *sqlite3PagerGetData(DbPage *pPg){ assert( pPg->nRef>0 || pPg->pPager->memDb ); - return pPg->pData; + return (u8*)pPg->pData; } /* ** Return a pointer to the Pager.nExtra bytes of "extra" space ** allocated along with the specified page. Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -121,11 +121,11 @@ /* Operations on page references. */ int sqlite3PagerWrite(DbPage*); void sqlite3PagerDontWrite(DbPage*); int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int); int sqlite3PagerPageRefcount(DbPage*); -void *sqlite3PagerGetData(DbPage *); +u8 *sqlite3PagerGetData(DbPage *); void *sqlite3PagerGetExtra(DbPage *); /* Functions used to manage pager transactions and savepoints. */ void sqlite3PagerPagecount(Pager*, int*); int sqlite3PagerBegin(Pager*, int exFlag, int); @@ -166,12 +166,12 @@ #endif /* Functions to support testing and debugging. 
*/ #if !defined(NDEBUG) || defined(SQLITE_TEST) Pgno sqlite3PagerPagenumber(DbPage*); - int sqlite3PagerIswriteable(DbPage*); #endif +int sqlite3PagerIswriteable(DbPage*); #ifdef SQLITE_TEST int *sqlite3PagerStats(Pager*); void sqlite3PagerRefdump(Pager*); void disable_simulated_io_errors(void); void enable_simulated_io_errors(void); Index: src/pcache.c ================================================================== --- src/pcache.c +++ src/pcache.c @@ -129,11 +129,11 @@ PCache *pCache = p->pCache; if( pCache->bPurgeable ){ if( p->pgno==1 ){ pCache->pPage1 = 0; } - sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, p->pPage, 0); + sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, p->pPage, 1); } } /*************************************************** General Interfaces ****** ** @@ -285,19 +285,19 @@ pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ memset(pPgHdr, 0, sizeof(PgHdr)); pPgHdr->pPage = pPage; - pPgHdr->pData = pPage->pBuf; + pPgHdr->pData = pPgHdr->pBuf = pPage->pBuf; pPgHdr->pExtra = (void *)&pPgHdr[1]; memset(pPgHdr->pExtra, 0, pCache->szExtra); pPgHdr->pCache = pCache; pPgHdr->pgno = pgno; } assert( pPgHdr->pCache==pCache ); assert( pPgHdr->pgno==pgno ); - assert( pPgHdr->pData==pPage->pBuf ); + assert( pPgHdr->pBuf==pPage->pBuf ); assert( pPgHdr->pExtra==(void *)&pPgHdr[1] ); if( 0==pPgHdr->nRef ){ pCache->nRef++; } Index: src/pcache.h ================================================================== --- src/pcache.h +++ src/pcache.h @@ -22,11 +22,12 @@ ** Every page in the cache is controlled by an instance of the following ** structure. 
*/ struct PgHdr { sqlite3_pcache_page *pPage; /* Pcache object page handle */ - void *pData; /* Page data */ + void *pData; /* Page data to actually use */ + void *pBuf; /* Malloced buffer to hold pData */ void *pExtra; /* Extra content */ PgHdr *pDirty; /* Transient list of dirty pages */ Pgno pgno; /* Page number for this page */ Pager *pPager; /* The pager this page is part of */ #ifdef SQLITE_CHECK_PAGES Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -687,10 +687,14 @@ int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xMap)(sqlite3_file*, sqlite3_int64 ofst, sqlite3_int64 len, + int mmapFlags, void **ppMemObj, void **ppMem); + int (*xUnmap)(sqlite3_file*, void *pMemObj); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; /* ** CAPI3REF: Standard File Control Opcodes Index: test/syscall.test ================================================================== --- test/syscall.test +++ test/syscall.test @@ -58,10 +58,11 @@ # foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate pread64 pwrite64 unlink openDirectory mkdir rmdir + mmap munmap } { if {[test_syscall exists $s]} {lappend syscall_list $s} } do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]