Index: ext/fts5/fts5.c ================================================================== --- ext/fts5/fts5.c +++ ext/fts5/fts5.c @@ -15,10 +15,18 @@ #if defined(SQLITE_ENABLE_FTS5) #include "fts5Int.h" +/* +** This variable is set to true when running corruption tests. Otherwise +** false. If it is false, extra assert() conditions in the fts5 code are +** activated - conditions that are only true if it is guaranteed that the +** fts5 database is not corrupt. +*/ +int sqlite3_fts5_may_be_corrupt = 0; + typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -42,11 +42,11 @@ /* ** The assert_nc() macro is similar to the assert() macro, except that it ** is used for assert() conditions that are true only if it can be ** guranteed that the database is not corrupt. */ -#ifdef SQLITE_TEST +#ifdef SQLITE_DEBUG extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else # define assert_nc(x) assert(x) #endif @@ -113,11 +113,11 @@ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; }; /* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 1 +#define FTS5_CURRENT_VERSION 2 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -200,19 +200,37 @@ ** is an associated index-by-rowid record. ** * the number of zero-term leaves as a varint. ** ** 5. Segment doclist indexes: ** -** A list of varints. If the first termless page contains at least one -** docid, the list begins with that docid as a varint followed by the -** value 1 (0x01). Or, if the first termless page contains no docids, -** a varint containing the last docid stored on the term page followed -** by a 0 (0x00) value. +** Doclist indexes are themselves b-trees, however they usually consist of +** a single leaf record only. The format of each doclist index leaf page +** is: ** -** For each subsequent page in the doclist, either a 0x00 byte if the -** page contains no terms, or a delta-encoded docid (always +ve) -** representing the first docid on the page otherwise. +** * Flags byte. Bits are: +** 0x01: Clear if leaf is also the root page, otherwise set. +** +** * Page number of fts index leaf page. As a varint. +** +** * First docid on page indicated by previous field. As a varint. +** +** * A list of varints, one for each subsequent termless page. A +** positive delta if the termless page contains at least one docid, +** or an 0x00 byte otherwise. +** +** Internal doclist index nodes are: +** +** * Flags byte. Bits are: +** 0x01: Clear for root page, otherwise set. +** +** * Page number of first child page. As a varint. +** +** * Copy of first docid on page indicated by previous field. As a varint. +** +** * A list of delta-encoded varints - the first docid on each subsequent +** child page. +** */ /* ** Rowids for the averages and structure records in the %_data table. */ @@ -238,37 +256,46 @@ ** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used ** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns ** SQLITE_FULL and fails the current operation if they ever prove too small. */ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ +#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ -#define FTS5_SEGMENT_ROWID(segid, height, pgno) ( \ - ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ +#define fts5_dri(segid, dlidx, height, pgno) ( \ + ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ + ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) +#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) +#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) + +#if 0 /* ** The height of segment b-trees is actually limited to one less than ** (1<term); } /* -** The iterator passed as the first argument has the following fields set -** as follows. This function sets up the rest of the iterator so that it -** points to the first rowid in the doclist-index. -** -** pData: pointer to doclist-index record, -** iLeafPgno: page number that this doclist-index is associated with. -** -** When this function is called pIter->iLeafPgno is the page number the -** doclist is associated with (the one featuring the term). +** Advance the iterator passed as the only argument. If the end of the +** doclist-index page is reached, return non-zero. */ -static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int i; - int bPresent; - - assert( pIter->pData ); - assert( pIter->iLeafPgno>0 ); - - /* Read the first rowid value. And the "present" flag that follows it. */ - pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); - bPresent = pData->p[pIter->iOff++]; - if( bPresent ){ - i = 0; +static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ + Fts5Data *pData = pLvl->pData; + + if( pLvl->iOff==0 ){ + assert( pLvl->bEof==0 ); + pLvl->iOff = 1; + pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); + pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); + pLvl->iFirstOff = pLvl->iOff; }else{ - /* Count the number of leading 0x00 bytes. */ - for(i=1; pIter->iOffn; i++){ - if( pData->p[pIter->iOff] ) break; - pIter->iOff++; + int iOff; + for(iOff=pLvl->iOff; iOffn; iOff++){ + if( pData->p[iOff] ) break; } - /* Unless we are already at the end of the doclist-index, load the first - ** rowid value. */ - if( pIter->iOffn ){ + if( iOffn ){ i64 iVal; - pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); - pIter->iRowid += iVal; + pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + pLvl->iRowid += iVal; + pLvl->iOff = iOff; }else{ - pIter->bEof = 1; + pLvl->bEof = 1; } } - pIter->iLeafPgno += (i+1); - pIter->iFirstOff = pIter->iOff; - return pIter->bEof; + return pLvl->bEof; } /* ** Advance the iterator passed as the only argument. */ -static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int iOff; - - for(iOff=pIter->iOff; iOffn; iOff++){ - if( pData->p[iOff] ) break; - } - - if( iOffn ){ - i64 iVal; - pIter->iLeafPgno += (iOff - pIter->iOff) + 1; - iOff += getVarint(&pData->p[iOff], (u64*)&iVal); - pIter->iRowid += iVal; - pIter->iOff = iOff; - }else{ - pIter->bEof = 1; - } - - return pIter->bEof; -} +static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; + + assert( iLvlnLvl ); + if( fts5DlidxLvlNext(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterNextR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ) fts5DlidxLvlNext(pLvl); + } + } + } + + return pIter->aLvl[0].bEof; +} +static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterNextR(p, pIter, 0); +} + +/* +** The iterator passed as the first argument has the following fields set +** as follows. This function sets up the rest of the iterator so that it +** points to the first rowid in the doclist-index. +** +** pData: +** pointer to doclist-index record, +** +** When this function is called pIter->iLeafPgno is the page number the +** doclist is associated with (the one featuring the term). +*/ +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ + int i; + for(i=0; inLvl; i++){ + fts5DlidxLvlNext(&pIter->aLvl[i]); + } + return pIter->aLvl[0].bEof; +} + static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ - return pIter->bEof; + return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; } -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ - if( fts5DlidxIterFirst(pIter)==0 ){ - while( 0==fts5DlidxIterNext(pIter) ); - pIter->bEof = 0; +static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ + int i; + + /* Advance each level to the last entry on the last page */ + for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + + if( i>0 ){ + Fts5DlidxLvl *pChild = &pLvl[-1]; + fts5DataRelease(pChild->pData); + memset(pChild, 0, sizeof(Fts5DlidxLvl)); + pChild->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) + ); + } } } -static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ - int iOff = pIter->iOff; +/* +** Move the iterator passed as the only argument to the previous entry. +*/ +static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ + int iOff = pLvl->iOff; - assert( pIter->bEof==0 ); - if( iOff<=pIter->iFirstOff ){ - pIter->bEof = 1; + assert( pLvl->bEof==0 ); + if( iOff<=pLvl->iFirstOff ){ + pLvl->bEof = 1; }else{ - u8 *a = pIter->pData->p; + u8 *a = pLvl->pData->p; i64 iVal; int iLimit; + int ii; + int nZero = 0; /* Currently iOff points to the first byte of a varint. This block ** decrements iOff until it points to the first byte of the previous ** varint. Taking care not to read any memory locations that occur ** before the buffer in memory. */ @@ -1508,61 +1576,128 @@ for(iOff--; iOff>iLimit; iOff--){ if( (a[iOff-1] & 0x80)==0 ) break; } getVarint(&a[iOff], (u64*)&iVal); - pIter->iRowid -= iVal; - pIter->iLeafPgno--; - - /* Skip backwards passed any 0x00 bytes. */ - while( iOff>pIter->iFirstOff - && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 - ){ - iOff--; - pIter->iLeafPgno--; - } - pIter->iOff = iOff; + pLvl->iRowid -= iVal; + pLvl->iLeafPgno--; + + /* Skip backwards past any 0x00 varints. */ + for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ + nZero++; + } + if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ + /* The byte immediately before the last 0x00 byte has the 0x80 bit + ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 + ** bytes before a[ii]. */ + int bZero = 0; /* True if last 0x00 counts */ + if( (ii-8)>=pLvl->iFirstOff ){ + int j; + for(j=1; j<=8 && (a[ii-j] & 0x80); j++); + bZero = (j>8); + } + if( bZero==0 ) nZero--; + } + pLvl->iLeafPgno -= nZero; + pLvl->iOff = iOff - nZero; } - return pIter->bEof; + return pLvl->bEof; +} + +static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; + + assert( iLvlnLvl ); + if( fts5DlidxLvlPrev(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterPrevR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ){ + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + } + } + } + } + + return pIter->aLvl[0].bEof; +} +static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterPrevR(p, pIter, 0); +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). +*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + int i; + for(i=0; inLvl; i++){ + fts5DataRelease(pIter->aLvl[i].pData); + } + sqlite3_free(pIter); + } } static Fts5DlidxIter *fts5DlidxIterInit( Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */ ){ - Fts5DlidxIter *pIter; - - pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); - if( pIter==0 ) return 0; - - pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg)); - if( pIter->pData==0 ){ - sqlite3_free(pIter); - pIter = 0; - }else{ - pIter->iLeafPgno = iLeafPg; + Fts5DlidxIter *pIter = 0; + int i; + int bDone = 0; + + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); + Fts5DlidxIter *pNew; + + pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); + Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; + pIter = pNew; + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, iRowid); + if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ + bDone = 1; + } + pIter->nLvl = i+1; + } + } + + if( p->rc==SQLITE_OK ){ + pIter->iSegid = iSegid; if( bRev==0 ){ fts5DlidxIterFirst(pIter); }else{ - fts5DlidxIterLast(pIter); + fts5DlidxIterLast(p, pIter); } } + + if( p->rc!=SQLITE_OK ){ + fts5DlidxIterFree(pIter); + pIter = 0; + } return pIter; } -/* -** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). -*/ -static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ - if( pIter ){ - fts5DataRelease(pIter->pData); - sqlite3_free(pIter); - } +static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iRowid; +} +static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iLeafPgno; } static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ *piRowid = (int)fts5GetU16(&pLeaf->p[0]); *piTerm = (int)fts5GetU16(&pLeaf->p[2]); @@ -1938,11 +2073,11 @@ if( pDlidx ){ /* If the doclist-iterator is already at EOF, then the current doclist ** contains no entries except those on the current page. */ if( fts5DlidxIterEof(p, pDlidx)==0 ){ int iSegid = pIter->pSeg->iSegid; - pgnoLast = pDlidx->iLeafPgno; + pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); } }else{ @@ -2344,34 +2479,38 @@ return 0; } /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. -** It is an error if leaf iLeafPgno contains no rowid. +** It is an error if leaf iLeafPgno does not exist or contains no rowids. */ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter, /* Iterator to advance */ int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); - pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - - if( p->rc==SQLITE_OK ){ - int iOff; - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; - - iOff = fts5GetU16(&a[0]); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - }else{ - iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - fts5SegIterLoadNPos(p, pIter); + if( iLeafPgno>pIter->pSeg->pgnoLast ){ + p->rc = FTS5_CORRUPT; + }else{ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } } } } /* @@ -2392,25 +2531,25 @@ assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx ); assert( pIter->pLeaf ); if( bRev==0 ){ - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ - iLeafPgno = pDlidx->iLeafPgno; - fts5DlidxIterNext(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ + iLeafPgno = fts5DlidxIterPgno(pDlidx); + fts5DlidxIterNext(p, pDlidx); } - assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); if( iLeafPgno>pIter->iLeafPgno ){ fts5SegIterGotoPage(p, pIter, iLeafPgno); bMove = 0; } }else{ assert( iMatchiRowid ); - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ - fts5DlidxIterPrev(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatchiLeafPgno; + iLeafPgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); if( iLeafPgnoiLeafPgno ){ pIter->iLeafPgno = iLeafPgno+1; @@ -2801,10 +2940,57 @@ for(i=0; inDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); + for(i=0; inDlidx; i++){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + if( pDlidx->buf.n==0 ) break; + if( bFlush ){ + assert( pDlidx->pgno!=0 ); + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + } + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + } +} + +/* +** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. +** Any new array elements are zeroed before returning. +*/ +static int fts5WriteDlidxGrow( + Fts5Index *p, + Fts5SegWriter *pWriter, + int nLvl +){ + if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ + Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( + pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl + ); + if( aDlidx==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); + memset(&aDlidx[pWriter->nDlidx], 0, nByte); + pWriter->aDlidx = aDlidx; + pWriter->nDlidx = nLvl; + } + } + return p->rc; +} /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. */ @@ -2811,27 +2997,26 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - i64 iKey = FTS5_DOCLIST_IDX_ROWID( - pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty - ); - assert( pWriter->cdlidx.n>0 ); - fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); + + /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written + ** to the database, also write the doclist-index to disk. */ + if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ bFlag = 1; } + fts5WriteDlidxClear(p, pWriter, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); pWriter->nEmpty = 0; + }else{ + fts5WriteDlidxClear(p, pWriter, 0); } - /* Whether or not it was written to disk, zero the doclist index at this - ** point */ - sqlite3Fts5BufferZero(&pWriter->cdlidx); - pWriter->bDlidxPrevValid = 0; + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *aNew; @@ -2898,47 +3083,102 @@ break; } } } +/* +** This function is called when flushing a leaf page that contains no +** terms at all to disk. +*/ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ - if( pWriter->bFirstRowidInPage ){ - /* No rowids on this page. Append an 0x00 byte to the current - ** doclist-index */ - if( pWriter->bDlidxPrevValid==0 ){ - i64 iRowid = pWriter->iPrevRowid; - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; - } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); - } + /* If there were no rowids on the leaf page either and the doclist-index + ** has already been started, append an 0x00 byte to it. */ + if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; + assert( pDlidx->bPrevValid ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); + } + + /* Increment the "number of sequential leaves without a term" counter. */ pWriter->nEmpty++; } + +static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ + i64 iRowid; + int iOff; + + iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid); + getVarint(&pBuf->p[iOff], (u64*)&iRowid); + return iRowid; +} /* -** Rowid iRowid has just been appended to the current leaf page. As it is -** the first on its page, append an entry to the current doclist-index. +** Rowid iRowid has just been appended to the current leaf page. It is the +** first on the page. This function appends an appropriate entry to the current +** doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ - i64 iVal; - if( pWriter->bDlidxPrevValid ){ - iVal = iRowid - pWriter->iDlidxPrev; - }else{ - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - iVal = 1; - } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; + int i; + int bDone = 0; + + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + i64 iVal; + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + + if( pDlidx->buf.n>=p->pConfig->pgsz ){ + /* The current doclist-index page is full. Write it to disk and push + ** a copy of iRowid (which will become the first rowid on the next + ** doclist-index leaf page) up into the next level of the b-tree + ** hierarchy. If the node being flushed is currently the root node, + ** also push its first rowid upwards. */ + pDlidx->buf.p[0] = 0x01; /* Not the root node */ + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + fts5WriteDlidxGrow(p, pWriter, i+2); + pDlidx = &pWriter->aDlidx[i]; + if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ + i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); + + /* This was the root node. Push its first rowid up to the new root. */ + pDlidx[1].pgno = pDlidx->pgno; + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); + pDlidx[1].bPrevValid = 1; + pDlidx[1].iPrev = iFirst; + } + + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + pDlidx->pgno++; + }else{ + bDone = 1; + } + + if( pDlidx->bPrevValid ){ + iVal = iRowid - pDlidx->iPrev; + }else{ + i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); + assert( pDlidx->buf.n==0 ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); + iVal = iRowid; + } + + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); + pDlidx->bPrevValid = 1; + pDlidx->iPrev = iRowid; + } } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->aWriter[0]; @@ -3031,10 +3271,13 @@ fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; + + assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); + pWriter->aDlidx[0].pgno = pPage->pgno; /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } @@ -3169,11 +3412,15 @@ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5BufferFree(&pPg->term); fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); - sqlite3Fts5BufferFree(&pWriter->cdlidx); + + for(i=0; inDlidx; i++){ + sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); + } + sqlite3_free(pWriter->aDlidx); } static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, @@ -3180,13 +3427,15 @@ int iSegid ){ memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); - if( pWriter->aWriter==0 ) return; + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); + if( pWriter->aDlidx==0 ) return; pWriter->nWriter = 1; + pWriter->nDlidx = 1; pWriter->aWriter[0].pgno = 1; pWriter->bFirstTermInPage = 1; } static void fts5WriteInitForAppend( @@ -3196,14 +3445,16 @@ ){ int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); if( p->rc==SQLITE_OK ){ int pgno = 1; int i; + pWriter->nDlidx = 1; pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno); Fts5PageWriter *pPg = &pWriter->aWriter[i]; @@ -3581,22 +3832,22 @@ ** page size. */ assert( pgsz>0 ); pBuf = &writer.aWriter[0].buf; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); - /* Begin scanning through hash table entries. */ + /* Begin scanning through hash table entries. This loop runs once for each + ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ memset(pBuf->p, 0, 4); pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } - while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ - const char *zTerm; - int nTerm; - const u8 *pDoclist; - int nDoclist; + const char *zTerm; /* Buffer containing term */ + int nTerm; /* Size of zTerm in bytes */ + const u8 *pDoclist; /* Pointer to doclist for this term */ + int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); @@ -3609,11 +3860,13 @@ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); if( p->rc ) break; } } - /* Write the term to the leaf. And push it up into the b-tree hierarchy */ + /* Write the term to the leaf. And if it is the first on the leaf, and + ** the leaf is not page number 1, push it up into the b-tree hierarchy + ** as well. */ if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); nSuffix = nTerm - nPre; }else{ @@ -3627,10 +3880,16 @@ } nSuffix = nTerm; } pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); + + /* We just wrote a term into page writer.aWriter[0].pgno. If a + ** doclist-index is to be generated for this doclist, it will be + ** associated with this page. */ + assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); + writer.aDlidx[0].pgno = writer.aWriter[0].pgno; if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ @@ -3823,12 +4082,10 @@ ){ if( p->rc==SQLITE_OK ){ Fts5ChunkIter iter; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); - static int nCall = 0; - nCall++; fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ if( bSz ){ @@ -4414,11 +4671,11 @@ /* ** Return the current term. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ int n; - const char *z = fts5MultiIterTerm(pIter->pMulti, &n); + const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); *pn = n-1; return &z[1]; } @@ -4652,36 +4909,39 @@ Fts5Index *p, int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; - i64 cksum1 = 13; - i64 cksum2 = 13; + u64 cksum1 = 13; + u64 cksum2 = 13; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum1 = (cksum1 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + assert( pgno>iLeaf ); + cksum1 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterPrev(pDlidx) + fts5DlidxIterPrev(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum2 = (cksum2 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + + assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); + cksum2 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; - if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } #else # define fts5DlidxIterTestReverse(x,y,z) #endif @@ -4746,36 +5006,35 @@ int iPg; i64 iKey; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ - for(iPg=iPrevLeaf+1; iPgiLeafPgno; iPg++){ + for(iPg=iPrevLeaf+1; iPgp[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } - iPrevLeaf = pDlidx->iLeafPgno; + iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; + if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } - } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); @@ -4992,42 +5251,40 @@ ** the opposite of macro FTS5_SEGMENT_ROWID(). */ static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ int *piSegid, /* OUT: Segment id */ + int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ ){ *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); iRowid >>= FTS5_DATA_PAGE_B; *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); iRowid >>= FTS5_DATA_HEIGHT_B; + + *pbDlidx = (int)(iRowid & 0x0001); + iRowid >>= FTS5_DATA_DLI_B; *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iSegid, iHeight, iPgno; /* Rowid compenents */ - fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno); + int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - "{structure idx=%d}", (int)(iKey-10) - ); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); } } - else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)", - iSegid, iPgno - ); - }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)", - iSegid, iHeight, iPgno + else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", + bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno ); } } static void fts5DebugStructure( @@ -5133,11 +5390,11 @@ sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args (always 2) */ sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iSegid,iHeight,iPgno; /* Rowid components */ + int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ int nSpace = 0; @@ -5150,28 +5407,28 @@ nSpace = n + FTS5_DATA_ZERO_PADDING; a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); - fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno); + fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); - if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + if( bDlidx ){ Fts5Data dlidx; - Fts5DlidxIter iter; + Fts5DlidxLvl lvl; dlidx.p = a; dlidx.n = n; dlidx.nRef = 2; - memset(&iter, 0, sizeof(Fts5DlidxIter)); - iter.pData = &dlidx; - iter.iLeafPgno = iPgno; + memset(&lvl, 0, sizeof(Fts5DlidxLvl)); + lvl.pData = &dlidx; + lvl.iLeafPgno = iPgno; - for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){ + for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ sqlite3Fts5BufferAppendPrintf(&rc, &s, - " %d(%lld)", iter.iLeafPgno, iter.iRowid + " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ Index: ext/fts5/fts5_tcl.c ================================================================== --- ext/fts5/fts5_tcl.c +++ ext/fts5/fts5_tcl.c @@ -20,17 +20,11 @@ #include "fts5.h" #include #include -/* -** This variable is set to true when running corruption tests. Otherwise -** false. If it is false, extra assert() conditions in the fts5 code are -** activated - conditions that are only true if it is guaranteed that the -** fts5 database is not corrupt. -*/ -int sqlite3_fts5_may_be_corrupt = 0; +extern int sqlite3_fts5_may_be_corrupt; /************************************************************************* ** This is a copy of the first part of the SqliteDb structure in ** tclsqlite.c. We need it here so that the get_sqlite_pointer routine ** can extract the sqlite3* pointer from an existing Tcl SQLite Index: ext/fts5/test/fts5aa.test ================================================================== --- ext/fts5/test/fts5aa.test +++ ext/fts5/test/fts5aa.test @@ -47,11 +47,11 @@ do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} +} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } #------------------------------------------------------------------------- @@ -179,11 +179,10 @@ set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} -# if {$i==1} break } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} #exit #------------------------------------------------------------------------- @@ -240,10 +239,11 @@ } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} if {[set_test_counter errors]} break } + #------------------------------------------------------------------------- # reset_db do_execsql_test 10.0 { Index: ext/fts5/test/fts5al.test ================================================================== --- ext/fts5/test/fts5al.test +++ ext/fts5/test/fts5al.test @@ -24,21 +24,21 @@ } do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {version 1} +} {version 2} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32 version 1} +} {pgsz 32 version 2} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64 version 1} +} {pgsz 64 version 2} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. # foreach {tn defn} { Index: ext/fts5/test/fts5corrupt2.test ================================================================== --- ext/fts5/test/fts5corrupt2.test +++ ext/fts5/test/fts5corrupt2.test @@ -14,10 +14,11 @@ # "correctly" means without crashing. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt2 +sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". # expr srand(0) @@ -27,10 +28,11 @@ INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) INSERT INTO t1 SELECT rnddoc(10) FROM ii; } set mask [expr 31 << 31] + # Test 1: # # For each page in the t1_data table, open a transaction and DELETE # the t1_data entry. Then run: @@ -192,7 +194,8 @@ do_test 4.$tn.x { expr $nCorrupt>0 } 1 } +sqlite3_fts5_may_be_corrupt 0 finish_test Index: ext/fts5/test/fts5dlidx.test ================================================================== --- ext/fts5/test/fts5dlidx.test +++ ext/fts5/test/fts5dlidx.test @@ -59,10 +59,11 @@ append doc " y" } } execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } + breakpoint execsql COMMIT do_test $tn.1 { execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} @@ -80,12 +81,12 @@ } do_dlidx_test1 1.1 10 100 10000 0 1000 do_dlidx_test1 1.2 10 10 10000 0 128 -do_dlidx_test1 1.3 10 10 100 0 36028797018963970 -do_dlidx_test1 1.3 10 10 50 0 150000000000000000 +do_dlidx_test1 1.3 10 10 66 0 36028797018963970 +do_dlidx_test1 1.4 10 10 50 0 150000000000000000 finish_test Index: ext/fts5/test/fts5integrity.test ================================================================== --- ext/fts5/test/fts5integrity.test +++ ext/fts5/test/fts5integrity.test @@ -28,8 +28,30 @@ INSERT INTO yy VALUES('term'); } do_execsql_test 2.1 { INSERT INTO yy(yy) VALUES('integrity-check'); } + +#-------------------------------------------------------------------- +# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE zz USING fts5(z); + INSERT INTO zz(zz, rank) VALUES('pgsz', 32); + INSERT INTO zz VALUES('b b b b b b b b b b b b b b'); + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz(zz) VALUES('optimize'); +} + +do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } + + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} +#exit + finish_test Index: ext/fts5/test/fts5rowid.test ================================================================== --- ext/fts5/test/fts5rowid.test +++ ext/fts5/test/fts5rowid.test @@ -23,11 +23,11 @@ SELECT fts5_rowid('segment') } {1 {should be: fts5_rowid('segment', segid, height, pgno))}} do_execsql_test 1.3 { SELECT fts5_rowid('segment', 1, 1, 1) -} {70866960385} +} {139586437121} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); } {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} Index: ext/fts5/test/fts5version.test ================================================================== --- ext/fts5/test/fts5version.test +++ ext/fts5/test/fts5version.test @@ -22,38 +22,38 @@ INSERT INTO t1 VALUES('a b c d'); } {} do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' -} {version 1} +} {version 2} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { - UPDATE t1_config set v=2 WHERE k='version'; + UPDATE t1_config set v=3 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} breakpoint do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}} finish_test Index: ext/fts5/tool/loadfts5.tcl ================================================================== --- ext/fts5/tool/loadfts5.tcl +++ ext/fts5/tool/loadfts5.tcl @@ -107,10 +107,11 @@ db transaction { set pref "" if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" + # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } } else { Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -239,10 +239,18 @@ } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] + +test_suite "fts5-light" -prefix "" -description { + All FTS5 tests. +} -files [ + test_set \ + [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ + -exclude *corrupt* *fault* *big* *fts5aj* +] test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. Index: tool/mksqlite3c.tcl ================================================================== --- tool/mksqlite3c.tcl +++ tool/mksqlite3c.tcl @@ -375,10 +375,11 @@ fts5_index.c fts5parse.c fts5_storage.c fts5_tokenize.c fts5_unicode2.c + fts5_vocab.c rtree.c icu.c fts3_icu.c } {