Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -27,11 +27,11 @@ typedef unsigned int u32; typedef unsigned short u16; typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; -#define ArraySize(x) (sizeof(x) / sizeof(x[0])) +#define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0]))) #define testcase(x) #define ALWAYS(x) 1 #define NEVER(x) 0 @@ -313,10 +313,16 @@ */ typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; +struct Fts5IndexIter { + i64 iRowid; + const u8 *pData; + int nData; +}; + /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ @@ -380,12 +386,10 @@ */ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); -int sqlite3Fts5IterPoslist(Fts5IndexIter*,Fts5Colset*, const u8**, int*, i64*); -int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter*); @@ -467,12 +471,10 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); -int sqlite3Fts5IterCollist(Fts5IndexIter*, const u8 **, int*); - /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** Index: ext/fts5/fts5_aux.c ================================================================== --- ext/fts5/fts5_aux.c +++ ext/fts5/fts5_aux.c @@ -542,11 +542,11 @@ { "bm25", 0, fts5Bm25Function, 0 }, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ - for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ + for(i=0; rc==SQLITE_OK && ixCreateFunction(pApi, aBuiltin[i].zFunc, aBuiltin[i].pUserData, aBuiltin[i].xFunc, aBuiltin[i].xDestroy Index: ext/fts5/fts5_buffer.c ================================================================== --- ext/fts5/fts5_buffer.c +++ ext/fts5/fts5_buffer.c @@ -320,11 +320,11 @@ ){ int rc = SQLITE_OK; *pbPresent = 0; if( p ){ int i; - int hash = 13; + u32 hash = 13; Fts5TermsetEntry *pEntry; /* Calculate a hash value for this term. This is the same hash checksum ** used by the fts5_hash.c module. This is not important for correct ** operation of the module, but is necessary to ensure that some tests @@ -337,11 +337,11 @@ for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ if( pEntry->iIdx==iIdx && pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 - ){ + ){ *pbPresent = 1; break; } } Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -304,11 +304,11 @@ static int fts5ExprSynonymList( Fts5ExprTerm *pTerm, int bCollist, Fts5Colset *pColset, i64 iRowid, - int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */ + Fts5Buffer *pBuf, /* Use this buffer for space if required */ u8 **pa, int *pn ){ Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int nIter = 0; @@ -318,22 +318,11 @@ assert( pTerm->pSynonym ); for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ - const u8 *a; - int n; - - if( bCollist ){ - rc = sqlite3Fts5IterCollist(pIter, &a, &n); - }else{ - i64 dummy; - rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy); - } - - if( rc!=SQLITE_OK ) goto synonym_poslist_out; - if( n==0 ) continue; + if( pIter->nData==0 ) continue; if( nIter==nAlloc ){ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( aNew==0 ){ rc = SQLITE_NOMEM; @@ -342,24 +331,23 @@ memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); nAlloc = nAlloc*2; if( aIter!=aStatic ) sqlite3_free(aIter); aIter = aNew; } - sqlite3Fts5PoslistReaderInit(a, n, &aIter[nIter]); + sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); assert( aIter[nIter].bEof==0 ); nIter++; } } - assert( *pbDel==0 ); if( nIter==1 ){ *pa = (u8*)aIter[0].a; *pn = aIter[0].n; }else{ Fts5PoslistWriter writer = {0}; - Fts5Buffer buf = {0,0,0}; i64 iPrev = -1; + fts5BufferZero(pBuf); while( 1 ){ int i; i64 iMin = FTS5_LARGEST_INT64; for(i=0; ip; + *pn = pBuf->n; } } synonym_poslist_out: if( aIter!=aStatic ) sqlite3_free(aIter); @@ -415,32 +400,37 @@ fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ - if( pPhrase->nTerm>(int)ArraySize(aStatic) ){ + if( pPhrase->nTerm>ArraySize(aStatic) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - i64 dummy; int n = 0; int bFlag = 0; - const u8 *a = 0; + u8 *a = 0; if( pTerm->pSynonym ){ + Fts5Buffer buf = {0, 0, 0}; rc = fts5ExprSynonymList( - pTerm, 0, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n + pTerm, 0, pColset, pNode->iRowid, &buf, &a, &n ); + if( rc ){ + sqlite3_free(a); + goto ismatch_out; + } + if( a==buf.p ) bFlag = 1; }else{ - rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy); + a = (u8*)pTerm->pIter->pData; + n = pTerm->pIter->nData; } - if( rc!=SQLITE_OK ) goto ismatch_out; sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); aIter[i].bFlag = (u8)bFlag; if( aIter[i].bEof ) goto ismatch_out; } @@ -551,11 +541,11 @@ assert( pNear->nPhrase>1 ); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ - if( pNear->nPhrase>(int)ArraySize(aStatic) ){ + if( pNear->nPhrase>ArraySize(aStatic) ){ int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); }else{ memset(aStatic, 0, sizeof(aStatic)); } @@ -773,19 +763,24 @@ Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; pPhrase->poslist.n = 0; for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ Fts5IndexIter *pIter = pTerm->pIter; if( sqlite3Fts5IterEof(pIter)==0 ){ +#if 0 int n; i64 iRowid; rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid); if( rc!=SQLITE_OK ){ *pRc = rc; return 0; }else if( iRowid==pNode->iRowid && n>0 ){ pPhrase->poslist.n = 1; } +#endif + if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ + pPhrase->poslist.n = 1; + } } } return pPhrase->poslist.n; }else{ int i; @@ -798,13 +793,17 @@ if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); +#if 0 rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); +#endif } } *pRc = rc; if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ @@ -821,25 +820,24 @@ /* As this "NEAR" object is actually a single phrase that consists ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR ** expressions. */ - Fts5ExprNearset *pNear = pNode->pNear; - Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - Fts5Colset *pColset = pNear->pColset; - int rc; assert( pNode->eType==FTS5_TERM ); - assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); assert( pPhrase->aTerm[0].pSynonym==0 ); - rc = sqlite3Fts5IterPoslist(pIter, pColset, - (const u8**)&pPhrase->poslist.p, (int*)&pPhrase->poslist.n, &pNode->iRowid - ); + pPhrase->poslist.n = pIter->nData; + if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ + pPhrase->poslist.p = (u8*)pIter->pData; + } + pNode->iRowid = pIter->iRowid; pNode->bNomatch = (pPhrase->poslist.n==0); - return rc; + return SQLITE_OK; } /* ** All individual term iterators in pNear are guaranteed to be valid when ** this function is called. This function checks if all term iterators @@ -1368,14 +1366,14 @@ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); sqlite3Fts5IterClose(pTerm->pIter); - for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; sqlite3Fts5IterClose(pSyn->pIter); + fts5BufferFree((Fts5Buffer*)&pSyn[1]); sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); @@ -1459,17 +1457,17 @@ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; - int nByte = sizeof(Fts5ExprTerm) + nToken+1; + int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ){ rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, nByte); - pSyn->zTerm = (char*)&pSyn[1]; + pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); memcpy(pSyn->zTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } }else{ @@ -2244,11 +2242,11 @@ }; int i; int rc = SQLITE_OK; void *pCtx = (void*)pGlobal; - for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aFunc); i++){ + for(i=0; rc==SQLITE_OK && iz, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); } /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */ @@ -2482,34 +2480,29 @@ Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; Fts5ExprNode *pNode = pPhrase->pNode; int rc = SQLITE_OK; assert( iPhrase>=0 && iPhrasenPhrase ); + assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); + if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid && pPhrase->poslist.n>0 ){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; if( pTerm->pSynonym ){ - int bDel = 0; - u8 *a; + Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; rc = fts5ExprSynonymList( - pTerm, 1, 0, pNode->iRowid, &bDel, &a, pnCollist + pTerm, 1, 0, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist ); - if( bDel ){ - sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, *pnCollist, a); - *ppCollist = pPhrase->poslist.p; - sqlite3_free(a); - }else{ - *ppCollist = a; - } - }else{ - sqlite3Fts5IterCollist(pPhrase->aTerm[0].pIter, ppCollist, pnCollist); + }else{ + *ppCollist = pPhrase->aTerm[0].pIter->pData; + *pnCollist = pPhrase->aTerm[0].pIter->nData; } }else{ *ppCollist = 0; *pnCollist = 0; } return rc; } Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -259,10 +259,11 @@ typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; +typedef struct Fts5Iter Fts5Iter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; @@ -501,14 +502,20 @@ ** ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ -struct Fts5IndexIter { +struct Fts5Iter { + Fts5IndexIter base; /* Base class containing output vars */ + Fts5Index *pIndex; /* Index that owns this iterator */ Fts5Structure *pStruct; /* Database structure for this iterator */ Fts5Buffer poslist; /* Buffer containing current poslist */ + Fts5Colset *pColset; /* Restrict matches to these columns */ + + /* Invoked to set output variables. */ + void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ u8 bSkipEmpty; /* True to skip deleted entries */ u8 bEof; /* True at EOF */ @@ -1750,11 +1757,11 @@ /* ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ -static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){ +static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } /* @@ -2404,11 +2411,11 @@ ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. */ static void fts5AssertComparisonResult( - Fts5IndexIter *pIter, + Fts5Iter *pIter, Fts5SegIter *p1, Fts5SegIter *p2, Fts5CResult *pRes ){ int i1 = p1 - pIter->aSeg; @@ -2445,11 +2452,11 @@ ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ -static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){ +static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int i; assert( (pFirst->pLeaf==0)==pIter->bEof ); @@ -2490,11 +2497,11 @@ ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. */ -static int fts5MultiIterDoCompare(Fts5IndexIter *pIter, int iOut){ +static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ int i1; /* Index of left-hand Fts5SegIter */ int i2; /* Index of right-hand Fts5SegIter */ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ @@ -2636,11 +2643,11 @@ /* ** Free the iterator object passed as the second argument. */ -static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){ +static void fts5MultiIterFree(Fts5Index *p, Fts5Iter *pIter){ if( pIter ){ int i; for(i=0; inSeg; i++){ fts5SegIterClear(&pIter->aSeg[i]); } @@ -2650,11 +2657,11 @@ } } static void fts5MultiIterAdvanced( Fts5Index *p, /* FTS5 backend to iterate within */ - Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */ + Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ int iChanged, /* Index of sub-iterator just advanced */ int iMinset /* Minimum entry in aFirst[] to set */ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ @@ -2678,12 +2685,13 @@ ** on the iterator instead. That function does the same as this one, except ** that it deals with more complicated cases as well. */ static int fts5MultiIterAdvanceRowid( Fts5Index *p, /* FTS5 backend to iterate within */ - Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */ - int iChanged /* Index of sub-iterator just advanced */ + Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged, /* Index of sub-iterator just advanced */ + Fts5SegIter **ppFirst ){ Fts5SegIter *pNew = &pIter->aSeg[iChanged]; if( pNew->iRowid==pIter->iSwitchRowid || (pNew->iRowidiSwitchRowid)==pIter->bRev @@ -2712,17 +2720,18 @@ pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; } } + *ppFirst = pNew; return 0; } /* ** Set the pIter->bEof variable based on the state of the sub-iterators. */ -static void fts5MultiIterSetEof(Fts5IndexIter *pIter){ +static void fts5MultiIterSetEof(Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; pIter->bEof = pSeg->pLeaf==0; pIter->iSwitchRowid = pSeg->iRowid; } @@ -2733,43 +2742,48 @@ ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called. */ static void fts5MultiIterNext( Fts5Index *p, - Fts5IndexIter *pIter, + Fts5Iter *pIter, int bFrom, /* True if argument iFrom is valid */ i64 iFrom /* Advance at least as far as this */ ){ - if( p->rc==SQLITE_OK ){ - int bUseFrom = bFrom; - do { - int iFirst = pIter->aFirst[1].iFirst; - int bNewTerm = 0; - Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; - assert( p->rc==SQLITE_OK ); - if( bUseFrom && pSeg->pDlidx ){ - fts5SegIterNextFrom(p, pSeg, iFrom); - }else{ - pSeg->xNext(p, pSeg, &bNewTerm); - } - - if( pSeg->pLeaf==0 || bNewTerm - || fts5MultiIterAdvanceRowid(p, pIter, iFirst) - ){ - fts5MultiIterAdvanced(p, pIter, iFirst, 1); - fts5MultiIterSetEof(pIter); - } - fts5AssertMultiIterSetup(p, pIter); - - bUseFrom = 0; - }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) ); + int bUseFrom = bFrom; + while( p->rc==SQLITE_OK ){ + int iFirst = pIter->aFirst[1].iFirst; + int bNewTerm = 0; + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + assert( p->rc==SQLITE_OK ); + if( bUseFrom && pSeg->pDlidx ){ + fts5SegIterNextFrom(p, pSeg, iFrom); + }else{ + pSeg->xNext(p, pSeg, &bNewTerm); + } + + if( pSeg->pLeaf==0 || bNewTerm + || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg) + ){ + fts5MultiIterAdvanced(p, pIter, iFirst, 1); + fts5MultiIterSetEof(pIter); + pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + if( pSeg->pLeaf==0 ) return; + } + + fts5AssertMultiIterSetup(p, pIter); + assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); + if( pIter->bSkipEmpty==0 || pSeg->nPos ){ + pIter->xSetOutputs(pIter, pSeg); + return; + } + bUseFrom = 0; } } static void fts5MultiIterNext2( Fts5Index *p, - Fts5IndexIter *pIter, + Fts5Iter *pIter, int *pbNewTerm /* OUT: True if *might* be new term */ ){ assert( pIter->bSkipEmpty ); if( p->rc==SQLITE_OK ){ do { @@ -2778,11 +2792,11 @@ int bNewTerm = 0; assert( p->rc==SQLITE_OK ); pSeg->xNext(p, pSeg, &bNewTerm); if( pSeg->pLeaf==0 || bNewTerm - || fts5MultiIterAdvanceRowid(p, pIter, iFirst) + || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); fts5MultiIterSetEof(pIter); *pbNewTerm = 1; }else{ @@ -2792,34 +2806,37 @@ }while( fts5MultiIterIsEmpty(p, pIter) ); } } +static void fts5IterSetOutputs_Noop(Fts5Iter *pIter, Fts5SegIter *pSeg){ +} -static Fts5IndexIter *fts5MultiIterAlloc( +static Fts5Iter *fts5MultiIterAlloc( Fts5Index *p, /* FTS5 backend to iterate within */ int nSeg ){ - Fts5IndexIter *pNew; + Fts5Iter *pNew; int nSlot; /* Power of two >= nSeg */ for(nSlot=2; nSlotaSeg[] */ sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ ); if( pNew ){ pNew->nSeg = nSlot; pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->pIndex = p; + pNew->xSetOutputs = fts5IterSetOutputs_Noop; } return pNew; } /* -** Allocate a new Fts5IndexIter object. +** Allocate a new Fts5Iter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. @@ -2833,18 +2850,18 @@ int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ - Fts5IndexIter **ppOut /* New object */ + Fts5Iter **ppOut /* New object */ ){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5Buffer buf = {0,0,0}; /* Buffer used by fts5SegIterSeekInit() */ Fts5StructureLevel *pLvl; - Fts5IndexIter *pNew; + Fts5Iter *pNew; assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ if( p->rc==SQLITE_OK ){ @@ -2915,20 +2932,20 @@ } fts5BufferFree(&buf); } /* -** Create an Fts5IndexIter that iterates through the doclist provided +** Create an Fts5Iter that iterates through the doclist provided ** as the second argument. */ static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ int bDesc, /* True for descending rowid order */ - Fts5IndexIter **ppOut /* New object */ + Fts5Iter **ppOut /* New object */ ){ - Fts5IndexIter *pNew; + Fts5Iter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pNew->bFiltered = 1; @@ -2959,11 +2976,11 @@ /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ -static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){ +static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ assert( p->rc || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof ); return (p->rc || pIter->bEof); } @@ -2971,21 +2988,21 @@ /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ -static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){ +static i64 fts5MultiIterRowid(Fts5Iter *pIter){ assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* ** Move the iterator to the next entry at or following iMatch. */ static void fts5MultiIterNextFrom( Fts5Index *p, - Fts5IndexIter *pIter, + Fts5Iter *pIter, i64 iMatch ){ while( 1 ){ i64 iRowid; fts5MultiIterNext(p, pIter, 1, iMatch); @@ -2998,11 +3015,11 @@ /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ -static const u8 *fts5MultiIterTerm(Fts5IndexIter *pIter, int *pn){ +static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } @@ -3580,11 +3597,11 @@ /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. */ -static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ +static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ int i; Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; inSeg; i++){ Fts5SegIter *pSeg = &pIter->aSeg[i]; @@ -3658,11 +3675,11 @@ int *pnRem /* Write up to this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; - Fts5IndexIter *pIter = 0; /* Iterator to read input data */ + Fts5Iter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; @@ -4340,11 +4357,11 @@ } static int fts5AppendRowid( Fts5Index *p, i64 iDelta, - Fts5IndexIter *pMulti, + Fts5Iter *pMulti, Fts5Colset *pColset, Fts5Buffer *pBuf ){ fts5BufferAppendVarint(&p->rc, pBuf, iDelta); return 0; @@ -4365,11 +4382,11 @@ ** If an error occurs, an error code is left in p->rc. */ static int fts5AppendPoslist( Fts5Index *p, i64 iDelta, - Fts5IndexIter *pMulti, + Fts5Iter *pMulti, Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; @@ -4643,18 +4660,18 @@ Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ - Fts5IndexIter **ppIter /* OUT: New iterator */ + Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); - int (*xAppend)(Fts5Index*, i64, Fts5IndexIter*, Fts5Colset*, Fts5Buffer*); + int (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Colset*, Fts5Buffer*); if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ xMerge = fts5MergeRowidLists; xAppend = fts5AppendRowid; }else{ xMerge = fts5MergePrefixLists; @@ -4666,11 +4683,11 @@ if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN; int i; i64 iLastRowid = 0; - Fts5IndexIter *p1 = 0; /* Iterator used to gather data from index */ + Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; Fts5Buffer doclist; int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); @@ -4930,10 +4947,183 @@ } return rc; } + +static int fts5IndexExtractColset ( + Fts5Colset *pColset, /* Colset to filter on */ + const u8 *pPos, int nPos, /* Position list */ + Fts5Buffer *pBuf /* Output buffer */ +){ + int rc = SQLITE_OK; + int i; + + fts5BufferZero(pBuf); + for(i=0; inCol; i++){ + const u8 *pSub = pPos; + int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); + if( nSub ){ + fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); + } + } + return rc; +} + +/* +** xSetOutputs callback used by detail=none tables. +*/ +static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ + assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); + pIter->base.iRowid = pSeg->iRowid; + pIter->base.nData = pSeg->nPos; +} + +/* +** xSetOutputs callback used by detail=full and detail=col tables when no +** column filters are specified. +*/ +static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ + pIter->base.iRowid = pSeg->iRowid; + pIter->base.nData = pSeg->nPos; + + assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); + assert( pIter->pColset==0 || pIter->bFiltered ); + + if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ + /* All data is stored on the current page. Populate the output + ** variables to point into the body of the page object. */ + pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; + }else{ + /* The data is distributed over two or more pages. Copy it into the + ** Fts5Iter.poslist buffer and then set the output pointer to point + ** to this buffer. */ + fts5BufferZero(&pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); + pIter->base.pData = pIter->poslist.p; + } +} + +/* +** xSetOutputs callback used by detail=col when there is a column filter +** and there are 100 or more columns. Also called as a fallback from +** fts5IterSetOutputs_Col100 if the column-list spans more than one page. +*/ +static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ + fts5BufferZero(&pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); + pIter->base.iRowid = pSeg->iRowid; + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; +} + +/* +** xSetOutputs callback used when: +** +** * detail=col, +** * there is a column filter, and +** * the table contains 100 or fewer columns. +** +** The last point is to ensure all column numbers are stored as +** single-byte varints. +*/ +static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ + + assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); + assert( pIter->pColset ); + + if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ + fts5IterSetOutputs_Col(pIter, pSeg); + }else{ + u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; + u8 *pEnd = (u8*)&a[pSeg->nPos]; + int iPrev = 0; + int *aiCol = pIter->pColset->aiCol; + int *aiColEnd = &aiCol[pIter->pColset->nCol]; + + u8 *aOut = pIter->poslist.p; + int iPrevOut = 0; + + pIter->base.iRowid = pSeg->iRowid; + + while( abase.pData = pIter->poslist.p; + pIter->base.nData = aOut - pIter->poslist.p; + } +} + +/* +** xSetOutputs callback used by detail=full when there is a column filter. +*/ +static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ + Fts5Colset *pColset = pIter->pColset; + pIter->base.iRowid = pSeg->iRowid; + + assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); + assert( pColset ); + + if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ + /* All data is stored on the current page. Populate the output + ** variables to point into the body of the page object. */ + const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; + if( pColset->nCol==1 ){ + pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); + pIter->base.pData = a; + }else{ + fts5BufferZero(&pIter->poslist); + fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist); + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; + } + }else{ + /* The data is distributed over two or more pages. Copy it into the + ** Fts5Iter.poslist buffer and then set the output pointer to point + ** to this buffer. */ + fts5BufferZero(&pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; + } +} + +static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ + Fts5Config *pConfig = pIter->pIndex->pConfig; + if( pConfig->eDetail==FTS5_DETAIL_NONE ){ + pIter->xSetOutputs = fts5IterSetOutputs_None; + } + + else if( pIter->pColset==0 || pIter->bFiltered ){ + pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; + } + + else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ + pIter->xSetOutputs = fts5IterSetOutputs_Full; + } + + else{ + assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); + if( pConfig->nCol<=100 ){ + pIter->xSetOutputs = fts5IterSetOutputs_Col100; + sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); + }else{ + pIter->xSetOutputs = fts5IterSetOutputs_Col; + } + } +} + /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( @@ -4942,26 +5132,31 @@ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5Colset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; - Fts5IndexIter *pRet = 0; - int iIdx = 0; + Fts5Iter *pRet = 0; Fts5Buffer buf = {0, 0, 0}; /* If the QUERY_SCAN flag is set, all other flags must be clear. */ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ + int iIdx = 0; /* Index to search */ memcpy(&buf.p[1], pToken, nToken); -#ifdef SQLITE_DEBUG - /* If the QUERY_TEST_NOIDX flag was specified, then this must be a + /* Figure out which index to search and set iIdx accordingly. If this + ** is a prefix query for which there is no prefix index, set iIdx to + ** greater than pConfig->nPrefix to indicate that the query will be + ** satisfied by scanning multiple terms in the main index. + ** + ** If the QUERY_TEST_NOIDX flag was specified, then this must be a ** prefix-query. Instead of using a prefix-index (if one exists), ** evaluate the prefix query using the main FTS index. This is used ** for internal sanity checking by the integrity-check in debug ** mode only. */ +#ifdef SQLITE_DEBUG if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif @@ -4971,54 +5166,65 @@ if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } if( iIdx<=pConfig->nPrefix ){ + /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); if( pStruct ){ fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet); fts5StructureRelease(pStruct); } }else{ + /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); } + if( p->rc==SQLITE_OK ){ + Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; + pRet->pColset = pColset; + fts5IterSetOutputCb(&p->rc, pRet); + if( p->rc==SQLITE_OK && pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); + } if( p->rc ){ - sqlite3Fts5IterClose(pRet); + sqlite3Fts5IterClose(&pRet->base); pRet = 0; fts5CloseReader(p); } - *ppIter = pRet; + + *ppIter = &pRet->base; sqlite3Fts5BufferFree(&buf); } return fts5IndexReturn(p); } /* ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ - assert( pIter->pIndex->rc==SQLITE_OK ); - return pIter->bEof; + assert( ((Fts5Iter*)pIter)->pIndex->rc==SQLITE_OK ); + return ((Fts5Iter*)pIter)->bEof; } /* ** Move to the next matching rowid. */ -int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ +int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching term/rowid. Used by the fts5vocab module. */ -int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ +int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *p = pIter->pIndex; assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(p, pIter, 0, 0); @@ -5037,133 +5243,39 @@ /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ -int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ +int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); return fts5IndexReturn(pIter->pIndex); } /* ** Return the current rowid. */ -i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ - return fts5MultiIterRowid(pIter); +i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIndexIter){ + return fts5MultiIterRowid((Fts5Iter*)pIndexIter); } /* ** Return the current term. */ -const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ +const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ int n; - const char *z = (const char*)fts5MultiIterTerm(pIter, &n); + const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); *pn = n-1; return &z[1]; } - -static int fts5IndexExtractColset ( - Fts5Colset *pColset, /* Colset to filter on */ - const u8 *pPos, int nPos, /* Position list */ - Fts5Buffer *pBuf /* Output buffer */ -){ - int rc = SQLITE_OK; - int i; - - fts5BufferZero(pBuf); - for(i=0; inCol; i++){ - const u8 *pSub = pPos; - int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); - if( nSub ){ - fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); - } - } - return rc; -} - - -/* -** Return a pointer to a buffer containing a copy of the position list for -** the current entry. Output variable *pn is set to the size of the buffer -** in bytes before returning. -** -** The returned position list does not include the "number of bytes" varint -** field that starts the position list on disk. -*/ -int sqlite3Fts5IterPoslist( - Fts5IndexIter *pIter, - Fts5Colset *pColset, /* Column filter (or NULL) */ - const u8 **pp, /* OUT: Pointer to position-list data */ - int *pn, /* OUT: Size of position-list in bytes */ - i64 *piRowid /* OUT: Current rowid */ -){ - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; - int eDetail = pIter->pIndex->pConfig->eDetail; - - assert( pIter->pIndex->rc==SQLITE_OK ); - *piRowid = pSeg->iRowid; - if( eDetail==FTS5_DETAIL_NONE ){ - *pn = pSeg->nPos; - }else - if( eDetail==FTS5_DETAIL_FULL - && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf - ){ - u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset]; - if( pColset==0 || pIter->bFiltered ){ - *pn = pSeg->nPos; - *pp = pPos; - }else if( pColset->nCol==1 ){ - *pp = pPos; - *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]); - }else{ - fts5BufferZero(&pIter->poslist); - fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist); - *pp = pIter->poslist.p; - *pn = pIter->poslist.n; - } - }else{ - fts5BufferZero(&pIter->poslist); - fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); - if( eDetail==FTS5_DETAIL_FULL ){ - *pp = pIter->poslist.p; - } - *pn = pIter->poslist.n; - } - return fts5IndexReturn(pIter->pIndex); -} - -int sqlite3Fts5IterCollist( - Fts5IndexIter *pIter, - const u8 **pp, /* OUT: Pointer to position-list data */ - int *pn /* OUT: Size of position-list in bytes */ -){ - assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); - *pp = pIter->poslist.p; - *pn = pIter->poslist.n; - return SQLITE_OK; -} - -/* -** This function is similar to sqlite3Fts5IterPoslist(), except that it -** copies the position list into the buffer supplied as the second -** argument. -*/ -int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ - Fts5Index *p = pIter->pIndex; - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; - assert( p->rc==SQLITE_OK ); - fts5BufferZero(pBuf); - fts5SegiterPoslist(p, pSeg, 0, pBuf); - return fts5IndexReturn(p); -} - /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ -void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ - if( pIter ){ +void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ + if( pIndexIter ){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; fts5MultiIterFree(pIter->pIndex, pIter); fts5CloseReader(pIndex); } } @@ -5326,39 +5438,34 @@ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; - Fts5IndexIter *pIdxIter = 0; - Fts5Buffer buf = {0, 0, 0}; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter); + Fts5IndexIter *pIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); - while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){ + i64 rowid = sqlite3Fts5IterRowid(pIter); if( eDetail==FTS5_DETAIL_NONE ){ cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); }else{ - rc = sqlite3Fts5IterPoslistBuffer(pIdxIter, &buf); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(buf.p, buf.n, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); - } + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5IterNext(pIdxIter); + rc = sqlite3Fts5IterNext(pIter); } } - sqlite3Fts5IterClose(pIdxIter); - fts5BufferFree(&buf); + sqlite3Fts5IterClose(pIter); *pCksum = cksum; return rc; } @@ -5659,11 +5766,11 @@ */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ int eDetail = p->pConfig->eDetail; u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ - Fts5IndexIter *pIter; /* Used to iterate through entire index */ + Fts5Iter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ #ifdef SQLITE_DEBUG /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Index: ext/fts5/fts5_main.c ================================================================== --- ext/fts5/fts5_main.c +++ ext/fts5/fts5_main.c @@ -536,11 +536,11 @@ /* Set idxFlags flags for all WHERE clause terms that will be used. */ for(i=0; inConstraint; i++){ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; int j; - for(j=0; j<(int)ArraySize(aConstraint); j++){ + for(j=0; jiColumn==aColMap[pC->iCol] && p->op & pC->op ){ if( p->usable ){ pC->iConsIndex = i; idxFlags |= pC->fts5op; @@ -583,11 +583,11 @@ pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0; } /* Assign argvIndex values to each constraint in use. */ iNext = 1; - for(i=0; i<(int)ArraySize(aConstraint); i++){ + for(i=0; iiConsIndex>=0 ){ pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++; pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit; } Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -336,11 +336,11 @@ int rc = SQLITE_OK; if( p ){ int i; /* Finalize all SQL statements */ - for(i=0; i<(int)ArraySize(p->aStmt); i++){ + for(i=0; iaStmt); i++){ sqlite3_finalize(p->aStmt[i]); } sqlite3_free(p); } Index: ext/fts5/fts5_tokenize.c ================================================================== --- ext/fts5/fts5_tokenize.c +++ ext/fts5/fts5_tokenize.c @@ -1218,11 +1218,11 @@ }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ - for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ + for(i=0; rc==SQLITE_OK && ixCreateTokenizer(pApi, aBuiltin[i].zName, (void*)pApi, &aBuiltin[i].x, 0 Index: ext/fts5/fts5_vocab.c ================================================================== --- ext/fts5/fts5_vocab.c +++ ext/fts5/fts5_vocab.c @@ -182,11 +182,11 @@ int nTab = (int)strlen(zTab)+1; int eType = 0; rc = fts5VocabTableType(zType, pzErr, &eType); if( rc==SQLITE_OK ){ - assert( eType>=0 && eType=0 && eTypeaDoc, 0, nCol * sizeof(i64)); pCsr->iCol = 0; assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ - i64 dummy; const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ + pPos = pCsr->pIter->pData; + nPos = pCsr->pIter->nData; switch( pCsr->pConfig->eDetail ){ case FTS5_DETAIL_FULL: - rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy); - if( rc==SQLITE_OK ){ - if( pTab->eType==FTS5_VOCAB_ROW ){ - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - pCsr->aCnt[0]++; - } - pCsr->aDoc[0]++; - }else{ - int iCol = -1; - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - int ii = FTS5_POS2COLUMN(iPos); - pCsr->aCnt[ii]++; - if( iCol!=ii ){ - if( ii>=nCol ){ - rc = FTS5_CORRUPT; - break; - } - pCsr->aDoc[ii]++; - iCol = ii; - } + pPos = pCsr->pIter->pData; + nPos = pCsr->pIter->nData; + if( pTab->eType==FTS5_VOCAB_ROW ){ + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + pCsr->aCnt[0]++; + } + pCsr->aDoc[0]++; + }else{ + int iCol = -1; + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + int ii = FTS5_POS2COLUMN(iPos); + pCsr->aCnt[ii]++; + if( iCol!=ii ){ + if( ii>=nCol ){ + rc = FTS5_CORRUPT; + break; + } + pCsr->aDoc[ii]++; + iCol = ii; } } } break; case FTS5_DETAIL_COLUMNS: if( pTab->eType==FTS5_VOCAB_ROW ){ pCsr->aDoc[0]++; }else{ - Fts5Buffer buf = {0, 0, 0}; - rc = sqlite3Fts5IterPoslistBuffer(pCsr->pIter, &buf); - if( rc==SQLITE_OK ){ - while( 0==sqlite3Fts5PoslistNext64(buf.p, buf.n, &iOff,&iPos) ){ - assert_nc( iPos>=0 && iPos=nCol ){ - rc = FTS5_CORRUPT; - break; - } - pCsr->aDoc[iPos]++; - } - } - sqlite3Fts5BufferFree(&buf); + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ + assert_nc( iPos>=0 && iPos=nCol ){ + rc = FTS5_CORRUPT; + break; + } + pCsr->aDoc[iPos]++; + } } break; default: assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -46,11 +46,12 @@ $cmd xPhraseForeach $i c o { lappend res $i.$c.$o } } - set res + #set res + sort_poslist $res } proc fts5_test_collist {cmd} { set res [list] Index: ext/fts5/test/fts5ac.test ================================================================== --- ext/fts5/test/fts5ac.test +++ ext/fts5/test/fts5ac.test @@ -156,12 +156,12 @@ } #------------------------------------------------------------------------- # foreach {tn expr} { - 1.2 "a OR b" 1.1 "a AND b" + 1.2 "a OR b" 1.3 "o" 1.4 "b q" 1.5 "e a e" 1.6 "m d g q q b k b w f q q p p" 1.7 "l o o l v v k" @@ -247,11 +247,10 @@ set res [fts5_query_data $expr xx] do_execsql_test 1.$tn2.$tn.[llength $res].asc { SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) FROM xx WHERE xx match $expr } $res - set res [fts5_query_data $expr xx DESC] do_execsql_test 1.$tn2.$tn.[llength $res].desc { SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) FROM xx WHERE xx match $expr ORDER BY 1 DESC ADDED ext/fts5/test/fts5simple3.test Index: ext/fts5/test/fts5simple3.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5simple3.test @@ -0,0 +1,44 @@ +# 2015 September 05 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5simple3 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +fts5_aux_test_functions db + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, detail=col); + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('x', 'x', 'x'); +} + +do_execsql_test 1.1 { + SELECT rowid, fts5_test_collist(t1) FROM t1('a:a'); +} {1 0.0} + +do_execsql_test 1.2 { + SELECT rowid, fts5_test_collist(t1) FROM t1('b:x'); +} {2 0.1} + +do_execsql_test 1.3 { + SELECT rowid, fts5_test_collist(t1) FROM t1('b:a'); +} {} + + +finish_test + Index: ext/fts5/test/fts5synonym2.test ================================================================== --- ext/fts5/test/fts5synonym2.test +++ ext/fts5/test/fts5synonym2.test @@ -25,10 +25,25 @@ foreach_detail_mode $testprefix { fts5_tclnum_register db fts5_aux_test_functions db +proc fts5_test_bothlist {cmd} { + + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { + set bFirst 1 + $cmd xPhraseColumnForeach $i c { + lappend CL $i.$c + if {$bFirst} { $cmd xPhraseForeach $i c o { lappend PL $i.$c.$o } } + set bFirst 0 + } + } + + list [sort_poslist $PL] $CL +} +sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist + proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] } sqlite3_fts5_create_function db fts5_rowid fts5_rowid do_execsql_test 1.$tok.0.1 " CREATE VIRTUAL TABLE ss USING fts5(a, b, @@ -87,10 +102,12 @@ INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii'); INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one'); } foreach {tn expr} { + 2.1 "one OR two OR three OR four" + 1.1 "one" 1.2 "two" 1.3 "three" 1.4 "four" 1.5 "v" 1.6 "vi" 1.7 "vii" 1.8 "viii" 1.9 "9" 1.10 "0" 1.11 "1" 1.12 "2" 2.1 "one OR two OR three OR four" @@ -111,19 +128,37 @@ continue } set res [fts5_query_data $expr ss ASC ::tclnum_syn] do_execsql_test 1.$tok.$tn.[llength $res].asc.1 { - SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr) + SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr) } $res do_execsql_test 1.$tok.$tn.[llength $res].asc.2 { SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr) + } $res + + do_execsql_test 1.$tok.$tn.[llength $res].asc.2 { + SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr) ORDER BY rank ASC } $res + + set res2 [list] + foreach {a b c} $res { lappend res2 $a $c $b } + do_execsql_test 1.$tok.$tn.[llength $res].asc.3 { + SELECT rowid, fts5_test_collist(ss), fts5_test_poslist2(ss) FROM ss($expr) + } $res2 + + set res3 [list] + foreach {a b c} $res { lappend res3 $a [list $b $c] } + do_execsql_test 1.$tok.$tn.[llength $res].asc.3 { + SELECT rowid, fts5_test_bothlist(ss) FROM ss($expr) + } $res3 + + } } } finish_test Index: ext/fts5/tool/fts5speed.tcl ================================================================== --- ext/fts5/tool/fts5speed.tcl +++ ext/fts5/tool/fts5speed.tcl @@ -9,10 +9,12 @@ {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} + + {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"} } proc usage {} { global Q puts stderr "Usage: $::argv0 DATABASE QUERY" Index: ext/fts5/tool/fts5txt2db.tcl ================================================================== --- ext/fts5/tool/fts5txt2db.tcl +++ ext/fts5/tool/fts5txt2db.tcl @@ -1,15 +1,129 @@ -proc usage {} { - puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..." +#------------------------------------------------------------------------- +# Command line options processor. +# +proc command_line_error {O E {msg ""}} { + if {$msg != ""} { + puts stderr "Error: $msg" + puts stderr "" + } + + set L [list] + foreach o $O { + if {[llength $o]==1} { + lappend L [string toupper $o] + } + } + + puts stderr "Usage: $::argv0 ?SWITCHES? $L" + puts stderr "" + puts stderr "Switches are:" + foreach o $O { + if {[llength $o]==3} { + foreach {a b c} $o {} + puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b] + } elseif {[llength $o]==2} { + foreach {a b} $o {} + puts stderr [format " -%-15s %s" $a $b] + } + } puts stderr "" - puts stderr "Options are" - puts stderr " -fts5" - puts stderr " -fts4" - puts stderr " -colsize " - puts stderr { + puts stderr $E + exit -1 +} + +proc process_command_line {avar lArgs O E} { + + upvar $avar A + set zTrailing "" ;# True if ... is present in $O + set lPosargs [list] + + # Populate A() with default values. Also, for each switch in the command + # line spec, set an entry in the idx() array as follows: + # + # {tblname t1 "table name to use"} + # -> [set idx(-tblname) {tblname t1 "table name to use"} + # + # For each position parameter, append its name to $lPosargs. If the ... + # specifier is present, set $zTrailing to the name of the prefix. + # + foreach o $O { + set nm [lindex $o 0] + set nArg [llength $o] + switch -- $nArg { + 1 { + if {[string range $nm end-2 end]=="..."} { + set zTrailing [string range $nm 0 end-3] + } else { + lappend lPosargs $nm + } + } + 2 { + set A($nm) 0 + set idx(-$nm) $o + } + 3 { + set A($nm) [lindex $o 1] + set idx(-$nm) $o + } + default { + error "Error in command line specification" + } + } + } + + # Set explicitly specified option values + # + set nArg [llength $lArgs] + for {set i 0} {$i < $nArg} {incr i} { + set opt [lindex $lArgs $i] + if {[string range $opt 0 0]!="-" || $opt=="--"} break + set c [array names idx "${opt}*"] + if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"} + if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"} + + if {[llength $idx($c)]==3} { + if {$i==[llength $lArgs]-1} { + command_line_error $O $E "Option requires argument: $c" + } + incr i + set A([lindex $idx($c) 0]) [lindex $lArgs $i] + } else { + set A([lindex $idx($c) 0]) 1 + } + } + + # Deal with position arguments. + # + set nPosarg [llength $lPosargs] + set nRem [expr $nArg - $i] + if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { + command_line_error $O $E + } + for {set j 0} {$j < $nPosarg} {incr j} { + set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]] + } + if {$zTrailing!=""} { + set A($zTrailing) [lrange $lArgs [expr $j+$i] end] + } +} +# End of command line options processor. +#------------------------------------------------------------------------- + + +process_command_line A $argv { + {fts5 "use fts5"} + {fts4 "use fts4"} + {colsize "10 10 10" "list of column sizes"} + {tblname "t1" "table name to create"} + {detail "full" "Fts5 detail mode to use"} + {repeat 1 "Load each file this many times"} + database + file... +} { This script is designed to create fts4/5 tables with more than one column. The -colsize option should be set to a Tcl list of integer values, one for each column in the table. Each value is the number of tokens that will be inserted into the column value for each row. For example, setting the -colsize option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10 @@ -20,63 +134,31 @@ are used for the first column of the first row, where N1 is the first element of the -colsize list. The next N2 are used for the second column of the first row, and so on. Rows are added to the table until the entire list of tokens is exhausted. } - exit -1 -} - -set O(aColSize) [list 10 10 10] -set O(tblname) t1 -set O(fts) fts5 - - -set options_with_values {-colsize} - -for {set i 0} {$i < [llength $argv]} {incr i} { - set opt [lindex $argv $i] - if {[string range $opt 0 0]!="-"} break - - if {[lsearch $options_with_values $opt]>=0} { - incr i - if {$i==[llength $argv]} usage - set val [lindex $argv $i] - } - - switch -- $opt { - -colsize { - set O(aColSize) $val - } - - -fts4 { - set O(fts) fts4 - } - - -fts5 { - set O(fts) fts5 - } - } -} - -if {$i > [llength $argv]-2} usage -set O(db) [lindex $argv $i] -set O(files) [lrange $argv [expr $i+1] end] - -sqlite3 db $O(db) + +if {$A(fts4)} { + set A(fts) fts4 +} else { + set A(fts) fts5 +} + +sqlite3 db $A(database) # Create the FTS table in the db. Return a list of the table columns. # proc create_table {} { - global O + global A set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z] - set nCol [llength $O(aColSize)] + set nCol [llength $A(colsize)] set cols [lrange $cols 0 [expr $nCol-1]] - set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) (" + set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) (" append sql [join $cols ,] - append sql ");" + if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" } db eval $sql return $cols } @@ -87,34 +169,42 @@ set data [read $fd] close $fd split $data } +proc repeat {L n} { + set res [list] + for {set i 0} {$i < $n} {incr i} { + set res [concat $res $L] + } + set res +} + # Load all the data into a big list of tokens. # set tokens [list] -foreach f $O(files) { - set tokens [concat $tokens [readfile $f]] +foreach f $A(file) { + set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]] } set N [llength $tokens] set i 0 set cols [create_table] -set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]" +set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])" foreach c [lrange $cols 1 end] { - append sql ", \$A($c)" + append sql ", \$R($c)" } append sql ")" db eval BEGIN while {$i < $N} { - foreach c $cols s $O(aColSize) { - set A($c) [lrange $tokens $i [expr $i+$s-1]] + foreach c $cols s $A(colsize) { + set R($c) [lrange $tokens $i [expr $i+$s-1]] incr i $s } db eval $sql } db eval COMMIT