Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -1455,27 +1455,31 @@ */ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts3Table *p = (Fts3Table *)pVTab; int i; /* Iterator variable */ int iCons = -1; /* Index of constraint to use */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient ** strategy is possible. */ pInfo->idxNum = FTS3_FULLSCAN_SEARCH; pInfo->estimatedCost = 5000000; for(i=0; inConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; if( pCons->usable==0 ) continue; + + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. 
*/ - if( iCons<0 - && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) - ){ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ pInfo->idxNum = FTS3_DOCID_SEARCH; pInfo->estimatedCost = 1.0; iCons = i; } @@ -1500,18 +1504,42 @@ if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && pCons->iColumn==p->nColumn + 2 ){ iLangidCons = i; } + + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; + + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } + } } + iIdx = 1; if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = 1; + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; pInfo->aConstraintUsage[iCons].omit = 1; } if( iLangidCons>=0 ){ - pInfo->aConstraintUsage[iLangidCons].argvIndex = 2; + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or ** docid) order. Both ascending and descending are possible. */ @@ -2953,10 +2981,37 @@ rc = fts3EvalNext((Fts3Cursor *)pCursor); } assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } + +/* +** The following are copied from sqliteInt.h. +** +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. 
+*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif + +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } + } + return iDefault; +} /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional ** information. @@ -2979,44 +3034,62 @@ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ int rc; char *zSql; /* SQL statement used to access %_content */ + int eSearch;; Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" 
constraint, if any */ + int iIdx; UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); - assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( nVal==0 || nVal==1 || nVal==2 ); - assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); + eSearch = (idxNum & 0x0000FFFF); + assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( p->pSegments==0 ); + + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + if( idxStr ){ pCsr->bDesc = (idxStr[0]=='D'); }else{ pCsr->bDesc = p->bDescIdx; } - pCsr->eSearch = (i16)idxNum; + pCsr->eSearch = (i16)eSearch; - if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ - int iCol = idxNum-FTS3_FULLTEXT_SEARCH; - const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); - if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ return SQLITE_NOMEM; } pCsr->iLangid = 0; - if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, 
p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, &p->base.zErrMsg @@ -3035,11 +3108,11 @@ /* Compile a SELECT statement for this cursor. For a full-table-scan, the ** statement loops through all rows of the %_content table. For a ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - if( idxNum==FTS3_FULLSCAN_SEARCH ){ + if( eSearch==FTS3_FULLSCAN_SEARCH ){ zSql = sqlite3_mprintf( "SELECT %s ORDER BY rowid %s", p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") ); if( zSql ){ @@ -3046,14 +3119,14 @@ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); sqlite3_free(zSql); }else{ rc = SQLITE_NOMEM; } - }else if( idxNum==FTS3_DOCID_SEARCH ){ + }else if( eSearch==FTS3_DOCID_SEARCH ){ rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); if( rc==SQLITE_OK ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); } } if( rc!=SQLITE_OK ) return rc; return fts3NextMethod(pCursor); @@ -3940,10 +4013,16 @@ } return SQLITE_OK; } +/* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. @@ -3953,27 +4032,47 @@ ** memory within this call. ** ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. 
*/ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - int rc; /* Error code */ - Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - - if( pCsr->bDesc==pTab->bDescIdx - && bOptOk==1 - && p->nToken==1 - && pFirst->pSegcsr - && pFirst->pSegcsr->bLookup - && pFirst->bFirst==0 - ){ + int rc = SQLITE_OK; /* Error code */ + int i; + + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } + + if( bIncrOk && bHaveIncr ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); - rc = sqlite3Fts3MsrIncrStart( - pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); + for(i=0; rc==SQLITE_OK && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } p->bIncr = 1; - }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); p->bIncr = 0; } @@ -4077,10 +4176,220 @@ } } *ppIter = p; } + +/* +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. 
+*/ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof +){ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIterpNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. 
+*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; + + if( pPhrase->iDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; + } + } + + return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching document in the database and populate +** the Fts3Doclist.pList and nList fields. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. 
+*/ +static int fts3EvalIncrPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; + + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); + + if( p->nToken==1 && p->bIncr ){ + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistTokennToken; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; + } + } + assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); + assert( rc!=SQLITE_OK || bMaxSet ); + + /* Keep advancing iterators until they all point to the same document */ + for(i=0; inToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+1); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (pOut - 
aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); + } + } + } + + *pbEof = bEof; + return rc; +} /* ** Attempt to move the phrase iterator to point to the next matching docid. ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. @@ -4097,59 +4406,18 @@ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ - assert( p->nToken==1 ); - assert( pDL->pNextDocid==0 ); - rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, - &pDL->iDocid, &pDL->pList, &pDL->nList - ); - if( rc==SQLITE_OK && !pDL->pList ){ - *pbEof = 1; - } + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ - char *pIter; /* Used to iterate through aAll */ - char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ - if( pDL->pNextDocid ){ - pIter = pDL->pNextDocid; - }else{ - pIter = pDL->aAll; - } - - if( pIter>=pEnd ){ - /* We have already reached the end of this doclist. EOF. */ - *pbEof = 1; - }else{ - sqlite3_int64 iDelta; - pIter += sqlite3Fts3GetVarint(pIter, &iDelta); - if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ - pDL->iDocid += iDelta; - }else{ - pDL->iDocid -= iDelta; - } - pDL->pList = pIter; - fts3PoslistCopy(0, &pIter); - pDL->nList = (int)(pIter - pDL->pList); - - /* pIter now points just past the 0x00 that terminates the position- - ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim(), then pIter may not actually - ** point to the start of the next docid value. The following line deals - ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim(). 
*/ - while( pIterpNextDocid = pIter; - assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); - *pbEof = 0; - } + fts3EvalDlPhraseNext(pTab, pDL, pbEof); } return rc; } @@ -4170,11 +4438,10 @@ ** code before returning. */ static void fts3EvalStartReaders( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Expr *pExpr, /* Expression to initialize phrases in */ - int bOptOk, /* True to enable incremental loading */ int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; @@ -4181,14 +4448,14 @@ int nToken = pExpr->pPhrase->nToken; for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); - *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); }else{ - fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); - fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } @@ -4426,11 +4693,11 @@ /* Set nLoad4 to the value of (4^nOther) for the next iteration of the ** for-loop. Except, limit the value to 2^24 to prevent it from ** overflowing the 32-bit integer it is stored in. */ if( ii<12 ) nLoad4 = nLoad4*4; - if( ii==0 || pTC->pPhrase->nToken>1 ){ + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ /* Either this is the cheapest token in the entire query, or it is ** part of a multi-token phrase. Either way, the entire doclist will ** (eventually) be loaded into memory. It may as well be now. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; @@ -4506,11 +4773,11 @@ sqlite3_free(aTC); } } #endif - fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); + fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); return rc; } /* ** Invalidate the current position list for phrase pPhrase. 
@@ -4989,10 +5256,20 @@ pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); } + + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) + )){ + pCsr->isEof = 1; + } + return rc; } /* ** Restart interation for expression pExpr so that the next call to @@ -5012,16 +5289,20 @@ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ - assert( pPhrase->nToken==1 ); - assert( pPhrase->aToken[0].pSegcsr ); - sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr); + int i; + for(i=0; inToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); } - pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } pExpr->iDocid = 0; Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -265,10 +265,16 @@ ** verifying the operation of the SQLite core. */ int inTransaction; /* True after xBegin but before xCommit/xRollback */ int mxSavepoint; /* Largest valid xSavepoint integer */ #endif + +#ifdef SQLITE_TEST + /* True to disable the incremental doclist optimization. This is controlled + ** by special insert command 'test-no-incr-doclist'. 
*/ + int bNoIncrDoclist; +#endif }; /* ** When the core wants to read from the virtual table, it creates a ** virtual table cursor (an instance of the following structure) using @@ -290,11 +296,12 @@ int nDoclist; /* Size of buffer at aDoclist */ u8 bDesc; /* True to sort in descending order */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ sqlite3_int64 nDoc; /* Documents in table */ - + i64 iMinDocid; /* Minimum docid to return */ + i64 iMaxDocid; /* Maximum docid to return */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int nMatchinfo; /* Number of elements in aMatchinfo[] */ char *zMatchinfo; /* Matchinfo specification */ }; @@ -320,10 +327,19 @@ */ #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ +/* +** The lower 16-bits of the sqlite3_index_info.idxNum value set by +** the xBestIndex() method contains the Fts3Cursor.eSearch value described +** above. The upper 16-bits contain a combination of the following +** bits, used to describe extra constraints on full-text searches. +*/ +#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ +#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ +#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? 
*/ struct Fts3Doclist { char *aAll; /* Array containing doclist (or NULL) */ int nAll; /* Size of a[] in bytes */ char *pNextDocid; /* Pointer to next docid */ Index: ext/fts3/fts3_write.c ================================================================== --- ext/fts3/fts3_write.c +++ ext/fts3/fts3_write.c @@ -5047,10 +5047,13 @@ }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ p->nNodeSize = atoi(&zVal[9]); rc = SQLITE_OK; }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ p->nMaxPendingData = atoi(&zVal[11]); + rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); rc = SQLITE_OK; #endif }else{ rc = SQLITE_ERROR; } ADDED test/fts4docid.test Index: test/fts4docid.test ================================================================== --- /dev/null +++ test/fts4docid.test @@ -0,0 +1,116 @@ +# 2012 March 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/fts3_common.tcl +set ::testprefix fts4docid + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +# Initialize a table with pseudo-randomly generated data. 
+# +do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts4; } +do_test 1.1 { + foreach {docid content} { + 0 {F N K B T I K V B A} 1 {D M J E S P H E L O} + 2 {W U T Q T Q T L H G} 3 {D W H M B R S Z B K} + 4 {F Q I N P Q J L Z D} 5 {J O Q E Y A O E L B} + 6 {O V R A C R K C Y H} 7 {Z J H T Q Q O R A G} + 8 {L K J W G D Y W B M} 9 {K E Y I A Q R Q T S} + 10 {N P H Y Z M R T I C} 11 {E X H O I S E S Z F} + 12 {B Y Q T J X C L L J} 13 {Q D C U U A Q E Z U} + 14 {S I T C J R X S J M} 15 {M X M K E X L H Q Y} + 16 {O W E I C H U Y S Y} 17 {P V V E M T H C C S} + 18 {L Y A M I E N M X O} 19 {S Y R U L S Q Y F P} + 20 {U J S T T J J S V X} 21 {T E I W P O V A A P} + 22 {W D K H D H F G O J} 23 {T X Y P G M J U I L} + 24 {F V X E B C N B K W} 25 {E B A Y N N T Z I C} + 26 {G E E B C P U D H G} 27 {J D J K N S B Q T M} + 28 {Q T G M D O D Y V G} 29 {P X W I W V P W Z G} + } { + execsql { INSERT INTO t1(docid, content) VALUES($docid, $content) } + } +} {} + +# Quick test regarding affinities and the docid/rowid column. 
+do_execsql_test 2.1.1 { SELECT docid FROM t1 WHERE docid = 5 } {5} +do_execsql_test 2.1.2 { SELECT docid FROM t1 WHERE docid = '5' } {5} +do_execsql_test 2.1.3 { SELECT docid FROM t1 WHERE docid = +5 } {5} +do_execsql_test 2.1.4 { SELECT docid FROM t1 WHERE docid = +'5' } {5} +do_execsql_test 2.1.5 { SELECT docid FROM t1 WHERE docid < 5 } {0 1 2 3 4} +do_execsql_test 2.1.6 { SELECT docid FROM t1 WHERE docid < '5' } {0 1 2 3 4} + +do_execsql_test 2.2.1 { SELECT rowid FROM t1 WHERE rowid = 5 } {5} +do_execsql_test 2.2.2 { SELECT rowid FROM t1 WHERE rowid = '5' } {5} +do_execsql_test 2.2.3 { SELECT rowid FROM t1 WHERE rowid = +5 } {5} +do_execsql_test 2.2.4 { SELECT rowid FROM t1 WHERE rowid = +'5' } {5} +do_execsql_test 2.2.5 { SELECT rowid FROM t1 WHERE rowid < 5 } {0 1 2 3 4} +do_execsql_test 2.2.6 { SELECT rowid FROM t1 WHERE rowid < '5' } {0 1 2 3 4} + +#------------------------------------------------------------------------- +# Now test a bunch of full-text queries featuring range constraints on +# the docid field. Each query is run so that the range constraint: +# +# * is on the docid field, +# * is on the docid field with a unary +, +# * is on the rowid field, +# * is on the rowid field with a unary +. +# +# Queries are run with both "ORDER BY docid DESC" and "ORDER BY docid ASC" +# clauses. 
+# +foreach {tn where result} { + 1 {WHERE t1 MATCH 'O' AND xxx < 17} {1 5 6 7 11 16} + 2 {WHERE t1 MATCH 'O' AND xxx < 4123456789123456} {1 5 6 7 11 16 18 21 22 28} + 3 {WHERE t1 MATCH 'O' AND xxx < 1} {} + 4 {WHERE t1 MATCH 'O' AND xxx < -4123456789123456} {} + + 5 {WHERE t1 MATCH 'O' AND xxx > 17} {18 21 22 28} + 6 {WHERE t1 MATCH 'O' AND xxx > 4123456789123456} {} + 7 {WHERE t1 MATCH 'O' AND xxx > 1} {5 6 7 11 16 18 21 22 28} + 8 {WHERE t1 MATCH 'O' AND xxx > -4123456789123456} {1 5 6 7 11 16 18 21 22 28} + + 9 {WHERE t1 MATCH '"Q T"' AND xxx < 27} {2 9 12} + 10 {WHERE t1 MATCH '"Q T"' AND xxx <= 27} {2 9 12 27} + 11 {WHERE t1 MATCH '"Q T"' AND xxx > 27} {28} + 12 {WHERE t1 MATCH '"Q T"' AND xxx >= 27} {27 28} +} { + foreach {tn2 ref order} { + 1 docid "ORDER BY docid ASC" + 2 +docid "ORDER BY docid ASC" + 3 rowid "ORDER BY docid ASC" + 4 +rowid "ORDER BY docid ASC" + + 5 docid "ORDER BY docid DESC" + 6 +docid "ORDER BY docid DESC" + 7 rowid "ORDER BY docid DESC" + 8 +rowid "ORDER BY docid DESC" + } { + set w [string map "xxx $ref" $where] + set q "SELECT docid FROM t1 $w $order" + + if {$tn2<5} { + set r [lsort -integer -increasing $result] + } else { + set r [lsort -integer -decreasing $result] + } + + do_execsql_test 3.$tn.$tn2 $q $r + } +} + +finish_test ADDED test/fts4incr.test Index: test/fts4incr.test ================================================================== --- /dev/null +++ test/fts4incr.test @@ -0,0 +1,53 @@ +# 2012 March 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/fts3_common.tcl +set ::testprefix fts4incr + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +# Create the fts_kjv_genesis procedure which fills an FTS3/4 table +# with the complete text of the Book of Genesis. +# +source $testdir/genesis.tcl + +do_test 1.0 { + execsql { CREATE VIRTUAL TABLE t1 USING fts4(words) } + fts_kjv_genesis +} {} + +do_execsql_test 1.1 { + SELECT min(docid), max(docid) FROM t1; +} {1001001 1050026} + +foreach {tn q res} { + 1 { SELECT count(*) FROM t1 WHERE t1 MATCH 'and' AND docid < 1010000} 224 + 2 { SELECT count(*) FROM t1 WHERE t1 MATCH '"in the"' AND docid < 1010000} 47 + 3 { SELECT count(*) FROM t1 WHERE t1 MATCH '"And God"' AND docid < 1010000} 33 + 4 { SELECT count(*) FROM t1 WHERE t1 + MATCH '"land of canaan"' AND docid < 1030000 } 7 +} { + foreach s {0 1} { + execsql "INSERT INTO t1(t1) VALUES('test-no-incr-doclist=$s')" + do_execsql_test 2.$tn.$s $q $res + set t($s) [lindex [time [list execsql $q] 100] 0] + } + puts "with optimization: $t(0) without: $t(1)" +} + +finish_test