Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -2253,12 +2253,12 @@ } for(ii=0; iinToken; ii++){ Fts3PhraseToken *pTok; /* Token to find doclist for */ int iTok; /* The token being queried this iteration */ - char *pList; /* Pointer to token doclist */ - int nList; /* Size of buffer at pList */ + char *pList = 0; /* Pointer to token doclist */ + int nList = 0; /* Size of buffer at pList */ /* Select a token to process. If this is an xFilter() call, then tokens ** are processed in order from least to most costly. Otherwise, tokens ** are processed in the order in which they occur in the phrase. */ @@ -2296,12 +2296,13 @@ } if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); }else{ - assert( pTok->pArray ); - rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); + if( pTok->pArray ){ + rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); + } pTok->bFulltext = 1; } assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pArray==0 ); if( rc!=SQLITE_OK ) break; @@ -2525,11 +2526,14 @@ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int ii; nCost = 0; for(ii=0; iinToken; ii++){ - nCost += pPhrase->aToken[ii].pArray->nCost; + Fts3SegReaderArray *pArray = pPhrase->aToken[ii].pArray; + if( pArray ){ + nCost += pPhrase->aToken[ii].pArray->nCost; + } } }else{ nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); } return nCost; @@ -2553,11 +2557,11 @@ if( pExpr->eType==FTSQUERY_AND ){ fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); fts3ExprAssignCosts(pExpr->pRight, ppExprCost); }else{ (*ppExprCost)->pExpr = pExpr; - (*ppExprCost)->nCost = fts3ExprCost(pExpr);; + (*ppExprCost)->nCost = fts3ExprCost(pExpr); (*ppExprCost)++; } } /* @@ -3271,13 +3275,17 @@ sqlite3_context *pContext, /* SQLite function call context */ int nVal, /* Size of argument array */ sqlite3_value **apVal /* Array of arguments */ ){ Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ - assert( nVal==1 ); + assert( nVal==1 || nVal==2 ); if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ - sqlite3Fts3Matchinfo(pContext, pCsr); + const char *zArg = 0; + if( nVal>1 ){ + zArg = (const char *)sqlite3_value_text(apVal[1]); + } + sqlite3Fts3Matchinfo(pContext, pCsr, zArg); } } /* ** This routine implements the xFindFunction method for the FTS3 @@ -3462,10 +3470,11 @@ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) ){ rc = sqlite3_create_module_v2( db, "fts3", &fts3Module, (void *)pHash, hashDestroy ); Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -160,19 +160,23 @@ i16 eSearch; /* Search strategy (see below) */ u8 isEof; /* True if at End Of Results */ u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ + int nPhrase; /* Number of matchable phrases in query */ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ - int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ - u32 *aMatchinfo; /* Information about most recent match */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ + + int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ + u32 *aMatchinfo; /* Information about most recent match */ + int nMatchinfo; /* Number of elements in aMatchinfo[] */ + char *zMatchinfo; /* Matchinfo specification */ }; #define FTS3_EVAL_FILTER 0 #define FTS3_EVAL_NEXT 1 #define FTS3_EVAL_MATCHINFO 2 @@ -285,15 +289,16 @@ Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *, int (*)(Fts3Table *, void *, char *, int, char *, int), void * ); int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **); -int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor*, u32*); -int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor*, u32*); int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); +int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); +int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); + void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); @@ -337,11 +342,11 @@ /* fts3_snippet.c */ void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, const char *, const char *, int, int ); -void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *); +void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); /* fts3_expr.c */ int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, const char *, int, Fts3Expr ** ); Index: ext/fts3/fts3_snippet.c ================================================================== --- ext/fts3/fts3_snippet.c +++ ext/fts3/fts3_snippet.c @@ -15,10 +15,26 @@ #include "fts3Int.h" #include #include +/* +** Characters that may appear in the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ +#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ +#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ +#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ +#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ +#define FTS3_MATCHINFO_LCS 's' /* nCol values */ +#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ + +/* +** The default value for the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_DEFAULT "pcx" + /* ** Used as an fts3ExprIterate() context when loading phrase doclists to ** Fts3Expr.aDoclist[]/nDoclist. */ @@ -68,10 +84,12 @@ */ typedef struct MatchInfo MatchInfo; struct MatchInfo { Fts3Cursor *pCursor; /* FTS3 Cursor */ int nCol; /* Number of columns in table */ + int nPhrase; /* Number of matchable phrases in query */ + sqlite3_int64 nDoc; /* Number of docs in database */ u32 *aMatchinfo; /* Pre-allocated buffer */ }; @@ -268,10 +286,20 @@ } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; } + +static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + (*(int *)ctx)++; + return SQLITE_OK; +} +static int fts3ExprPhraseCount(Fts3Expr *pExpr){ + int nPhrase = 0; + (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + return nPhrase; +} /* ** Advance the position list iterator specified by the first two ** arguments so that it points to the first element with a value greater ** than or equal to parameter iNext. @@ -781,24 +809,46 @@ *pp = pCsr; } /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. The "global" stats are those elements of the matchinfo -** array that are constant for all rows returned by the current query. +** for a single query. +** +** fts3ExprIterate() callback to load the 'global' elements of a +** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements +** of the matchinfo array that are constant for all rows returned by the +** current query. +** +** Argument pCtx is actually a pointer to a struct of type MatchInfo. This +** function populates Matchinfo.aMatchinfo[] as follows: +** +** for(iCol=0; iColpCursor; char *pIter; char *pEnd; char *pFree = 0; - const int iStart = 2 + (iPhrase * p->nCol * 3) + 1; + u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; assert( pExpr->isLoaded ); assert( pExpr->eType==FTSQUERY_PHRASE ); if( pCsr->pDeferred ){ @@ -812,14 +862,14 @@ int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); if( rc!=SQLITE_OK ) return rc; pIter = pFree; pEnd = &pFree[nFree]; }else{ - int nDoc = p->aMatchinfo[2 + 3*p->nCol*p->aMatchinfo[0]]; - for(ii=0; iinCol; ii++){ - p->aMatchinfo[iStart + ii*3] = nDoc; - p->aMatchinfo[iStart + ii*3 + 1] = nDoc; + int iCol; /* Column index */ + for(iCol=0; iColnCol; iCol++){ + aOut[iCol*3 + 1] = (u32)p->nDoc; + aOut[iCol*3 + 2] = (u32)p->nDoc; } return SQLITE_OK; } }else{ pIter = pExpr->aDoclist; @@ -827,32 +877,32 @@ } /* Fill in the global hit count matrix row for this phrase. */ while( pIteraMatchinfo[iStart], 1); + fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); } sqlite3_free(pFree); return SQLITE_OK; } /* -** fts3ExprIterate() callback used to collect the "local" matchinfo stats -** for a single query. The "local" stats are those elements of the matchinfo +** fts3ExprIterate() callback used to collect the "local" part of the +** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ** array that are different for each row returned by the query. */ -static int fts3ExprLocalMatchinfoCb( +static int fts3ExprLocalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; if( pExpr->aDoclist ){ char *pCsr; - int iStart = 2 + (iPhrase * p->nCol * 3); + int iStart = iPhrase * p->nCol * 3; int i; for(i=0; inCol; i++) p->aMatchinfo[iStart+i*3] = 0; pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); @@ -861,71 +911,403 @@ } } return SQLITE_OK; } + +static int fts3MatchinfoCheck( + Fts3Table *pTab, + char cArg, + char **pzErr +){ + if( (cArg==FTS3_MATCHINFO_NPHRASE) + || (cArg==FTS3_MATCHINFO_NCOL) + || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) + || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) + || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) + || (cArg==FTS3_MATCHINFO_LCS) + || (cArg==FTS3_MATCHINFO_HITS) + ){ + return SQLITE_OK; + } + *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); + return SQLITE_ERROR; +} + +static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + int nVal; /* Number of integers output by cArg */ + + switch( cArg ){ + case FTS3_MATCHINFO_NDOC: + case FTS3_MATCHINFO_NPHRASE: + case FTS3_MATCHINFO_NCOL: + nVal = 1; + break; + + case FTS3_MATCHINFO_AVGLENGTH: + case FTS3_MATCHINFO_LENGTH: + case FTS3_MATCHINFO_LCS: + nVal = pInfo->nCol; + break; + + default: + assert( cArg==FTS3_MATCHINFO_HITS ); + nVal = pInfo->nCol * pInfo->nPhrase * 3; + break; + } + + return nVal; +} + +static int fts3MatchinfoSelectDoctotal( + Fts3Table *pTab, + sqlite3_stmt **ppStmt, + sqlite3_int64 *pnDoc, + const char **paLen +){ + sqlite3_stmt *pStmt; + const char *a; + sqlite3_int64 nDoc; + + if( !*ppStmt ){ + int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); + if( rc!=SQLITE_OK ) return rc; + } + pStmt = *ppStmt; + + a = sqlite3_column_blob(pStmt, 0); + a += sqlite3Fts3GetVarint(a, &nDoc); + *pnDoc = (u32)nDoc; + + if( paLen ) *paLen = a; + return SQLITE_OK; +} + +/* +** An instance of the following structure is used to store state while +** iterating through a multi-column position-list corresponding to the +** hits for a single phrase on a single row in order to calculate the +** values for a matchinfo() FTS3_MATCHINFO_LCS request. +*/ +typedef struct LcsIterator LcsIterator; +struct LcsIterator { + Fts3Expr *pExpr; /* Pointer to phrase expression */ + char *pRead; /* Cursor used to iterate through aDoclist */ + int iPosOffset; /* Tokens count up to end of this phrase */ + int iCol; /* Current column number */ + int iPos; /* Current position */ +}; + +/* +** If LcsIterator.iCol is set to the following value, the iterator has +** finished iterating through all offsets for all columns. +*/ +#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; + +static int fts3MatchinfoLcsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + LcsIterator *aIter = (LcsIterator *)pCtx; + aIter[iPhrase].pExpr = pExpr; + return SQLITE_OK; +} + +/* +** Advance the iterator passed as an argument to the next position. Return +** 1 if the iterator is at EOF or if it now points to the start of the +** position list for the next column. +*/ +static int fts3LcsIteratorAdvance(LcsIterator *pIter){ + char *pRead = pIter->pRead; + sqlite3_int64 iRead; + int rc = 0; + + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + if( iRead==0 ){ + pIter->iCol = LCS_ITERATOR_FINISHED; + rc = 1; + }else{ + if( iRead==1 ){ + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + pIter->iCol = iRead; + pIter->iPos = pIter->iPosOffset; + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + rc = 1; + } + pIter->iPos += (iRead-2); + } + + pIter->pRead = pRead; + return rc; +} + +/* +** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. +** +** If the call is successful, the longest-common-substring lengths for each +** column are written into the first nCol elements of the pInfo->aMatchinfo[] +** array before returning. SQLITE_OK is returned in this case. +** +** Otherwise, if an error occurs, an SQLite error code is returned and the +** data written to the first nCol elements of pInfo->aMatchinfo[] is +** undefined. +*/ +static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ + LcsIterator *aIter; + int i; + int iCol; + int nToken = 0; + + /* Allocate and populate the array of LcsIterator objects. The array + ** contains one element for each matchable phrase in the query. + **/ + aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); + if( !aIter ) return SQLITE_NOMEM; + memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); + (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + nToken -= pIter->pExpr->pPhrase->nToken; + pIter->iPosOffset = nToken; + pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); + if( pIter->pRead ){ + pIter->iPos = pIter->iPosOffset; + fts3LcsIteratorAdvance(&aIter[i]); + }else{ + pIter->iCol = LCS_ITERATOR_FINISHED; + } + } + + for(iCol=0; iColnCol; iCol++){ + int nLcs = 0; /* LCS value for this column */ + int nLive = 0; /* Number of iterators in aIter not at EOF */ + + /* Loop through the iterators in aIter[]. Set nLive to the number of + ** iterators that point to a position-list corresponding to column iCol. + */ + for(i=0; inPhrase; i++){ + assert( aIter[i].iCol>=iCol ); + if( aIter[i].iCol==iCol ) nLive++; + } + + /* The following loop runs until all iterators in aIter[] have finished + ** iterating through positions in column iCol. Exactly one of the + ** iterators is advanced each time the body of the loop is run. + */ + while( nLive>0 ){ + LcsIterator *pAdv = 0; /* The iterator to advance by one position */ + int nThisLcs = 0; /* LCS for the current iterator positions */ + + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + if( iCol!=pIter->iCol ){ + /* This iterator is already at EOF for this column. */ + nThisLcs = 0; + }else{ + if( pAdv==0 || pIter->iPosiPos ){ + pAdv = pIter; + } + if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ + nThisLcs++; + }else{ + nThisLcs = 1; + } + if( nThisLcs>nLcs ) nLcs = nThisLcs; + } + } + if( fts3LcsIteratorAdvance(pAdv) ) nLive--; + } + + pInfo->aMatchinfo[iCol] = nLcs; + } + + sqlite3_free(aIter); + return SQLITE_OK; +} + +/* +** Populate the buffer pInfo->aMatchinfo[] with an array of integers to +** be returned by the matchinfo() function. Argument zArg contains the +** format string passed as the second argument to matchinfo (or the +** default value "pcx" if no second argument was specified). The format +** string has already been validated and the pInfo->aMatchinfo[] array +** is guaranteed to be large enough for the output. +** +** If bGlobal is true, then populate all fields of the matchinfo() output. +** If it is false, then assume that those fields that do not change between +** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) +** have already been populated. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. If a value other than SQLITE_OK is returned, the state the +** pInfo->aMatchinfo[] buffer is left in is undefined. +*/ +static int fts3MatchinfoValues( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + int bGlobal, /* True to grab the global stats */ + MatchInfo *pInfo, /* Matchinfo context object */ + const char *zArg /* Matchinfo format string */ +){ + int rc = SQLITE_OK; + int i; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_stmt *pSelect = 0; + + for(i=0; rc==SQLITE_OK && zArg[i]; i++){ + + switch( zArg[i] ){ + case FTS3_MATCHINFO_NPHRASE: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; + break; + + case FTS3_MATCHINFO_NCOL: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; + break; + + case FTS3_MATCHINFO_NDOC: + if( bGlobal ){ + sqlite3_int64 nDoc; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); + pInfo->aMatchinfo[0] = (u32)nDoc; + } + break; + + case FTS3_MATCHINFO_AVGLENGTH: + if( bGlobal ){ + sqlite3_int64 nDoc; /* Number of rows in table */ + const char *a; /* Aggregate column length array */ + + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); + if( rc==SQLITE_OK ){ + int iCol; + for(iCol=0; iColnCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + pInfo->aMatchinfo[iCol] = ((u32)(nToken&0xffffffff)+nDoc/2)/nDoc; + } + } + } + break; + + case FTS3_MATCHINFO_LENGTH: { + sqlite3_stmt *pSelectDocsize = 0; + rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); + if( rc==SQLITE_OK ){ + int iCol; + const char *a = sqlite3_column_blob(pSelectDocsize, 0); + for(iCol=0; iColnCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + pInfo->aMatchinfo[iCol] = (u32)nToken; + } + } + sqlite3_reset(pSelectDocsize); + break; + } + + case FTS3_MATCHINFO_LCS: + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3MatchinfoLcs(pCsr, pInfo); + } + break; + + default: { + assert( zArg[i]==FTS3_MATCHINFO_HITS ); + Fts3Expr *pExpr = pCsr->pExpr; + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc!=SQLITE_OK ) break; + if( bGlobal ){ + if( pCsr->pDeferred ){ + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); + if( rc!=SQLITE_OK ) break; + } + rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + if( rc!=SQLITE_OK ) break; + } + (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + break; + } + } + + pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); + } + + sqlite3_reset(pSelect); + return rc; +} + /* ** Populate pCsr->aMatchinfo[] with data for the current row. The ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). */ -static int fts3GetMatchinfo(Fts3Cursor *pCsr){ +static int fts3GetMatchinfo( + Fts3Cursor *pCsr, /* FTS3 Cursor object */ + const char *zArg /* Second argument to matchinfo() function */ +){ MatchInfo sInfo; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; + int bGlobal = 0; /* Collect 'global' stats as well as local */ + memset(&sInfo, 0, sizeof(MatchInfo)); sInfo.pCursor = pCsr; sInfo.nCol = pTab->nColumn; + /* If there is cached matchinfo() data, but the format string for the + ** cache does not match the format string for this request, discard + ** the cached data. */ + if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ + assert( pCsr->aMatchinfo ); + sqlite3_free(pCsr->aMatchinfo); + pCsr->zMatchinfo = 0; + pCsr->aMatchinfo = 0; + } + + /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the + ** matchinfo function has been called for this query. In this case + ** allocate the array used to accumulate the matchinfo data and + ** initialize those elements that are constant for every row. + */ if( pCsr->aMatchinfo==0 ){ - /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the - ** matchinfo function has been called for this query. In this case - ** allocate the array used to accumulate the matchinfo data and - ** initialize those elements that are constant for every row. - */ - int nPhrase; /* Number of phrases */ - int nMatchinfo; /* Number of u32 elements in match-info */ - - /* Load doclists for each phrase in the query. */ - rc = fts3ExprLoadDoclists(pCsr, &nPhrase, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - nMatchinfo = 2 + 3*sInfo.nCol*nPhrase; - if( pTab->bHasDocsize ){ - nMatchinfo += 1 + 2*pTab->nColumn; - } - - sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo); - if( !sInfo.aMatchinfo ){ - return SQLITE_NOMEM; - } - memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo); - - /* First element of match-info is the number of phrases in the query */ - sInfo.aMatchinfo[0] = nPhrase; - sInfo.aMatchinfo[1] = sInfo.nCol; - if( pTab->bHasDocsize ){ - int ofst = 2 + 3*sInfo.aMatchinfo[0]*sInfo.aMatchinfo[1]; - rc = sqlite3Fts3MatchinfoDocsizeGlobal(pCsr, &sInfo.aMatchinfo[ofst]); - } - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo); - pCsr->aMatchinfo = sInfo.aMatchinfo; + int nMatchinfo = 0; /* Number of u32 elements in match-info */ + int nArg; /* Bytes in zArg */ + int i; /* Used to iterate through zArg */ + + /* Determine the number of phrases in the query */ + pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); + sInfo.nPhrase = pCsr->nPhrase; + + /* Determine the number of integers in the buffer returned by this call. */ + for(i=0; zArg[i]; i++){ + nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); + } + + /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ + nArg = strlen(zArg); + pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); + if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; + + pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; + pCsr->nMatchinfo = nMatchinfo; + memcpy(pCsr->zMatchinfo, zArg, nArg+1); + memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); pCsr->isMatchinfoNeeded = 1; + bGlobal = 1; } sInfo.aMatchinfo = pCsr->aMatchinfo; - if( rc==SQLITE_OK && pCsr->isMatchinfoNeeded ){ - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo); - if( pTab->bHasDocsize ){ - int ofst = 2 + 3*sInfo.aMatchinfo[0]*sInfo.aMatchinfo[1]; - rc = sqlite3Fts3MatchinfoDocsizeLocal(pCsr, &sInfo.aMatchinfo[ofst]); - } + sInfo.nPhrase = pCsr->nPhrase; + if( pCsr->isMatchinfoNeeded ){ + rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); pCsr->isMatchinfoNeeded = 0; } - return SQLITE_OK; + return rc; } /* ** Implementation of snippet() function. */ @@ -982,11 +1364,11 @@ /* Loop through all columns of the table being considered for snippets. ** If the iCol argument to this function was negative, this means all ** columns of the FTS3 table. Otherwise, only column iCol is considered. */ for(iRead=0; iReadnColumn; iRead++){ - SnippetFragment sF; + SnippetFragment sF = {0, 0, 0, 0}; int iS; if( iCol>=0 && iRead!=iCol ) continue; /* Find the best snippet of nFToken tokens in column iRead. */ rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); @@ -1209,26 +1591,47 @@ } /* ** Implementation of matchinfo() function. */ -void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){ +void sqlite3Fts3Matchinfo( + sqlite3_context *pContext, /* Function call context */ + Fts3Cursor *pCsr, /* FTS3 table cursor */ + const char *zArg /* Second arg to matchinfo() function */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc; + int i; + const char *zFormat; + + if( zArg ){ + for(i=0; zArg[i]; i++){ + char *zErr = 0; + if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ + sqlite3_result_error(pContext, zErr, -1); + sqlite3_free(zErr); + return; + } + } + zFormat = zArg; + }else{ + zFormat = FTS3_MATCHINFO_DEFAULT; + } + if( !pCsr->pExpr ){ sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); return; } - rc = fts3GetMatchinfo(pCsr); - sqlite3Fts3SegmentsClose((Fts3Table *)pCsr->base.pVtab ); + + /* Retrieve matchinfo() data. */ + rc = fts3GetMatchinfo(pCsr, zFormat); + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pContext, rc); }else{ - Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab; - int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3); - if( pTab->bHasDocsize ){ - n += sizeof(u32)*(1 + 2*pTab->nColumn); - } + int n = pCsr->nMatchinfo * sizeof(u32); sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); } } #endif Index: ext/fts3/fts3_write.c ================================================================== --- ext/fts3/fts3_write.c +++ ext/fts3/fts3_write.c @@ -280,10 +280,58 @@ } } *pp = pStmt; return rc; } + +static int fts3SelectDocsize( + Fts3Table *pTab, /* FTS3 table handle */ + int eStmt, /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */ + sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ + int rc; /* Return code */ + + assert( eStmt==SQL_SELECT_DOCSIZE || eStmt==SQL_SELECT_DOCTOTAL ); + + rc = fts3SqlStmt(pTab, eStmt, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( eStmt==SQL_SELECT_DOCSIZE ){ + sqlite3_bind_int64(pStmt, 1, iDocid); + } + rc = sqlite3_step(pStmt); + if( rc!=SQLITE_ROW ){ + rc = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = SQLITE_CORRUPT; + pStmt = 0; + }else{ + rc = SQLITE_OK; + } + } + + *ppStmt = pStmt; + return rc; +} + +int sqlite3Fts3SelectDoctotal( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + return fts3SelectDocsize(pTab, SQL_SELECT_DOCTOTAL, 0, ppStmt); +} + +int sqlite3Fts3SelectDocsize( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_int64 iDocid, /* Docid to read size data for */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); +} + +void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){ +} /* ** Similar to fts3SqlStmt(). Except, after binding the parameters in ** array apVal[] to the SQL statement identified by eStmt, the statement ** is executed. @@ -2441,79 +2489,10 @@ assert(j<=nBuf); a[i] = (u32)(x & 0xffffffff); } } -/* -** Fill in the document size auxiliary information for the matchinfo -** structure. The auxiliary information is: -** -** N Total number of documents in the full-text index -** a0 Average length of column 0 over the whole index -** n0 Length of column 0 on the matching row -** ... -** aM Average length of column M over the whole index -** nM Length of column M on the matching row -** -** The fts3MatchinfoDocsizeLocal() routine fills in the nX values. -** The fts3MatchinfoDocsizeGlobal() routine fills in N and the aX values. -*/ -int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor *pCur, u32 *a){ - const char *pBlob; /* The BLOB holding %_docsize info */ - int nBlob; /* Size of the BLOB */ - sqlite3_stmt *pStmt; /* Statement for reading and writing */ - int i, j; /* Loop counters */ - sqlite3_int64 x; /* Varint value */ - int rc; /* Result code from subfunctions */ - Fts3Table *p; /* The FTS table */ - - p = (Fts3Table*)pCur->base.pVtab; - rc = fts3SqlStmt(p, SQL_SELECT_DOCSIZE, &pStmt, 0); - if( rc ){ - return rc; - } - sqlite3_bind_int64(pStmt, 1, pCur->iPrevId); - if( sqlite3_step(pStmt)==SQLITE_ROW ){ - nBlob = sqlite3_column_bytes(pStmt, 0); - pBlob = (const char*)sqlite3_column_blob(pStmt, 0); - for(i=j=0; inColumn && jbase.pVtab; - rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0); - if( rc ){ - return rc; - } - if( sqlite3_step(pStmt)==SQLITE_ROW ){ - nBlob = sqlite3_column_bytes(pStmt, 0); - pBlob = (const char*)sqlite3_column_blob(pStmt, 0); - j = sqlite3Fts3GetVarint(pBlob, &x); - a[0] = nDoc = (u32)(x & 0xffffffff); - for(i=0; inColumn && jiPrevDocid. The sizes are encoded as ** a blob of varints. */ Index: test/fts3defer2.test ================================================================== --- test/fts3defer2.test +++ test/fts3defer2.test @@ -51,20 +51,20 @@ do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.2 { - SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1)) + SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 8 8 1 8 8 8 5001 9] ] do_execsql_test 1.2.3 { - SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1)) + SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)'; } [list \ {[a] b c d [e] [f] [a] x y} \ {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 8 8 2 8 8 8 5001 9] @@ -89,12 +89,13 @@ 3 { UPDATE t2_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } } { execsql $sql + do_execsql_test 2.2.$tn { - SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'a b'; + SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 7] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] } @@ -122,12 +123,12 @@ WHERE length(block)>10000; } } { execsql $sql do_execsql_test 2.4.$tn { - SELECT docid, mit(matchinfo(t3)) FROM t3 WHERE t3 MATCH '"a b c"'; + SELECT docid, mit(matchinfo(t3, 'pcxnal')) FROM t3 WHERE t3 MATCH '"a b c"'; } {1 {1 1 1 4 4 11 912 6} 3 {1 1 1 4 4 11 912 6}} } finish_test Index: test/fts3fault.test ================================================================== --- test/fts3fault.test +++ test/fts3fault.test @@ -15,10 +15,12 @@ set ::testprefix fts3fault # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } + +if 1 { # Test error handling in the sqlite3Fts3Init() function. This is the # function that registers the FTS3 module and various support functions # with SQLite. # @@ -152,7 +154,80 @@ execsql { CREATE VIRTUAL TABLE t1 USING fts4(a, b, matchnfo=fts3) } } -test { faultsim_test_result {1 {unrecognized parameter: matchnfo=fts3}} \ {1 {vtable constructor failed: t1}} } + +} + +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} + +do_test 8.0 { + faultsim_delete_and_reopen + execsql { CREATE VIRTUAL TABLE t8 USING fts4 } + execsql "INSERT INTO t8 VALUES('a b c')" + execsql "INSERT INTO t8 VALUES('b b b')" + execsql "INSERT INTO t8 VALUES('[string repeat {c } 50000]')" + execsql "INSERT INTO t8 VALUES('d d d')" + execsql "INSERT INTO t8 VALUES('e e e')" + execsql "INSERT INTO t8(t8) VALUES('optimize')" + faultsim_save_and_close +} {} + +do_faultsim_test 8.1 -prep { + faultsim_restore_and_reopen + db func mit mit +} -body { + execsql { SELECT mit(matchinfo(t8, 'x')) FROM t8 WHERE t8 MATCH 'a b c' } +} -test { + faultsim_test_result {0 {{1 1 1 1 4 2 1 5 5}}} +} +do_faultsim_test 8.2 -faults oom-t* -prep { + faultsim_restore_and_reopen + db func mit mit +} -body { + execsql { SELECT mit(matchinfo(t8, 's')) FROM t8 WHERE t8 MATCH 'a b c' } +} -test { + faultsim_test_result {0 3} +} +do_faultsim_test 8.3 -prep { + faultsim_restore_and_reopen + db func mit mit +} -body { + execsql { SELECT mit(matchinfo(t8, 'a')) FROM t8 WHERE t8 MATCH 'a b c' } +} -test { + faultsim_test_result {0 10002} +} +do_faultsim_test 8.4 -prep { + faultsim_restore_and_reopen + db func mit mit +} -body { + execsql { SELECT mit(matchinfo(t8, 'l')) FROM t8 WHERE t8 MATCH 'a b c' } +} -test { + faultsim_test_result {0 3} +} + +do_test 9.0 { + faultsim_delete_and_reopen + execsql { + CREATE VIRTUAL TABLE t9 USING fts4(tokenize=porter); + INSERT INTO t9 VALUES( + 'this record is used toooooooooooooooooooooooooooooooooooooo try to' + ); + SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to*'; + } + faultsim_save_and_close +} {} +do_faultsim_test 9.1 -prep { + faultsim_restore_and_reopen +} -body { + execsql { SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to*' } +} -test { + faultsim_test_result {0 {{0 0 20 39 0 0 64 2}}} +} finish_test Index: test/fts3matchinfo.test ================================================================== --- test/fts3matchinfo.test +++ test/fts3matchinfo.test @@ -39,19 +39,17 @@ INSERT INTO t1(content) VALUES('When all at once I saw a crowd,'); INSERT INTO t1(content) VALUES('A host, of golden daffodils,'); SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I'; } {{1 1 1 2 2} {1 1 1 2 2}} -# Now create an FTS4 table that does not specify matchinfo=fts3. The -# %_docsize table is created in this case and the array of integers returned -# by matchinfo() includes the extra data. +# Now create an FTS4 table that does not specify matchinfo=fts3. # do_execsql_test 1.2 { CREATE VIRTUAL TABLE t2 USING fts4; INSERT INTO t2 SELECT * FROM t1; SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I'; -} {{1 1 1 2 2 4 7 6} {1 1 1 2 2 4 7 8}} +} {{1 1 1 2 2} {1 1 1 2 2}} # Test some syntax-error handling. # do_catchsql_test 2.0 { CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3); @@ -66,6 +64,258 @@ CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3); INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees'); SELECT mtchinfo FROM t3; } {{Beside the lake, beneath the trees}} +do_execsql_test 3.2 { + CREATE VIRTUAL TABLE xx USING FTS4; + SELECT * FROM xx WHERE xx MATCH 'abc'; + SELECT * FROM xx WHERE xx MATCH 'a b c'; +} + + +#-------------------------------------------------------------------------- +# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function. +# +# The first argument - $tn - is a test identifier. This may be either a +# full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix +# is set, just the numeric component (i.e. "1.1"). +# +# The second argument is the name of an FTSX table. The third is the +# full text of a WHERE/MATCH expression to query the table for +# (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a +# key-value list (serialized array) with matchinfo() format specifiers +# as keys, and the results of executing the statement: +# +# SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr +# +# For example: +# +# CREATE VIRTUAL TABLE t1 USING fts4; +# INSERT INTO t1 VALUES('abc'); +# INSERT INTO t1 VALUES('def'); +# INSERT INTO t1 VALUES('abc abc'); +# +# do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" { +# n {3 3} +# p {1 1} +# c {1 1} +# x {{1 3 2} {2 3 2}} +# } +# +# If the $results list contains keys mapped to "-" instead of a matchinfo() +# result, then this command computes the expected results based on other +# mappings to test the matchinfo() function. For example, the command above +# could be changed to: +# +# do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" { +# n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}} +# pcx - +# } +# +# And this command would compute the expected results for matchinfo(t1, 'pcx') +# based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and +# matchinfo(t1, 'x') in order to test 'pcx'. +# +proc do_matchinfo_test {tn tbl expr results} { + + foreach {fmt res} $results { + if {$res == "-"} continue + set resarray($fmt) $res + } + + set nRow 0 + foreach {fmt res} [array get resarray] { + if {[llength $res]>$nRow} { set nRow [llength $res] } + } + + # Construct expected results for any formats for which the caller + # supplied result is "-". + # + foreach {fmt res} $results { + if {$res == "-"} { + set res [list] + for {set iRow 0} {$iRow<$nRow} {incr iRow} { + set rowres [list] + foreach c [split $fmt ""] { + set rowres [concat $rowres [lindex $resarray($c) $iRow]] + } + lappend res $rowres + } + set resarray($fmt) $res + } + } + + # Test each matchinfo() request individually. + # + foreach {fmt res} [array get resarray] { + set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr" + do_execsql_test $tn.$fmt $sql [normalize2 $res] + } + + # Test them all executed together (multiple invocations of matchinfo()). + # + set exprlist [list] + foreach {format res} [array get resarray] { + lappend exprlist "mit(matchinfo($tbl, '$format'))" + } + set allres [list] + for {set iRow 0} {$iRow<$nRow} {incr iRow} { + foreach {format res} [array get resarray] { + lappend allres [lindex $res $iRow] + } + } + set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr" + do_execsql_test $tn.multi $sql [normalize2 $allres] +} +proc normalize2 {list_of_lists} { + set res [list] + foreach elem $list_of_lists { + lappend res [list {*}$elem] + } + return $res +} + + +do_execsql_test 4.1.0 { + CREATE VIRTUAL TABLE t4 USING fts4(x, y); + INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); + INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); +} + +do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { + p {3 3} + c {2 2} + x { + {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1} + {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1} + } + n {2 2} + l {{5 5} {5 5}} + a {{5 5} {5 5}} + + s {{3 0} {0 3}} + + xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - + xpxsscplax - +} + +do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { + p {1 1} + c {2 2} + x { + {0 1 1 1 1 1} + {1 1 1 0 1 1} + } + n {2 2} + l {{5 5} {5 5}} + a {{5 5} {5 5}} + + s {{0 1} {1 0}} + + xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - + sxsxs - +} + +do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } +do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} } + +do_execsql_test 4.2.0 { + CREATE VIRTUAL TABLE t5 USING fts4; + INSERT INTO t5 VALUES('a a a a a'); + INSERT INTO t5 VALUES('a b a b a'); + INSERT INTO t5 VALUES('c b c b c'); + INSERT INTO t5 VALUES('x x x x x'); +} +do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { + x {{5 8 2 5 8 2} {3 8 2 3 8 2}} + s {2 1} +} +do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} } +do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } +do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } +do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } + +do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; + +do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { + x {{5 8 2 5 5 5} {3 8 2 3 5 5}} + s {2 1} +} + +do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } +do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } +do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } +do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } + +do_execsql_test 4.4.0 { + INSERT INTO t5(t5) VALUES('optimize'); + UPDATE t5_segments + SET block = zeroblob(length(block)) + WHERE length(block)>10000; +} + +do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } +do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} } +do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } +do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} } +do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} } +do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } + +do_execsql_test 4.5.0 { + CREATE VIRTUAL TABLE t6 USING fts4(a, b, c); + INSERT INTO t6 VALUES('a', 'b', 'c'); +} +do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'} { s {{1 1 1}} } + + +#------------------------------------------------------------------------- +# Check the following restrictions: +# +# + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3. +# + Matchinfo flag 'l' cannot be used with matchinfo=fts3. +# +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE t7 USING fts3(a, b); + INSERT INTO t7 VALUES('u v w', 'x y z'); + + CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3); + INSERT INTO t8 VALUES('u v w', 'x y z'); +} + +do_catchsql_test 5.2.1 { + SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y' +} {1 {unrecognized matchinfo request: a}} +do_catchsql_test 5.2.2 { + SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y' +} {1 {unrecognized matchinfo request: l}} +do_catchsql_test 5.2.3 { + SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y' +} {1 {unrecognized matchinfo request: n}} + +do_catchsql_test 5.3.1 { + SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y' +} {1 {unrecognized matchinfo request: l}} + +#------------------------------------------------------------------------- +# Test that the offsets() function handles corruption in the %_content +# table correctly. +# +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE t9 USING fts4; + INSERT INTO t9 VALUES( + 'this record is used to try to dectect corruption' + ); + SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to'; +} {{0 0 20 2 0 0 27 2}} + +do_catchsql_test 6.2 { + UPDATE t9_content SET c0content = 'this record is used to'; + SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to'; +} {1 {database disk image is malformed}} + finish_test + Index: test/fts3query.test ================================================================== --- test/fts3query.test +++ test/fts3query.test @@ -150,12 +150,10 @@ } do_select_tests 5.2 -errorformat { wrong number of arguments to function %s() } { 1 "SELECT matchinfo() FROM t2 WHERE t2 MATCH 'history'" matchinfo - 2 "SELECT matchinfo(t2, t2) FROM t2 WHERE t2 MATCH 'history'" matchinfo - 3 "SELECT snippet(t2, 1, 2, 3, 4, 5, 6) FROM t2 WHERE t2 MATCH 'history'" snippet } do_select_tests 5.3 -errorformat { illegal first argument to %s @@ -172,11 +170,16 @@ 1 "SELECT matchinfo(content) FROM t2 WHERE t2 MATCH 'history'" matchinfo 2 "SELECT offsets(content) FROM t2 WHERE t2 MATCH 'history'" offsets 3 "SELECT snippet(content) FROM t2 WHERE t2 MATCH 'history'" snippet 4 "SELECT optimize(content) FROM t2 WHERE t2 MATCH 'history'" optimize } +do_catchsql_test 5.5.1 { + SELECT matchinfo(t2, 'abc') FROM t2 WHERE t2 MATCH 'history' +} {1 {unrecognized matchinfo request: b}} + do_execsql_test 5.5 { DROP TABLE t2 } + # Test the snippet() function with 1 to 6 arguments. # do_execsql_test 6.1 { CREATE VIRTUAL TABLE t3 USING FTS4(a, b);