Index: Makefile.in ================================================================== --- Makefile.in +++ Makefile.in @@ -377,11 +377,12 @@ $(TOP)/src/test_tclvar.c \ $(TOP)/src/test_thread.c \ $(TOP)/src/test_vfs.c \ $(TOP)/src/test_wholenumber.c \ $(TOP)/src/test_wsd.c \ - $(TOP)/ext/fts3/fts3_term.c + $(TOP)/ext/fts3/fts3_term.c \ + $(TOP)/ext/fts3/fts3_test.c # Source code to the library files needed by the test fixture # TESTSRC2 = \ $(TOP)/src/attach.c \ Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -421,16 +421,16 @@ /* ** When this function is called, *pp points to the first byte following a ** varint that is part of a doclist (or position-list, or any other list ** of varints). This function moves *pp to point to the start of that varint, -** and decrements the value stored in *pVal by the varint value. +** and sets *pVal by the varint value. ** ** Argument pStart points to the first byte of the doclist that the ** varint is part of. */ -static void fts3GetReverseDeltaVarint( +static void fts3GetReverseVarint( char **pp, char *pStart, sqlite3_int64 *pVal ){ sqlite3_int64 iVal; @@ -442,25 +442,11 @@ for(p = (*pp)-2; p>=pStart && *p&0x80; p--); p++; *pp = p; sqlite3Fts3GetVarint(p, &iVal); - *pVal -= iVal; -} - -/* -** As long as *pp has not reached its end (pEnd), then do the same -** as fts3GetDeltaVarint(): read a single varint and add it to *pVal. -** But if we have reached the end of the varint, just set *pp=0 and -** leave *pVal unchanged. -*/ -static void fts3GetDeltaVarint2(char **pp, char *pEnd, sqlite3_int64 *pVal){ - if( *pp>=pEnd ){ - *pp = 0; - }else{ - fts3GetDeltaVarint(pp, pVal); - } + *pVal = iVal; } /* ** The xDisconnect() virtual table method. */ @@ -829,10 +815,62 @@ fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); } sqlite3_free(zFree); return zRet; } + +static int fts3GobbleInt(const char **pp, int *pnOut){ + const char *p = *pp; + int nInt = 0; + for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ + nInt = nInt * 10 + (p[0] - '0'); + } + if( p==*pp ) return SQLITE_ERROR; + *pnOut = nInt; + *pp = p; + return SQLITE_OK; +} + + +static int fts3PrefixParameter( + const char *zParam, /* ABC in prefix=ABC parameter to parse */ + int *pnIndex, /* OUT: size of *apIndex[] array */ + struct Fts3Index **apIndex, /* OUT: Array of indexes for this table */ + struct Fts3Index **apFree /* OUT: Free this with sqlite3_free() */ +){ + struct Fts3Index *aIndex; + int nIndex = 1; + + if( zParam && zParam[0] ){ + const char *p; + nIndex++; + for(p=zParam; *p; p++){ + if( *p==',' ) nIndex++; + } + } + + aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); + *apIndex = *apFree = aIndex; + *pnIndex = nIndex; + if( !aIndex ){ + return SQLITE_NOMEM; + } + + memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); + if( zParam ){ + const char *p = zParam; + int i; + for(i=1; i MATCHINFO */ + { "prefix", 6, 0 }, /* 1 -> PREFIX */ + { "compress", 8, 0 }, /* 2 -> COMPRESS */ + { "uncompress", 10, 0 }, /* 3 -> UNCOMPRESS */ + { "order", 5, 0 } /* 4 -> ORDER */ + }; + + int iOpt; if( !zVal ){ rc = SQLITE_NOMEM; - goto fts3_init_out; - } - if( nKey==9 && 0==sqlite3_strnicmp(z, "matchinfo", 9) ){ - if( strlen(zVal)==4 && 0==sqlite3_strnicmp(zVal, "fts3", 4) ){ - bNoDocsize = 1; - }else{ - *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); - rc = SQLITE_ERROR; - } - }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ - zCompress = zVal; - zVal = 0; - }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ - zUncompress = zVal; - zVal = 0; - }else{ - *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); - rc = SQLITE_ERROR; - } - sqlite3_free(zVal); + }else{ + for(iOpt=0; iOptnOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ + break; + } + } + if( iOpt==SizeofArray(aFts4Opt) ){ + *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); + rc = SQLITE_ERROR; + }else{ + switch( iOpt ){ + case 0: /* MATCHINFO */ + if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ + *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); + rc = SQLITE_ERROR; + } + bNoDocsize = 1; + break; + + case 1: /* PREFIX */ + sqlite3_free(zPrefix); + zPrefix = zVal; + zVal = 0; + break; + + case 2: /* COMPRESS */ + sqlite3_free(zCompress); + zCompress = zVal; + zVal = 0; + break; + + case 3: /* UNCOMPRESS */ + sqlite3_free(zUncompress); + zUncompress = zVal; + zVal = 0; + break; + + case 4: /* ORDER */ + if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) + && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 3)) + ){ + *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal); + rc = SQLITE_ERROR; + } + bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); + break; + } + } + sqlite3_free(zVal); + } } /* Otherwise, the argument is a column name. */ else { nString += (int)(strlen(z) + 1); @@ -953,14 +1042,21 @@ rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr); if( rc!=SQLITE_OK ) goto fts3_init_out; } assert( pTokenizer ); + rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree); + if( rc==SQLITE_ERROR ){ + assert( zPrefix ); + *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix); + } + if( rc!=SQLITE_OK ) goto fts3_init_out; /* Allocate and populate the Fts3Table structure. */ - nByte = sizeof(Fts3Table) + /* Fts3Table */ + nByte = sizeof(Fts3Table) + /* Fts3Table */ nCol * sizeof(char *) + /* azColumn */ + nIndex * sizeof(struct Fts3Index) + /* aIndex */ nName + /* zName */ nDb + /* zDb */ nString; /* Space for azColumn strings */ p = (Fts3Table*)sqlite3_malloc(nByte); if( p==0 ){ @@ -971,20 +1067,26 @@ p->db = db; p->nColumn = nCol; p->nPendingData = 0; p->azColumn = (char **)&p[1]; p->pTokenizer = pTokenizer; - p->nNodeSize = 1000; p->nMaxPendingData = FTS3_MAX_PENDING_DATA; p->bHasDocsize = (isFts4 && bNoDocsize==0); p->bHasStat = isFts4; + p->bDescIdx = bDescIdx; TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); - fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1); + + p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; + memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); + p->nIndex = nIndex; + for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); + } /* Fill in the zName and zDb fields of the vtab structure. */ - zCsr = (char *)&p->azColumn[nCol]; + zCsr = (char *)&p->aIndex[nIndex]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; p->zDb = zCsr; memcpy(zCsr, argv[1], nDb); @@ -1018,19 +1120,20 @@ if( isCreate ){ rc = fts3CreateTables(p); } /* Figure out the page-size for the database. This is required in order to - ** estimate the cost of loading large doclists from the database (see - ** function sqlite3Fts3SegReaderCost() for details). - */ + ** estimate the cost of loading large doclists from the database. */ fts3DatabasePageSize(&rc, p); + p->nNodeSize = p->nPgsz-35; /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: + sqlite3_free(zPrefix); + sqlite3_free(aFree); sqlite3_free(zCompress); sqlite3_free(zUncompress); sqlite3_free((void *)aCol); if( rc!=SQLITE_OK ){ if( p ){ @@ -1037,10 +1140,11 @@ fts3DisconnectMethod((sqlite3_vtab *)p); }else if( pTokenizer ){ pTokenizer->pModule->xDestroy(pTokenizer); } }else{ + assert( p->pSegments==0 ); *ppVTab = &p->base; } return rc; } @@ -1134,14 +1238,15 @@ if( pOrder->desc ){ pInfo->idxStr = "DESC"; }else{ pInfo->idxStr = "ASC"; } + pInfo->orderByConsumed = 1; } - pInfo->orderByConsumed = 1; } + assert( p->pSegments==0 ); return SQLITE_OK; } /* ** Implementation of xOpen method. @@ -1173,10 +1278,11 @@ sqlite3_finalize(pCsr->pStmt); sqlite3Fts3ExprFree(pCsr->pExpr); sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); sqlite3_free(pCsr); return SQLITE_OK; } /* @@ -1184,12 +1290,12 @@ ** of the %_content table that contains the last match. Return ** SQLITE_OK on success. */ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ if( pCsr->isRequireSeek ){ - pCsr->isRequireSeek = 0; sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); + pCsr->isRequireSeek = 0; if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ return SQLITE_OK; }else{ int rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ @@ -1366,21 +1472,21 @@ if( rc==SQLITE_OK && iHeight>1 ){ char *zBlob = 0; /* Blob read from %_segments table */ int nBlob; /* Size of zBlob in bytes */ if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ - rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob); + rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); if( rc==SQLITE_OK ){ rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); } sqlite3_free(zBlob); piLeaf = 0; zBlob = 0; } if( rc==SQLITE_OK ){ - rc = sqlite3Fts3ReadBlock(p, piLeaf ? *piLeaf : *piLeaf2, &zBlob, &nBlob); + rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); } if( rc==SQLITE_OK ){ rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); } sqlite3_free(zBlob); @@ -1752,11 +1858,23 @@ *pp = p; return 1; } /* -** Merge two position-lists as required by the NEAR operator. +** Merge two position-lists as required by the NEAR operator. The argument +** position lists correspond to the left and right phrases of an expression +** like: +** +** "phrase 1" NEAR "phrase number 2" +** +** Position list *pp1 corresponds to the left-hand side of the NEAR +** expression and *pp2 to the right. As usual, the indexes in the position +** lists are the offsets of the last token in each phrase (tokens "1" and "2" +** in the example above). +** +** The output position list - written to *pp - is a copy of *pp2 with those +** entries that are not sufficiently NEAR entries in *pp1 removed. */ static int fts3PoslistNearMerge( char **pp, /* Output buffer */ char *aTmp, /* Temporary buffer space */ int nRight, /* Maximum difference in token positions */ @@ -1765,218 +1883,31 @@ char **pp2 /* IN/OUT: Right input list */ ){ char *p1 = *pp1; char *p2 = *pp2; - if( !pp ){ - if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1; - *pp1 = p1; - *pp2 = p2; - return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1); - }else{ - char *pTmp1 = aTmp; - char *pTmp2; - char *aTmp2; - int res = 1; - - fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); - aTmp2 = pTmp2 = pTmp1; - *pp1 = p1; - *pp2 = p2; - fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); - if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ - fts3PoslistMerge(pp, &aTmp, &aTmp2); - }else if( pTmp1!=aTmp ){ - fts3PoslistCopy(pp, &aTmp); - }else if( pTmp2!=aTmp2 ){ - fts3PoslistCopy(pp, &aTmp2); - }else{ - res = 0; - } - - return res; - } -} - -/* -** Values that may be used as the first parameter to fts3DoclistMerge(). -*/ -#define MERGE_NOT 2 /* D + D -> D */ -#define MERGE_AND 3 /* D + D -> D */ -#define MERGE_OR 4 /* D + D -> D */ -#define MERGE_POS_OR 5 /* P + P -> P */ -#define MERGE_PHRASE 6 /* P + P -> D */ -#define MERGE_POS_PHRASE 7 /* P + P -> P */ -#define MERGE_NEAR 8 /* P + P -> D */ -#define MERGE_POS_NEAR 9 /* P + P -> P */ - -/* -** Merge the two doclists passed in buffer a1 (size n1 bytes) and a2 -** (size n2 bytes). The output is written to pre-allocated buffer aBuffer, -** which is guaranteed to be large enough to hold the results. The number -** of bytes written to aBuffer is stored in *pnBuffer before returning. -** -** If successful, SQLITE_OK is returned. Otherwise, if a malloc error -** occurs while allocating a temporary buffer as part of the merge operation, -** SQLITE_NOMEM is returned. -*/ -static int fts3DoclistMerge( - int mergetype, /* One of the MERGE_XXX constants */ - int nParam1, /* Used by MERGE_NEAR and MERGE_POS_NEAR */ - int nParam2, /* Used by MERGE_NEAR and MERGE_POS_NEAR */ - char *aBuffer, /* Pre-allocated output buffer */ - int *pnBuffer, /* OUT: Bytes written to aBuffer */ - char *a1, /* Buffer containing first doclist */ - int n1, /* Size of buffer a1 */ - char *a2, /* Buffer containing second doclist */ - int n2, /* Size of buffer a2 */ - int *pnDoc /* OUT: Number of docids in output */ -){ - sqlite3_int64 i1 = 0; - sqlite3_int64 i2 = 0; - sqlite3_int64 iPrev = 0; - - char *p = aBuffer; - char *p1 = a1; - char *p2 = a2; - char *pEnd1 = &a1[n1]; - char *pEnd2 = &a2[n2]; - int nDoc = 0; - - assert( mergetype==MERGE_OR || mergetype==MERGE_POS_OR - || mergetype==MERGE_AND || mergetype==MERGE_NOT - || mergetype==MERGE_PHRASE || mergetype==MERGE_POS_PHRASE - || mergetype==MERGE_NEAR || mergetype==MERGE_POS_NEAR - ); - - if( !aBuffer ){ - *pnBuffer = 0; - return SQLITE_NOMEM; - } - - /* Read the first docid from each doclist */ - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - - switch( mergetype ){ - case MERGE_OR: - case MERGE_POS_OR: - while( p1 || p2 ){ - if( p2 && p1 && i1==i2 ){ - fts3PutDeltaVarint(&p, &iPrev, i1); - if( mergetype==MERGE_POS_OR ) fts3PoslistMerge(&p, &p1, &p2); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - }else if( !p2 || (p1 && i1=pEnd ){ + *pp = 0; + }else{ + sqlite3_int64 iVal; + *pp += sqlite3Fts3GetVarint(*pp, &iVal); + if( bDescIdx ){ + *pVal -= iVal; + }else{ + *pVal += iVal; + } + } +} + +static void fts3PutDeltaVarint3( + char **pp, /* IN/OUT: Output pointer */ + int bDescIdx, /* True for descending docids */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + int *pbFirst, /* IN/OUT: True after first int written */ + sqlite3_int64 iVal /* Write this value to the list */ +){ + sqlite3_int64 iWrite; + if( bDescIdx==0 || *pbFirst==0 ){ + iWrite = iVal - *piPrev; + }else{ + iWrite = *piPrev - iVal; + } + assert( *pbFirst || *piPrev==0 ); + assert( *pbFirst==0 || iWrite>0 ); + *pp += sqlite3Fts3PutVarint(*pp, iWrite); + *piPrev = iVal; + *pbFirst = 1; +} + +#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2)) + +static int fts3DoclistOrMerge( + int bDescIdx, /* True if arguments are desc */ + char *a1, int n1, /* First doclist */ + char *a2, int n2, /* Second doclist */ + char **paOut, int *pnOut /* OUT: Malloc'd doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *pEnd1 = &a1[n1]; + char *pEnd2 = &a2[n2]; + char *p1 = a1; + char *p2 = a2; + char *p; + char *aOut; + int bFirstOut = 0; + + *paOut = 0; + *pnOut = 0; + aOut = sqlite3_malloc(n1+n2); + if( !aOut ) return SQLITE_NOMEM; + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + while( p1 || p2 ){ + sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + + if( p2 && p1 && iDiff==0 ){ + fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PoslistMerge(&p, &p1, &p2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + }else if( !p2 || (p1 && iDiff<0) ){ + fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PoslistCopy(&p, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + }else{ + fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2); + fts3PoslistCopy(&p, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + } + } + + *paOut = aOut; + *pnOut = (p-aOut); + return SQLITE_OK; +} + +static void fts3DoclistPhraseMerge( + int bDescIdx, /* True if arguments are desc */ + int nDist, /* Distance from left to right (1=adjacent) */ + char *aLeft, int nLeft, /* Left doclist */ + char *aRight, int *pnRight /* IN/OUT: Right/output doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *pEnd1 = &aLeft[nLeft]; + char *pEnd2 = &aRight[*pnRight]; + char *p1 = aLeft; + char *p2 = aRight; + char *p; + int bFirstOut = 0; + char *aOut = aRight; + + assert( nDist>0 ); + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + + while( p1 && p2 ){ + sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + if( iDiff==0 ){ + char *pSave = p; + sqlite3_int64 iPrevSave = iPrev; + int bFirstOutSave = bFirstOut; + + fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ + p = pSave; + iPrev = iPrevSave; + bFirstOut = bFirstOutSave; + } + fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + }else if( iDiff<0 ){ + fts3PoslistCopy(0, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + }else{ + fts3PoslistCopy(0, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + } + } + + *pnRight = p - aOut; +} + /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. @@ -1995,12 +2068,11 @@ ** ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is ** the responsibility of the caller to free any doclists left in the ** TermSelect.aaOutput[] array. */ -static int fts3TermSelectMerge(TermSelect *pTS){ - int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); +static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){ char *aOut = 0; int nOut = 0; int i; /* Loop through the doclists in the aaOutput[] array. Merge them all @@ -2011,19 +2083,21 @@ if( !aOut ){ aOut = pTS->aaOutput[i]; nOut = pTS->anOutput[i]; pTS->aaOutput[i] = 0; }else{ - int nNew = nOut + pTS->anOutput[i]; - char *aNew = sqlite3_malloc(nNew); - if( !aNew ){ + int nNew; + char *aNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, + pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ sqlite3_free(aOut); - return SQLITE_NOMEM; + return rc; } - fts3DoclistMerge(mergetype, 0, 0, - aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, 0 - ); + sqlite3_free(pTS->aaOutput[i]); sqlite3_free(aOut); pTS->aaOutput[i] = 0; aOut = aNew; nOut = nNew; @@ -2055,217 +2129,241 @@ UNUSED_PARAMETER(zTerm); UNUSED_PARAMETER(nTerm); if( pTS->aaOutput[0]==0 ){ /* If this is the first term selected, copy the doclist to the output - ** buffer using memcpy(). TODO: Add a way to transfer control of the - ** aDoclist buffer from the caller so as to avoid the memcpy(). - */ + ** buffer using memcpy(). */ pTS->aaOutput[0] = sqlite3_malloc(nDoclist); pTS->anOutput[0] = nDoclist; if( pTS->aaOutput[0] ){ memcpy(pTS->aaOutput[0], aDoclist, nDoclist); }else{ return SQLITE_NOMEM; } }else{ - int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); char *aMerge = aDoclist; int nMerge = nDoclist; int iOut; for(iOut=0; iOutaaOutput); iOut++){ - char *aNew; - int nNew; if( pTS->aaOutput[iOut]==0 ){ assert( iOut>0 ); pTS->aaOutput[iOut] = aMerge; pTS->anOutput[iOut] = nMerge; break; - } - - nNew = nMerge + pTS->anOutput[iOut]; - aNew = sqlite3_malloc(nNew); - if( !aNew ){ - if( aMerge!=aDoclist ){ - sqlite3_free(aMerge); - } - return SQLITE_NOMEM; - } - fts3DoclistMerge(mergetype, 0, 0, aNew, &nNew, - pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge, 0 - ); - - if( iOut>0 ) sqlite3_free(aMerge); - sqlite3_free(pTS->aaOutput[iOut]); - pTS->aaOutput[iOut] = 0; - - aMerge = aNew; - nMerge = nNew; - if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ - pTS->aaOutput[iOut] = aMerge; - pTS->anOutput[iOut] = nMerge; - } - } - } - return SQLITE_OK; -} - -static int fts3DeferredTermSelect( - Fts3DeferredToken *pToken, /* Phrase token */ - int isTermPos, /* True to include positions */ - int *pnOut, /* OUT: Size of list */ - char **ppOut /* OUT: Body of list */ -){ - char *aSource; - int nSource; - - aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource); - if( !aSource ){ - *pnOut = 0; - *ppOut = 0; - }else if( isTermPos ){ - *ppOut = sqlite3_malloc(nSource); - if( !*ppOut ) return SQLITE_NOMEM; - memcpy(*ppOut, aSource, nSource); - *pnOut = nSource; - }else{ - sqlite3_int64 docid; - *pnOut = sqlite3Fts3GetVarint(aSource, &docid); - *ppOut = sqlite3_malloc(*pnOut); - if( !*ppOut ) return SQLITE_NOMEM; - sqlite3Fts3PutVarint(*ppOut, docid); - } - - return SQLITE_OK; -} - -int sqlite3Fts3SegReaderCursor( + }else{ + char *aNew; + int nNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, + pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + return rc; + } + + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + sqlite3_free(pTS->aaOutput[iOut]); + pTS->aaOutput[iOut] = 0; + + aMerge = aNew; + nMerge = nNew; + if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + } + } + } + } + return SQLITE_OK; +} + +/* +** Append SegReader object pNew to the end of the pCsr->apSegment[] array. +*/ +static int fts3SegReaderCursorAppend( + Fts3MultiSegReader *pCsr, + Fts3SegReader *pNew +){ + if( (pCsr->nSegment%16)==0 ){ + Fts3SegReader **apNew; + int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); + if( !apNew ){ + sqlite3Fts3SegReaderFree(pNew); + return SQLITE_NOMEM; + } + pCsr->apSegment = apNew; + } + pCsr->apSegment[pCsr->nSegment++] = pNew; + return SQLITE_OK; +} + +static int fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ - Fts3SegReaderCursor *pCsr /* Cursor object to populate */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ int rc = SQLITE_OK; int rc2; - int iAge = 0; sqlite3_stmt *pStmt = 0; - Fts3SegReader *pPending = 0; - - assert( iLevel==FTS3_SEGCURSOR_ALL - || iLevel==FTS3_SEGCURSOR_PENDING - || iLevel>=0 - ); - assert( FTS3_SEGCURSOR_PENDING<0 ); - assert( FTS3_SEGCURSOR_ALL<0 ); - assert( iLevel==FTS3_SEGCURSOR_ALL || (zTerm==0 && isPrefix==1) ); - assert( isPrefix==0 || isScan==0 ); - - - memset(pCsr, 0, sizeof(Fts3SegReaderCursor)); - - /* If iLevel is less than 0, include a seg-reader for the pending-terms. */ - assert( isScan==0 || fts3HashCount(&p->pendingTerms)==0 ); - if( iLevel<0 && isScan==0 ){ - rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pPending); - if( rc==SQLITE_OK && pPending ){ - int nByte = (sizeof(Fts3SegReader *) * 16); - pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); - if( pCsr->apSegment==0 ){ - rc = SQLITE_NOMEM; - }else{ - pCsr->apSegment[0] = pPending; - pCsr->nSegment = 1; - pPending = 0; - } + + /* If iLevel is less than 0 and this is not a scan, include a seg-reader + ** for the pending-terms. If this is a scan, then this call must be being + ** made by an fts4aux module, not an FTS table. In this case calling + ** Fts3SegReaderPending might segfault, as the data structures used by + ** fts4aux are not completely populated. So it's easiest to filter these + ** calls out here. */ + if( iLevel<0 && p->aIndex ){ + Fts3SegReader *pSeg = 0; + rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg); + if( rc==SQLITE_OK && pSeg ){ + rc = fts3SegReaderCursorAppend(pCsr, pSeg); } } if( iLevel!=FTS3_SEGCURSOR_PENDING ){ if( rc==SQLITE_OK ){ - rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt); + rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt); } + while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ + Fts3SegReader *pSeg = 0; /* Read the values returned by the SELECT into local variables. */ sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); int nRoot = sqlite3_column_bytes(pStmt, 4); char const *zRoot = sqlite3_column_blob(pStmt, 4); - /* If nSegment is a multiple of 16 the array needs to be extended. */ - if( (pCsr->nSegment%16)==0 ){ - Fts3SegReader **apNew; - int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); - apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); - if( !apNew ){ - rc = SQLITE_NOMEM; - goto finished; - } - pCsr->apSegment = apNew; - } - /* If zTerm is not NULL, and this segment is not stored entirely on its ** root node, the range of leaves scanned can be reduced. Do this. */ if( iStartBlock && zTerm ){ sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); if( rc!=SQLITE_OK ) goto finished; if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; } - rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock, - iEndBlock, zRoot, nRoot, &pCsr->apSegment[pCsr->nSegment] + rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, + iStartBlock, iLeavesEndBlock, iEndBlock, zRoot, nRoot, &pSeg ); if( rc!=SQLITE_OK ) goto finished; - pCsr->nSegment++; - iAge++; + rc = fts3SegReaderCursorAppend(pCsr, pSeg); } } finished: rc2 = sqlite3_reset(pStmt); if( rc==SQLITE_DONE ) rc = rc2; - sqlite3Fts3SegReaderFree(pPending); return rc; } +/* +** Set up a cursor object for iterating through a full-text index or a +** single level therein. +*/ +int sqlite3Fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + assert( iIndex>=0 && iIndexnIndex ); + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevelaIndex==0 ); + + memset(pCsr, 0, sizeof(Fts3MultiSegReader)); + + return fts3SegReaderCursor( + p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr + ); +} + +static int fts3SegReaderCursorAddZero( + Fts3Table *p, + const char *zTerm, + int nTerm, + Fts3MultiSegReader *pCsr +){ + return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr); +} + + +int sqlite3Fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ - Fts3SegReaderCursor **ppSegcsr /* OUT: Allocated seg-reader cursor */ + Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ - Fts3SegReaderCursor *pSegcsr; /* Object to allocate and return */ + Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ - pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor)); + pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int i; - int nCost = 0; - rc = sqlite3Fts3SegReaderCursor( - p, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr); - - for(i=0; rc==SQLITE_OK && inSegment; i++){ - rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); - } - pSegcsr->nCost = nCost; + int bFound = 0; /* True once an index has been found */ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + + if( isPrefix ){ + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor( + p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr); + pSegcsr->bLookup = 1; + } + } + + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm+1 ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor( + p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + ); + if( rc==SQLITE_OK ){ + rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr); + } + } + } + } + + if( bFound==0 ){ + rc = sqlite3Fts3SegReaderCursor( + p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr + ); + pSegcsr->bLookup = !isPrefix; + } } *ppSegcsr = pSegcsr; return rc; } -static void fts3SegReaderCursorFree(Fts3SegReaderCursor *pSegcsr){ +static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ sqlite3Fts3SegReaderFinish(pSegcsr); sqlite3_free(pSegcsr); } /* @@ -2286,11 +2384,11 @@ int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ - Fts3SegReaderCursor *pSegcsr; /* Seg-reader cursor for this term */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ TermSelect tsc; /* Context object for fts3TermSelectCb() */ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); @@ -2312,11 +2410,11 @@ pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist ); } if( rc==SQLITE_OK ){ - rc = fts3TermSelectMerge(&tsc); + rc = fts3TermSelectMerge(p, &tsc); } if( rc==SQLITE_OK ){ *ppOut = tsc.aaOutput[0]; *pnOut = tsc.anOutput[0]; }else{ @@ -2362,664 +2460,10 @@ } return nDoc; } -/* -** Call sqlite3Fts3DeferToken() for each token in the expression pExpr. -*/ -static int fts3DeferExpression(Fts3Cursor *pCsr, Fts3Expr *pExpr){ - int rc = SQLITE_OK; - if( pExpr ){ - rc = fts3DeferExpression(pCsr, pExpr->pLeft); - if( rc==SQLITE_OK ){ - rc = fts3DeferExpression(pCsr, pExpr->pRight); - } - if( pExpr->eType==FTSQUERY_PHRASE ){ - int iCol = pExpr->pPhrase->iColumn; - int i; - for(i=0; rc==SQLITE_OK && ipPhrase->nToken; i++){ - Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; - if( pToken->pDeferred==0 ){ - rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); - } - } - } - } - return rc; -} - -/* -** This function removes the position information from a doclist. When -** called, buffer aList (size *pnList bytes) contains a doclist that includes -** position information. This function removes the position information so -** that aList contains only docids, and adjusts *pnList to reflect the new -** (possibly reduced) size of the doclist. -*/ -static void fts3DoclistStripPositions( - char *aList, /* IN/OUT: Buffer containing doclist */ - int *pnList /* IN/OUT: Size of doclist in bytes */ -){ - if( aList ){ - char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */ - char *p = aList; /* Input cursor */ - char *pOut = aList; /* Output cursor */ - - while( piColumn; - int isTermPos = (pPhrase->nToken>1 || isReqPos); - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - int isFirst = 1; - - int iPrevTok = 0; - int nDoc = 0; - - /* If this is an xFilter() evaluation, create a segment-reader for each - ** phrase token. Or, if this is an xNext() or snippet/offsets/matchinfo - ** evaluation, only create segment-readers if there are no Fts3DeferredToken - ** objects attached to the phrase-tokens. - */ - for(ii=0; iinToken; ii++){ - Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pSegcsr==0 ){ - if( (pCsr->eEvalmode==FTS3_EVAL_FILTER) - || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0) - || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext) - ){ - rc = fts3TermSegReaderCursor( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr - ); - if( rc!=SQLITE_OK ) return rc; - } - } - } - - for(ii=0; iinToken; ii++){ - Fts3PhraseToken *pTok; /* Token to find doclist for */ - int iTok = 0; /* The token being queried this iteration */ - char *pList = 0; /* Pointer to token doclist */ - int nList = 0; /* Size of buffer at pList */ - - /* Select a token to process. If this is an xFilter() call, then tokens - ** are processed in order from least to most costly. Otherwise, tokens - ** are processed in the order in which they occur in the phrase. - */ - if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){ - assert( isReqPos ); - iTok = ii; - pTok = &pPhrase->aToken[iTok]; - if( pTok->bFulltext==0 ) continue; - }else if( pCsr->eEvalmode==FTS3_EVAL_NEXT || isReqPos ){ - iTok = ii; - pTok = &pPhrase->aToken[iTok]; - }else{ - int nMinCost = 0x7FFFFFFF; - int jj; - - /* Find the remaining token with the lowest cost. */ - for(jj=0; jjnToken; jj++){ - Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[jj].pSegcsr; - if( pSegcsr && pSegcsr->nCostnCost; - } - } - pTok = &pPhrase->aToken[iTok]; - - /* This branch is taken if it is determined that loading the doclist - ** for the next token would require more IO than loading all documents - ** currently identified by doclist pOut/nOut. No further doclists will - ** be loaded from the full-text index for this phrase. - */ - if( nMinCost>nDoc && ii>0 ){ - rc = fts3DeferExpression(pCsr, pCsr->pExpr); - break; - } - } - - if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ - rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); - }else{ - if( pTok->pSegcsr ){ - rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); - } - pTok->bFulltext = 1; - } - assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pSegcsr==0 ); - if( rc!=SQLITE_OK ) break; - - if( isFirst ){ - pOut = pList; - nOut = nList; - if( pCsr->eEvalmode==FTS3_EVAL_FILTER && pPhrase->nToken>1 ){ - nDoc = fts3DoclistCountDocids(1, pOut, nOut); - } - isFirst = 0; - iPrevTok = iTok; - }else{ - /* Merge the new term list and the current output. */ - char *aLeft, *aRight; - int nLeft, nRight; - int nDist; - int mt; - - /* If this is the final token of the phrase, and positions were not - ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE. - ** This drops the position information from the output list. - */ - mt = MERGE_POS_PHRASE; - if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE; - - assert( iPrevTok!=iTok ); - if( iPrevToknToken ){ - assert( pCsr->eEvalmode==FTS3_EVAL_FILTER && isReqPos==0 ); - fts3DoclistStripPositions(pOut, &nOut); - } - *paOut = pOut; - *pnOut = nOut; - }else{ - sqlite3_free(pOut); - } - return rc; -} - -/* -** This function merges two doclists according to the requirements of a -** NEAR operator. -** -** Both input doclists must include position information. The output doclist -** includes position information if the first argument to this function -** is MERGE_POS_NEAR, or does not if it is MERGE_NEAR. -*/ -static int fts3NearMerge( - int mergetype, /* MERGE_POS_NEAR or MERGE_NEAR */ - int nNear, /* Parameter to NEAR operator */ - int nTokenLeft, /* Number of tokens in LHS phrase arg */ - char *aLeft, /* Doclist for LHS (incl. positions) */ - int nLeft, /* Size of LHS doclist in bytes */ - int nTokenRight, /* As nTokenLeft */ - char *aRight, /* As aLeft */ - int nRight, /* As nRight */ - char **paOut, /* OUT: Results of merge (malloced) */ - int *pnOut /* OUT: Sized of output buffer */ -){ - char *aOut; /* Buffer to write output doclist to */ - int rc; /* Return code */ - - assert( mergetype==MERGE_POS_NEAR || MERGE_NEAR ); - - aOut = sqlite3_malloc(nLeft+nRight+1); - if( aOut==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft, - aOut, pnOut, aLeft, nLeft, aRight, nRight, 0 - ); - if( rc!=SQLITE_OK ){ - sqlite3_free(aOut); - aOut = 0; - } - } - - *paOut = aOut; - return rc; -} - -/* -** This function is used as part of the processing for the snippet() and -** offsets() functions. -** -** Both pLeft and pRight are expression nodes of type FTSQUERY_PHRASE. Both -** have their respective doclists (including position information) loaded -** in Fts3Expr.aDoclist/nDoclist. This function removes all entries from -** each doclist that are not within nNear tokens of a corresponding entry -** in the other doclist. -*/ -int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ - int rc; /* Return code */ - - assert( pLeft->eType==FTSQUERY_PHRASE ); - assert( pRight->eType==FTSQUERY_PHRASE ); - assert( pLeft->isLoaded && pRight->isLoaded ); - - if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){ - sqlite3_free(pLeft->aDoclist); - sqlite3_free(pRight->aDoclist); - pRight->aDoclist = 0; - pLeft->aDoclist = 0; - rc = SQLITE_OK; - }else{ - char *aOut; /* Buffer in which to assemble new doclist */ - int nOut; /* Size of buffer aOut in bytes */ - - rc = fts3NearMerge(MERGE_POS_NEAR, nNear, - pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, - pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, - &aOut, &nOut - ); - if( rc!=SQLITE_OK ) return rc; - sqlite3_free(pRight->aDoclist); - pRight->aDoclist = aOut; - pRight->nDoclist = nOut; - - rc = fts3NearMerge(MERGE_POS_NEAR, nNear, - pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, - pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, - &aOut, &nOut - ); - sqlite3_free(pLeft->aDoclist); - pLeft->aDoclist = aOut; - pLeft->nDoclist = nOut; - } - return rc; -} - - -/* -** Allocate an Fts3SegReaderArray for each token in the expression pExpr. -** The allocated objects are stored in the Fts3PhraseToken.pArray member -** variables of each token structure. -*/ -static int fts3ExprAllocateSegReaders( - Fts3Cursor *pCsr, /* FTS3 table */ - Fts3Expr *pExpr, /* Expression to create seg-readers for */ - int *pnExpr /* OUT: Number of AND'd expressions */ -){ - int rc = SQLITE_OK; /* Return code */ - - assert( pCsr->eEvalmode==FTS3_EVAL_FILTER ); - if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ - (*pnExpr)++; - pnExpr = 0; - } - - if( pExpr->eType==FTSQUERY_PHRASE ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int ii; - - for(ii=0; rc==SQLITE_OK && iinToken; ii++){ - Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pSegcsr==0 ){ - rc = fts3TermSegReaderCursor( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr - ); - } - } - }else{ - rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); - if( rc==SQLITE_OK ){ - rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr); - } - } - return rc; -} - -/* -** Free the Fts3SegReaderArray objects associated with each token in the -** expression pExpr. In other words, this function frees the resources -** allocated by fts3ExprAllocateSegReaders(). -*/ -static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ - if( pExpr ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - if( pPhrase ){ - int kk; - for(kk=0; kknToken; kk++){ - fts3SegReaderCursorFree(pPhrase->aToken[kk].pSegcsr); - pPhrase->aToken[kk].pSegcsr = 0; - } - } - fts3ExprFreeSegReaders(pExpr->pLeft); - fts3ExprFreeSegReaders(pExpr->pRight); - } -} - -/* -** Return the sum of the costs of all tokens in the expression pExpr. This -** function must be called after Fts3SegReaderArrays have been allocated -** for all tokens using fts3ExprAllocateSegReaders(). -*/ -static int fts3ExprCost(Fts3Expr *pExpr){ - int nCost; /* Return value */ - if( pExpr->eType==FTSQUERY_PHRASE ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int ii; - nCost = 0; - for(ii=0; iinToken; ii++){ - Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[ii].pSegcsr; - if( pSegcsr ) nCost += pSegcsr->nCost; - } - }else{ - nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); - } - return nCost; -} - -/* -** The following is a helper function (and type) for fts3EvalExpr(). It -** must be called after Fts3SegReaders have been allocated for every token -** in the expression. See the context it is called from in fts3EvalExpr() -** for further explanation. -*/ -typedef struct ExprAndCost ExprAndCost; -struct ExprAndCost { - Fts3Expr *pExpr; - int nCost; -}; -static void fts3ExprAssignCosts( - Fts3Expr *pExpr, /* Expression to create seg-readers for */ - ExprAndCost **ppExprCost /* OUT: Write to *ppExprCost */ -){ - if( pExpr->eType==FTSQUERY_AND ){ - fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); - fts3ExprAssignCosts(pExpr->pRight, ppExprCost); - }else{ - (*ppExprCost)->pExpr = pExpr; - (*ppExprCost)->nCost = fts3ExprCost(pExpr); - (*ppExprCost)++; - } -} - -/* -** Evaluate the full-text expression pExpr against FTS3 table pTab. Store -** the resulting doclist in *paOut and *pnOut. This routine mallocs for -** the space needed to store the output. The caller is responsible for -** freeing the space when it has finished. -** -** This function is called in two distinct contexts: -** -** * From within the virtual table xFilter() method. In this case, the -** output doclist contains entries for all rows in the table, based on -** data read from the full-text index. -** -** In this case, if the query expression contains one or more tokens that -** are very common, then the returned doclist may contain a superset of -** the documents that actually match the expression. -** -** * From within the virtual table xNext() method. This call is only made -** if the call from within xFilter() found that there were very common -** tokens in the query expression and did return a superset of the -** matching documents. In this case the returned doclist contains only -** entries that correspond to the current row of the table. Instead of -** reading the data for each token from the full-text index, the data is -** already available in-memory in the Fts3PhraseToken.pDeferred structures. -** See fts3EvalDeferred() for how it gets there. -** -** In the first case above, Fts3Cursor.doDeferred==0. In the second (if it is -** required) Fts3Cursor.doDeferred==1. -** -** If the SQLite invokes the snippet(), offsets() or matchinfo() function -** as part of a SELECT on an FTS3 table, this function is called on each -** individual phrase expression in the query. If there were very common tokens -** found in the xFilter() call, then this function is called once for phrase -** for each row visited, and the returned doclist contains entries for the -** current row only. Otherwise, if there were no very common tokens, then this -** function is called once only for each phrase in the query and the returned -** doclist contains entries for all rows of the table. -** -** Fts3Cursor.doDeferred==1 when this function is called on phrases as a -** result of a snippet(), offsets() or matchinfo() invocation. -*/ -static int fts3EvalExpr( - Fts3Cursor *p, /* Virtual table cursor handle */ - Fts3Expr *pExpr, /* Parsed fts3 expression */ - char **paOut, /* OUT: Pointer to malloc'd result buffer */ - int *pnOut, /* OUT: Size of buffer at *paOut */ - int isReqPos /* Require positions in output buffer */ -){ - int rc = SQLITE_OK; /* Return code */ - - /* Zero the output parameters. */ - *paOut = 0; - *pnOut = 0; - - if( pExpr ){ - assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT - || pExpr->eType==FTSQUERY_PHRASE - ); - assert( pExpr->eType==FTSQUERY_PHRASE || isReqPos==0 ); - - if( pExpr->eType==FTSQUERY_PHRASE ){ - rc = fts3PhraseSelect(p, pExpr->pPhrase, - isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), - paOut, pnOut - ); - fts3ExprFreeSegReaders(pExpr); - }else if( p->eEvalmode==FTS3_EVAL_FILTER && pExpr->eType==FTSQUERY_AND ){ - ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */ - int nExpr = 0; /* Size of aExpr[] */ - char *aRet = 0; /* Doclist to return to caller */ - int nRet = 0; /* Length of aRet[] in bytes */ - int nDoc = 0x7FFFFFFF; - - assert( !isReqPos ); - - rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr); - if( rc==SQLITE_OK ){ - assert( nExpr>1 ); - aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr); - if( !aExpr ) rc = SQLITE_NOMEM; - } - if( rc==SQLITE_OK ){ - int ii; /* Used to iterate through expressions */ - - fts3ExprAssignCosts(pExpr, &aExpr); - aExpr -= nExpr; - for(ii=0; iipExpr && (pBest==0 || pCand->nCostnCost) ){ - pBest = pCand; - } - } - - if( pBest->nCost>nDoc ){ - rc = fts3DeferExpression(p, p->pExpr); - break; - }else{ - rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0); - if( rc!=SQLITE_OK ) break; - pBest->pExpr = 0; - if( ii==0 ){ - aRet = aNew; - nRet = nNew; - nDoc = fts3DoclistCountDocids(0, aRet, nRet); - }else{ - fts3DoclistMerge( - MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew, &nDoc - ); - sqlite3_free(aNew); - } - } - } - } - - if( rc==SQLITE_OK ){ - *paOut = aRet; - *pnOut = nRet; - }else{ - assert( *paOut==0 ); - sqlite3_free(aRet); - } - sqlite3_free(aExpr); - fts3ExprFreeSegReaders(pExpr); - - }else{ - char *aLeft; - char *aRight; - int nLeft; - int nRight; - - assert( pExpr->eType==FTSQUERY_NEAR - || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_NOT - || (pExpr->eType==FTSQUERY_AND && p->eEvalmode==FTS3_EVAL_NEXT) - ); - - if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) - && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) - ){ - switch( pExpr->eType ){ - case FTSQUERY_NEAR: { - Fts3Expr *pLeft; - Fts3Expr *pRight; - int mergetype = MERGE_NEAR; - if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ - mergetype = MERGE_POS_NEAR; - } - pLeft = pExpr->pLeft; - while( pLeft->eType==FTSQUERY_NEAR ){ - pLeft=pLeft->pRight; - } - pRight = pExpr->pRight; - assert( pRight->eType==FTSQUERY_PHRASE ); - assert( pLeft->eType==FTSQUERY_PHRASE ); - - rc = fts3NearMerge(mergetype, pExpr->nNear, - pLeft->pPhrase->nToken, aLeft, nLeft, - pRight->pPhrase->nToken, aRight, nRight, - paOut, pnOut - ); - sqlite3_free(aLeft); - break; - } - - case FTSQUERY_OR: { - /* Allocate a buffer for the output. The maximum size is the - ** sum of the sizes of the two input buffers. The +1 term is - ** so that a buffer of zero bytes is never allocated - this can - ** cause fts3DoclistMerge() to incorrectly return SQLITE_NOMEM. - */ - char *aBuffer = sqlite3_malloc(nRight+nLeft+1); - rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut, - aLeft, nLeft, aRight, nRight, 0 - ); - *paOut = aBuffer; - sqlite3_free(aLeft); - break; - } - - default: { - assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND ); - fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut, - aLeft, nLeft, aRight, nRight, 0 - ); - *paOut = aLeft; - break; - } - } - } - sqlite3_free(aRight); - } - } - - assert( rc==SQLITE_OK || *paOut==0 ); - return rc; -} - -/* -** This function is called from within xNext() for each row visited by -** an FTS3 query. If evaluating the FTS3 query expression within xFilter() -** was able to determine the exact set of matching rows, this function sets -** *pbRes to true and returns SQLITE_IO immediately. -** -** Otherwise, if evaluating the query expression within xFilter() returned a -** superset of the matching documents instead of an exact set (this happens -** when the query includes very common tokens and it is deemed too expensive to -** load their doclists from disk), this function tests if the current row -** really does match the FTS3 query. -** -** If an error occurs, an SQLite error code is returned. Otherwise, SQLITE_OK -** is returned and *pbRes is set to true if the current row matches the -** FTS3 query (and should be included in the results returned to SQLite), or -** false otherwise. -*/ -static int fts3EvalDeferred( - Fts3Cursor *pCsr, /* FTS3 cursor pointing at row to test */ - int *pbRes /* OUT: Set to true if row is a match */ -){ - int rc = SQLITE_OK; - if( pCsr->pDeferred==0 ){ - *pbRes = 1; - }else{ - rc = fts3CursorSeek(0, pCsr); - if( rc==SQLITE_OK ){ - sqlite3Fts3FreeDeferredDoclists(pCsr); - rc = sqlite3Fts3CacheDeferredDoclists(pCsr); - } - if( rc==SQLITE_OK ){ - char *a = 0; - int n = 0; - rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); - assert( n>=0 ); - *pbRes = (n>0); - sqlite3_free(a); - } - } - return rc; -} - /* ** Advance the cursor to the next row in the %_content table that ** matches the search criteria. For a MATCH search, this will be ** the next row that matches. For a full-table scan, this will be ** simply the next row in the %_content table. For a docid lookup, @@ -3028,43 +2472,24 @@ ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned ** even if we reach end-of-file. The fts3EofMethod() will be called ** subsequently to determine whether or not an EOF was hit. */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ - int res; - int rc = SQLITE_OK; /* Return code */ + int rc; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - - pCsr->eEvalmode = FTS3_EVAL_NEXT; - do { - if( pCsr->aDoclist==0 ){ - if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ - pCsr->isEof = 1; - rc = sqlite3_reset(pCsr->pStmt); - break; - } + if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + }else{ pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); - }else{ - if( pCsr->desc==0 ){ - if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ - pCsr->isEof = 1; - break; - } - fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); - }else{ - fts3GetReverseDeltaVarint(&pCsr->pNextId,pCsr->aDoclist,&pCsr->iPrevId); - if( pCsr->pNextId<=pCsr->aDoclist ){ - pCsr->isEof = 1; - break; - } - } - sqlite3_reset(pCsr->pStmt); - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - } - }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); - + rc = SQLITE_OK; + } + }else{ + rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor); + } + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } /* ** This is the xFilter interface for the virtual table. See @@ -3087,15 +2512,11 @@ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ - const char *azSql[] = { - "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ - "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s", /* full-scan */ - }; - int rc; /* Return code */ + int rc; char *zSql; /* SQL statement used to access %_content */ Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; UNUSED_PARAMETER(idxStr); @@ -3110,10 +2531,17 @@ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); + if( idxStr ){ + pCsr->bDesc = (idxStr[0]=='D'); + }else{ + pCsr->bDesc = p->bDescIdx; + } + pCsr->eSearch = (i16)idxNum; + if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ int iCol = idxNum-FTS3_FULLTEXT_SEARCH; const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ @@ -3123,20 +2551,21 @@ rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr ); if( rc!=SQLITE_OK ){ if( rc==SQLITE_ERROR ){ - p->base.zErrMsg = sqlite3_mprintf("malformed MATCH expression: [%s]", - zQuery); + static const char *zErr = "malformed MATCH expression: [%s]"; + p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); } return rc; } rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; - rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1); + sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; } @@ -3144,41 +2573,28 @@ /* Compile a SELECT statement for this cursor. For a full-table-scan, the ** statement loops through all rows of the %_content table. For a ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH]; - zSql = sqlite3_mprintf( - zSql, p->zReadExprlist, p->zDb, p->zName, (idxStr ? idxStr : "ASC") - ); - if( !zSql ){ - rc = SQLITE_NOMEM; + if( idxNum==FTS3_FULLSCAN_SEARCH ){ + const char *zSort = (pCsr->bDesc ? "DESC" : "ASC"); + const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s"; + zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName, zSort); }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - } - if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - } - pCsr->eSearch = (i16)idxNum; - - assert( pCsr->desc==0 ); + const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?"; + zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName); + } + if( !zSql ) return SQLITE_NOMEM; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); if( rc!=SQLITE_OK ) return rc; - if( rc==SQLITE_OK && pCsr->nDoclist>0 && idxStr && idxStr[0]=='D' ){ - sqlite3_int64 iDocid = 0; - char *csr = pCsr->aDoclist; - while( csr<&pCsr->aDoclist[pCsr->nDoclist] ){ - fts3GetDeltaVarint(&csr, &iDocid); - } - pCsr->pNextId = csr; - pCsr->iPrevId = iDocid; - pCsr->desc = 1; - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - pCsr->eEvalmode = FTS3_EVAL_NEXT; - return SQLITE_OK; - } + + if( idxNum==FTS3_DOCID_SEARCH ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + if( rc!=SQLITE_OK ) return rc; + } + return fts3NextMethod(pCursor); } /* ** This is the xEof method of the virtual table. SQLite calls this @@ -3194,20 +2610,11 @@ ** exposes %_content.docid as the rowid for the virtual table. The ** rowid should be written to *pRowid. */ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; - if( pCsr->aDoclist ){ - *pRowid = pCsr->iPrevId; - }else{ - /* This branch runs if the query is implemented using a full-table scan - ** (not using the full-text index). In this case grab the rowid from the - ** SELECT statement. - */ - assert( pCsr->isRequireSeek==0 ); - *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); - } + *pRowid = pCsr->iPrevId; return SQLITE_OK; } /* ** This is the xColumn method, called by SQLite to request a value from @@ -3216,11 +2623,11 @@ static int fts3ColumnMethod( sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ - int rc; /* Return Code */ + int rc = SQLITE_OK; /* Return Code */ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; Fts3Table *p = (Fts3Table *)pCursor->pVtab; /* The column value supplied by SQLite must be in range. */ assert( iCol>=0 && iCol<=p->nColumn+1 ); @@ -3227,25 +2634,24 @@ if( iCol==p->nColumn+1 ){ /* This call is a request for the "docid" column. Since "docid" is an ** alias for "rowid", use the xRowid() method to obtain the value. */ - sqlite3_int64 iRowid; - rc = fts3RowidMethod(pCursor, &iRowid); - sqlite3_result_int64(pContext, iRowid); + sqlite3_result_int64(pContext, pCsr->iPrevId); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. ** Return a blob which is a pointer to the cursor. */ sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); - rc = SQLITE_OK; }else{ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1)); } } + + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } /* ** This function is the implementation of the xUpdate callback used by @@ -3275,10 +2681,11 @@ ** Implementation of xBegin() method. This is a no-op. */ static int fts3BeginMethod(sqlite3_vtab *pVtab){ UNUSED_PARAMETER(pVtab); TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + assert( p->pSegments==0 ); assert( p->nPendingData==0 ); assert( p->inTransaction!=1 ); TESTONLY( p->inTransaction = 1 ); TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; @@ -3292,10 +2699,11 @@ static int fts3CommitMethod(sqlite3_vtab *pVtab){ UNUSED_PARAMETER(pVtab); TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); assert( p->nPendingData==0 ); assert( p->inTransaction!=0 ); + assert( p->pSegments==0 ); TESTONLY( p->inTransaction = 0 ); TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } @@ -3309,132 +2717,30 @@ assert( p->inTransaction!=0 ); TESTONLY( p->inTransaction = 0 ); TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } - -/* -** Load the doclist associated with expression pExpr to pExpr->aDoclist. -** The loaded doclist contains positions as well as the document ids. -** This is used by the matchinfo(), snippet() and offsets() auxillary -** functions. -*/ -int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ - int rc; - assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); - assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); - rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); - return rc; -} - -int sqlite3Fts3ExprLoadFtDoclist( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - char **paDoclist, - int *pnDoclist -){ - int rc; - assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); - assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); - pCsr->eEvalmode = FTS3_EVAL_MATCHINFO; - rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1); - pCsr->eEvalmode = FTS3_EVAL_NEXT; - return rc; -} - /* ** When called, *ppPoslist must point to the byte immediately following the ** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function ** moves *ppPoslist so that it instead points to the first byte of the ** same position list. */ static void fts3ReversePoslist(char *pStart, char **ppPoslist){ - char *p = &(*ppPoslist)[-3]; - char c = p[1]; + char *p = &(*ppPoslist)[-2]; + char c; + + while( p>pStart && (c=*p--)==0 ); while( p>pStart && (*p & 0x80) | c ){ c = *p--; } if( p>pStart ){ p = &p[2]; } while( *p++&0x80 ); *ppPoslist = p; } - -/* -** After ExprLoadDoclist() (see above) has been called, this function is -** used to iterate/search through the position lists that make up the doclist -** stored in pExpr->aDoclist. -*/ -char *sqlite3Fts3FindPositions( - Fts3Cursor *pCursor, /* Associate FTS3 cursor */ - Fts3Expr *pExpr, /* Access this expressions doclist */ - sqlite3_int64 iDocid, /* Docid associated with requested pos-list */ - int iCol /* Column of requested pos-list */ -){ - assert( pExpr->isLoaded ); - if( pExpr->aDoclist ){ - char *pEnd = &pExpr->aDoclist[pExpr->nDoclist]; - char *pCsr; - - if( pExpr->pCurrent==0 ){ - if( pCursor->desc==0 ){ - pExpr->pCurrent = pExpr->aDoclist; - pExpr->iCurrent = 0; - fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iCurrent); - }else{ - pCsr = pExpr->aDoclist; - while( pCsriCurrent); - fts3PoslistCopy(0, &pCsr); - } - fts3ReversePoslist(pExpr->aDoclist, &pCsr); - pExpr->pCurrent = pCsr; - } - } - pCsr = pExpr->pCurrent; - assert( pCsr ); - - while( (pCursor->desc==0 && pCsrdesc && pCsr>pExpr->aDoclist) - ){ - if( pCursor->desc==0 && pExpr->iCurrentiCurrent); - } - pExpr->pCurrent = pCsr; - }else if( pCursor->desc && pExpr->iCurrent>iDocid ){ - fts3GetReverseDeltaVarint(&pCsr, pExpr->aDoclist, &pExpr->iCurrent); - fts3ReversePoslist(pExpr->aDoclist, &pCsr); - pExpr->pCurrent = pCsr; - }else{ - if( pExpr->iCurrent==iDocid ){ - int iThis = 0; - if( iCol<0 ){ - /* If iCol is negative, return a pointer to the start of the - ** position-list (instead of a pointer to the start of a list - ** of offsets associated with a specific column). - */ - return pCsr; - } - while( iThisinTransaction ); - assert( p->mxSavepoint < iSavepoint ); - TESTONLY( p->mxSavepoint = iSavepoint ); - return sqlite3Fts3PendingTermsFlush(p); + assert( ((Fts3Table *)pVtab)->inTransaction ); + assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); + TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); + return fts3SyncMethod(pVtab); } static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); UNUSED_PARAMETER(iSavepoint); UNUSED_PARAMETER(pVtab); @@ -3838,6 +3143,1279 @@ SQLITE_EXTENSION_INIT2(pApi) return sqlite3Fts3Init(db); } #endif + +/* +** Allocate an Fts3MultiSegReader for each token in the expression headed +** by pExpr. +** +** An Fts3SegReader object is a cursor that can seek or scan a range of +** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple +** Fts3SegReader objects internally to provide an interface to seek or scan +** within the union of all segments of a b-tree. Hence the name. +** +** If the allocated Fts3MultiSegReader just seeks to a single entry in a +** segment b-tree (if the term is not a prefix or it is a prefix for which +** there exists prefix b-tree of the right length) then it may be traversed +** and merged incrementally. Otherwise, it has to be merged into an in-memory +** doclist and then traversed. +*/ +static void fts3EvalAllocateReaders( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pnToken, /* OUT: Total number of tokens in phrase. */ + int *pnOr, /* OUT: Total number of OR nodes in expr. */ + int *pRc +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + *pnToken += nToken; + for(i=0; ipPhrase->aToken[i]; + int rc = sqlite3Fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + } + }else{ + *pnOr += (pExpr->eType==FTSQUERY_OR); + fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); + fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); + } + } +} + +static int fts3EvalPhraseLoad( + Fts3Cursor *pCsr, + Fts3Phrase *p +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int iToken; + int rc = SQLITE_OK; + + char *aDoclist = 0; + int nDoclist = 0; + int iPrev = -1; + + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &p->aToken[iToken]; + assert( pToken->pSegcsr || pToken->pDeferred ); + + if( pToken->pDeferred==0 ){ + int nThis = 0; + char *pThis = 0; + rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); + if( rc==SQLITE_OK ){ + if( pThis==0 ){ + sqlite3_free(aDoclist); + aDoclist = 0; + nDoclist = 0; + break; + }else if( aDoclist==0 ){ + aDoclist = pThis; + nDoclist = nThis; + }else{ + assert( iPrev>=0 ); + fts3DoclistPhraseMerge(pTab->bDescIdx, + iToken-iPrev, aDoclist, nDoclist, pThis, &nThis + ); + sqlite3_free(aDoclist); + aDoclist = pThis; + nDoclist = nThis; + } + iPrev = iToken; + } + } + } + + if( rc==SQLITE_OK ){ + p->doclist.aAll = aDoclist; + p->doclist.nAll = nDoclist; + }else{ + sqlite3_free(aDoclist); + } + return rc; +} + +static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ + int iToken; + int rc = SQLITE_OK; + + int nMaxUndeferred = -1; + char *aPoslist = 0; + int nPoslist = 0; + int iPrev = -1; + + assert( pPhrase->doclist.bFreeList==0 ); + + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + Fts3DeferredToken *pDeferred = pToken->pDeferred; + + if( pDeferred ){ + char *pList; + int nList; + rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); + if( rc!=SQLITE_OK ) return rc; + + if( pList==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + + }else if( aPoslist==0 ){ + aPoslist = pList; + nPoslist = nList; + + }else{ + assert( iPrev>=0 ); + + char *aOut = pList; + char *p1 = aPoslist; + char *p2 = aOut; + + fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); + sqlite3_free(aPoslist); + aPoslist = pList; + nPoslist = aOut - aPoslist; + if( nPoslist==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + } + } + iPrev = iToken; + }else{ + nMaxUndeferred = iToken; + } + } + + if( iPrev>=0 ){ + if( nMaxUndeferred<0 ){ + pPhrase->doclist.pList = aPoslist; + pPhrase->doclist.nList = nPoslist; + pPhrase->doclist.iDocid = pCsr->iPrevId; + pPhrase->doclist.bFreeList = 1; + }else{ + int nDistance; + char *p1; + char *p2; + char *aOut; + + if( nMaxUndeferred>iPrev ){ + p1 = aPoslist; + p2 = pPhrase->doclist.pList; + nDistance = nMaxUndeferred - iPrev; + }else{ + p1 = pPhrase->doclist.pList; + p2 = aPoslist; + nDistance = iPrev - nMaxUndeferred; + } + + aOut = (char *)sqlite3_malloc(nPoslist+8); + if( !aOut ){ + sqlite3_free(aPoslist); + return SQLITE_NOMEM; + } + + pPhrase->doclist.pList = aOut; + if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ + pPhrase->doclist.bFreeList = 1; + pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList); + }else{ + sqlite3_free(aOut); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + } + sqlite3_free(aPoslist); + } + } + + return SQLITE_OK; +} + +/* +** This function is called for each Fts3Phrase in a full-text query +** expression to initialize the mechanism for returning rows. Once this +** function has been called successfully on an Fts3Phrase, it may be +** used with fts3EvalPhraseNext() to iterate through the matching docids. +*/ +static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ + int rc; + Fts3PhraseToken *pFirst = &p->aToken[0]; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + assert( p->doclist.aAll==0 ); + if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 + && pFirst->pSegcsr && pFirst->pSegcsr->bLookup + ){ + /* Use the incremental approach. */ + int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); + rc = sqlite3Fts3MsrIncrStart( + pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); + p->bIncr = 1; + + }else{ + /* Load the full doclist for the phrase into memory. */ + rc = fts3EvalPhraseLoad(pCsr, p); + p->bIncr = 0; + } + + assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + return rc; +} + +/* +** This function is used to iterate backwards (from the end to start) +** through doclists. +*/ +void sqlite3Fts3DoclistPrev( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + int *pnList, /* IN/OUT: List length pointer */ + u8 *pbEof /* OUT: End-of-file flag */ +){ + char *p = *ppIter; + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + + if( p==0 ){ + sqlite3_int64 iDocid = 0; + char *pNext = 0; + char *pDocid = aDoclist; + char *pEnd = &aDoclist[nDoclist]; + int iMul = 1; + + while( pDociddoclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + assert( p->nToken==1 ); + assert( pDL->pNextDocid==0 ); + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( rc==SQLITE_OK && !pDL->pList ){ + *pbEof = 1; + } + }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ + sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, + &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof + ); + pDL->pList = pDL->pNextDocid; + }else{ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim2(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim2(). */ + while( pIterpNextDocid = pIter; + assert( *pIter || pIter>=&pDL->aAll[pDL->nAll] ); + *pbEof = 0; + } + } + + return rc; +} + +static void fts3EvalStartReaders( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int bOptOk, + int *pRc +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; + } + pExpr->bDeferred = (i==nToken); + *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); + }else{ + fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); + pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); + } + } +} + + +typedef struct Fts3TokenAndCost Fts3TokenAndCost; +struct Fts3TokenAndCost { + Fts3PhraseToken *pToken; + Fts3Expr *pRoot; + int nOvfl; + int iCol; +}; + +static void fts3EvalTokenCosts( + Fts3Cursor *pCsr, + Fts3Expr *pRoot, + Fts3Expr *pExpr, + Fts3TokenAndCost **ppTC, + Fts3Expr ***ppOr, + int *pRc +){ + if( *pRc==SQLITE_OK && pExpr ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + for(i=0; *pRc==SQLITE_OK && inToken; i++){ + Fts3TokenAndCost *pTC = (*ppTC)++; + pTC->pRoot = pRoot; + pTC->pToken = &pPhrase->aToken[i]; + pTC->iCol = pPhrase->iColumn; + *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); + } + }else if( pExpr->eType!=FTSQUERY_NOT ){ + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pLeft; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pRight; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); + } + } +} + +static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + int rc; + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + sqlite3_stmt *pStmt; + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *pEnd; + const char *a; + + rc = sqlite3Fts3SelectDoctotal(p, &pStmt); + if( rc!=SQLITE_OK ) return rc; + a = sqlite3_column_blob(pStmt, 0); + assert( a ); + + pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( anDoc = nDoc; + pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); + assert( pCsr->nRowAvg>0 ); + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + + *pnPage = pCsr->nRowAvg; + return SQLITE_OK; +} + +static int fts3EvalSelectDeferred( + Fts3Cursor *pCsr, + Fts3Expr *pRoot, + Fts3TokenAndCost *aTC, + int nTC +){ + int nDocSize = 0; + int nDocEst = 0; + int rc = SQLITE_OK; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int ii; + + int nOvfl = 0; + int nTerm = 0; + + for(ii=0; iinOvfl) + ){ + pTC = &aTC[jj]; + } + } + assert( pTC ); + + /* At this point pTC points to the cheapest remaining token. */ + if( ii==0 ){ + if( pTC->nOvfl ){ + nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; + }else{ + /* TODO: Fix this so that the doclist need not be read twice. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); + if( rc==SQLITE_OK ){ + nDocEst = fts3DoclistCountDocids(1, pList, nList); + } + sqlite3_free(pList); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + } + } + }else{ + if( pTC->nOvfl>=(nDocEst*nDocSize) ){ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + } + nDocEst = 1 + (nDocEst/4); + } + pTC->pToken = 0; + } + + return rc; +} + +int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int nToken = 0; + int nOr = 0; + + /* Allocate a MultiSegReader for each token in the expression. */ + fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc); + + /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: + ** This call will eventually also be responsible for determining which + ** tokens are 'deferred' until the document text is loaded into memory. + ** + ** Each token in each phrase is dealt with using one of the following + ** three strategies: + ** + ** 1. Entire doclist loaded into memory as part of the + ** fts3EvalStartReaders() call. + ** + ** 2. Doclist loaded into memory incrementally, as part of each + ** sqlite3Fts3EvalNext() call. + ** + ** 3. Token doclist is never loaded. Instead, documents are loaded into + ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() + ** call. This is known as a "deferred" token. + */ + + /* If bOptOk is true, check if there are any tokens that should be deferred. + */ + if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ + Fts3TokenAndCost *aTC; + Fts3Expr **apOr; + aTC = (Fts3TokenAndCost *)sqlite3_malloc( + sizeof(Fts3TokenAndCost) * nToken + + sizeof(Fts3Expr *) * nOr * 2 + ); + apOr = (Fts3Expr **)&aTC[nToken]; + + if( !aTC ){ + rc = SQLITE_NOMEM; + }else{ + int ii; + Fts3TokenAndCost *pTC = aTC; + Fts3Expr **ppOr = apOr; + + fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); + nToken = pTC-aTC; + nOr = ppOr-apOr; + + if( rc==SQLITE_OK ){ + rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); + for(ii=0; rc==SQLITE_OK && iidoclist.bFreeList ){ + sqlite3_free(pPhrase->doclist.pList); + } + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + pPhrase->doclist.bFreeList = 0; +} + +static int fts3EvalNearTrim2( + int nNear, + char *aTmp, /* Temporary space to use */ + char **paPoslist, /* IN/OUT: Position list */ + int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ + Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +){ + int nParam1 = nNear + pPhrase->nToken; + int nParam2 = nNear + *pnToken; + int nNew; + char *p2; + char *pOut; + int res; + + assert( pPhrase->doclist.pList ); + + p2 = pOut = pPhrase->doclist.pList; + res = fts3PoslistNearMerge( + &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 + ); + if( res ){ + nNew = (pOut - pPhrase->doclist.pList) - 1; + assert( pPhrase->doclist.pList[nNew]=='\0' ); + assert( nNew<=pPhrase->doclist.nList && nNew>0 ); + memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); + pPhrase->doclist.nList = nNew; + *paPoslist = pPhrase->doclist.pList; + *pnToken = pPhrase->nToken; + } + + return res; +} + +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; + + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && pExpr->bEof==0 + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + int nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ + + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; + + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear = p->pParent->nNear; + Fts3Phrase *pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + } + + sqlite3_free(aTmp); + } + + return res; +} + +/* +** This macro is used by the fts3EvalNext() function. The two arguments are +** 64-bit docid values. If the current query is "ORDER BY docid ASC", then +** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then +** it returns (i2 - i1). This allows the same code to be used for merging +** doclists in ascending or descending order. +*/ +#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2)) + +static void fts3EvalNext( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( *pRc==SQLITE_OK ){ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; + + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); + if( pLeft->bDeferred ){ + fts3EvalNext(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + fts3EvalNext(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNext(pCsr, pRight, pRc); + + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNext(pCsr, pLeft, pRc); + }else{ + fts3EvalNext(pCsr, pRight, pRc); + } + } + + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + + assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); + + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNext(pCsr, pLeft, pRc); + }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ + fts3EvalNext(pCsr, pRight, pRc); + }else{ + fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNext(pCsr, pRight, pRc); + } + + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } + + break; + } + + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + + if( pRight->bStart==0 ){ + fts3EvalNext(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } + + fts3EvalNext(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNext(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } + + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalZeroPoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } +} + +static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ + int bHit = 1; + if( *pRc==SQLITE_OK ){ + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: + bHit = ( + fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) + && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + && fts3EvalNearTest(pExpr, pRc) + ); + + /* If the NEAR expression does not match any rows, zero the doclist for + ** all phrases involved in the NEAR. This is because the snippet(), + ** offsets() and matchinfo() functions are not supposed to recognize + ** any instances of phrases that are part of unmatched NEAR queries. + ** For example if this expression: + ** + ** ... MATCH 'a OR (b NEAR c)' + ** + ** is matched against a row containing: + ** + ** 'a b d e' + ** + ** then any snippet() should ony highlight the "a" term, not the "b" + ** (as "b" is part of a non-matching NEAR clause). + */ + if( bHit==0 + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + for(p=pExpr; p->pPhrase==0; p=p->pLeft){ + if( p->pRight->iDocid==pCsr->iPrevId ){ + fts3EvalZeroPoslist(p->pRight->pPhrase); + } + } + if( p->iDocid==pCsr->iPrevId ){ + fts3EvalZeroPoslist(p->pPhrase); + } + } + + break; + + case FTSQUERY_OR: { + int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); + bHit = bHit1 || bHit2; + break; + } + + case FTSQUERY_NOT: + bHit = ( + fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) + && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + ); + break; + + default: { + if( pCsr->pDeferred + && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) + ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); + if( pExpr->bDeferred ){ + fts3EvalZeroPoslist(pPhrase); + } + *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); + bHit = (pPhrase->doclist.pList!=0); + pExpr->iDocid = pCsr->iPrevId; + }else{ + bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); + } + break; + } + } + } + return bHit; +} + +/* +** Return 1 if both of the following are true: +** +** 1. *pRc is SQLITE_OK when this function returns, and +** +** 2. After scanning the current FTS table row for the deferred tokens, +** it is determined that the row does not match the query. +** +** Or, if no error occurs and it seems the current row does match the FTS +** query, return 0. +*/ +static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ + int rc = *pRc; + int bMiss = 0; + if( rc==SQLITE_OK ){ + if( pCsr->pDeferred ){ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + } + bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); + sqlite3Fts3FreeDeferredDoclists(pCsr); + *pRc = rc; + } + return (rc==SQLITE_OK && bMiss); +} + +/* +** Advance to the next document that matches the FTS expression in +** Fts3Cursor.pExpr. +*/ +int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Expr *pExpr = pCsr->pExpr; + assert( pCsr->isEof==0 ); + if( pExpr==0 ){ + pCsr->isEof = 1; + }else{ + do { + if( pCsr->isRequireSeek==0 ){ + sqlite3_reset(pCsr->pStmt); + } + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + fts3EvalNext(pCsr, pExpr, &rc); + pCsr->isEof = pExpr->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pExpr->iDocid; + }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); + } + return rc; +} + +/* +** Restart interation for expression pExpr so that the next call to +** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental +** loading or merging of phrase doclists for this iteration. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op. If an error occurs within this function, *pRc is set to an +** SQLite error code before returning. +*/ +static void fts3EvalRestart( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( pExpr && *pRc==SQLITE_OK ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + + if( pPhrase ){ + fts3EvalZeroPoslist(pPhrase); + if( pPhrase->bIncr ){ + sqlite3Fts3EvalPhraseCleanup(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + *pRc = sqlite3Fts3EvalStart(pCsr, pExpr, 0); + }else{ + pPhrase->doclist.pNextDocid = 0; + pPhrase->doclist.iDocid = 0; + } + } + + pExpr->iDocid = 0; + pExpr->bEof = 0; + pExpr->bStart = 0; + + fts3EvalRestart(pCsr, pExpr->pLeft, pRc); + fts3EvalRestart(pCsr, pExpr->pRight, pRc); + } +} + +/* +** After allocating the Fts3Expr.aMI[] array for each phrase in the +** expression rooted at pExpr, the cursor iterates through all rows matched +** by pExpr, calling this function for each row. This function increments +** the values in Fts3Expr.aMI[] according to the position-list currently +** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase +** expression nodes. +*/ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase && pPhrase->doclist.pList ){ + int iCol = 0; + char *p = pPhrase->doclist.pList; + + assert( *p ); + while( 1 ){ + u8 c = 0; + int iCnt = 0; + while( 0xFE & (*p | c) ){ + if( (c&0x80)==0 ) iCnt++; + c = *p++ & 0x80; + } + + /* aMI[iCol*3 + 1] = Number of occurrences + ** aMI[iCol*3 + 2] = Number of rows containing at least one instance + */ + pExpr->aMI[iCol*3 + 1] += iCnt; + pExpr->aMI[iCol*3 + 2] += (iCnt>0); + if( *p==0x00 ) break; + p++; + p += sqlite3Fts3GetVarint32(p, &iCol); + } + } + + fts3EvalUpdateCounts(pExpr->pLeft); + fts3EvalUpdateCounts(pExpr->pRight); + } +} + +/* +** Expression pExpr must be of type FTSQUERY_PHRASE. +** +** If it is not already allocated and populated, this function allocates and +** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part +** of a NEAR expression, then it also allocates and populates the same array +** for all other phrases that are part of the NEAR expression. +** +** SQLITE_OK is returned if the aMI[] array is successfully allocated and +** populated. Otherwise, if an error occurs, an SQLite error code is returned. +*/ +static int fts3EvalGatherStats( + Fts3Cursor *pCsr, /* Cursor object */ + Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pExpr->eType==FTSQUERY_PHRASE ); + if( pExpr->aMI==0 ){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + Fts3Expr *pRoot; /* Root of NEAR expression */ + Fts3Expr *p; /* Iterator used for several purposes */ + + sqlite3_int64 iPrevId = pCsr->iPrevId; + sqlite3_int64 iDocid; + u8 bEof; + + /* Find the root of the NEAR expression */ + pRoot = pExpr; + while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + pRoot = pRoot->pParent; + } + iDocid = pRoot->iDocid; + bEof = pRoot->bEof; + assert( pRoot->bStart ); + + /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ + for(p=pRoot; p; p=p->pLeft){ + Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); + assert( pE->aMI==0 ); + pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); + if( !pE->aMI ) return SQLITE_NOMEM; + memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + } + + fts3EvalRestart(pCsr, pRoot, &rc); + + while( pCsr->isEof==0 && rc==SQLITE_OK ){ + + do { + /* Ensure the %_content statement is reset. */ + if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + + /* Advance to the next document */ + fts3EvalNext(pCsr, pRoot, &rc); + pCsr->isEof = pRoot->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pRoot->iDocid; + }while( pCsr->isEof==0 + && pRoot->eType==FTSQUERY_NEAR + && fts3EvalLoadDeferred(pCsr, &rc) + ); + + if( rc==SQLITE_OK && pCsr->isEof==0 ){ + fts3EvalUpdateCounts(pRoot); + } + } + + pCsr->isEof = 0; + pCsr->iPrevId = iPrevId; + + if( bEof ){ + pRoot->bEof = bEof; + }else{ + /* Caution: pRoot may iterate through docids in ascending or descending + ** order. For this reason, even though it seems more defensive, the + ** do loop can not be written: + ** + ** do {...} while( pRoot->iDocidbEof==0 ); + }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); + fts3EvalLoadDeferred(pCsr, &rc); + } + } + return rc; +} + +/* +** This function is used by the matchinfo() module to query a phrase +** expression node for the following information: +** +** 1. The total number of occurrences of the phrase in each column of +** the FTS table (considering all rows), and +** +** 2. For each column, the number of rows in the table for which the +** column contains at least one instance of the phrase. +** +** If no error occurs, SQLITE_OK is returned and the values for each column +** written into the array aiOut as follows: +** +** aiOut[iCol*3 + 1] = Number of occurrences +** aiOut[iCol*3 + 2] = Number of rows containing at least one instance +** +** Caveats: +** +** * If a phrase consists entirely of deferred tokens, then all output +** values are set to the number of documents in the table. In other +** words we assume that very common tokens occur exactly once in each +** column of each row of the table. +** +** * If a phrase contains some deferred tokens (and some non-deferred +** tokens), count the potential occurrence identified by considering +** the non-deferred tokens instead of actual phrase occurrences. +** +** * If the phrase is part of a NEAR expression, then only phrase instances +** that meet the NEAR constraint are included in the counts. +*/ +int sqlite3Fts3EvalPhraseStats( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Phrase expression */ + u32 *aiOut /* Array to write results into (see above) */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int iCol; + + if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ + assert( pCsr->nDoc>0 ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pCsr->nDoc; + aiOut[iCol*3 + 2] = pCsr->nDoc; + } + }else{ + rc = fts3EvalGatherStats(pCsr, pExpr); + if( rc==SQLITE_OK ){ + assert( pExpr->aMI ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; + aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; + } + } + } + + return rc; +} + +/* +** The expression pExpr passed as the second argument to this function +** must be of type FTSQUERY_PHRASE. +** +** The returned value is either NULL or a pointer to a buffer containing +** a position-list indicating the occurrences of the phrase in column iCol +** of the current row. +** +** More specifically, the returned buffer contains 1 varint for each +** occurence of the phrase in the column, stored using the normal (delta+2) +** compression and is terminated by either an 0x01 or 0x00 byte. For example, +** if the requested column contains "a b X c d X X" and the position-list +** for 'X' is requested, the buffer returned may contain: +** +** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** +** This function works regardless of whether or not the phrase is deferred, +** incremental, or neither. +*/ +char *sqlite3Fts3EvalPhrasePoslist( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + Fts3Expr *pExpr, /* Phrase to return doclist for */ + int iCol /* Column to return position list for */ +){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + char *pIter = pPhrase->doclist.pList; + int iThis; + + assert( iCol>=0 && iColnColumn ); + if( !pIter + || pExpr->bEof + || pExpr->iDocid!=pCsr->iPrevId + || (pPhrase->iColumnnColumn && pPhrase->iColumn!=iCol) + ){ + return 0; + } + + assert( pPhrase->doclist.nList>0 ); + if( *pIter==0x01 ){ + pIter++; + pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + }else{ + iThis = 0; + } + while( iThisdoclist, and +** * any Fts3MultiSegReader objects held by phrase tokens. +*/ +void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ + if( pPhrase ){ + int i; + sqlite3_free(pPhrase->doclist.aAll); + fts3EvalZeroPoslist(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + for(i=0; inToken; i++){ + fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); + pPhrase->aToken[i].pSegcsr = 0; + } + } +} + #endif Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -45,16 +45,39 @@ ** similar macro called ArraySize(). Use a different name to avoid ** a collision when building an amalgamation with built-in FTS3. */ #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) + +#ifndef MIN +# define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + /* ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 +/* +** FTS4 virtual tables may maintain multiple indexes - one index of all terms +** in the document set and zero or more prefix indexes. All indexes are stored +** as one or more b+-trees in the %_segments and %_segdir tables. +** +** It is possible to determine which index a b+-tree belongs to based on the +** value stored in the "%_segdir.level" column. Given this value L, the index +** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with +** level values between 0 and 1023 (inclusive) belong to index 0, all levels +** between 1024 and 2047 to index 1, and so on. +** +** It is considered impossible for an index to use more than 1024 levels. In +** theory though this may happen, but only after at least +** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. +*/ +#define FTS3_SEGDIR_MAXLEVEL 1024 +#define FTS3_SEGDIR_MAXLEVEL_STR "1024" + /* ** The testcase() macro is only used by the amalgamation. If undefined, ** make it a no-op. */ #ifndef testcase @@ -122,14 +145,15 @@ typedef struct Fts3Cursor Fts3Cursor; typedef struct Fts3Expr Fts3Expr; typedef struct Fts3Phrase Fts3Phrase; typedef struct Fts3PhraseToken Fts3PhraseToken; +typedef struct Fts3Doclist Fts3Doclist; typedef struct Fts3SegFilter Fts3SegFilter; typedef struct Fts3DeferredToken Fts3DeferredToken; typedef struct Fts3SegReader Fts3SegReader; -typedef struct Fts3SegReaderCursor Fts3SegReaderCursor; +typedef struct Fts3MultiSegReader Fts3MultiSegReader; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. @@ -146,33 +170,45 @@ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ - sqlite3_stmt *aStmt[24]; + sqlite3_stmt *aStmt[27]; char *zReadExprlist; char *zWriteExprlist; int nNodeSize; /* Soft limit for node size */ u8 bHasStat; /* True if %_stat table exists */ u8 bHasDocsize; /* True if %_docsize table exists */ + u8 bDescIdx; /* True if doclists are in reverse order */ int nPgsz; /* Page size for host database */ char *zSegmentsTbl; /* Name of %_segments table */ sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ - /* The following hash table is used to buffer pending index updates during + /* TODO: Fix the first paragraph of this comment. + ** + ** The following hash table is used to buffer pending index updates during ** transactions. Variable nPendingData estimates the memory size of the ** pending data, including hash table overhead, but not malloc overhead. ** When nPendingData exceeds nMaxPendingData, the buffer is flushed ** automatically. Variable iPrevDocid is the docid of the most recently ** inserted record. + ** + ** A single FTS4 table may have multiple full-text indexes. For each index + ** there is an entry in the aIndex[] array. Index 0 is an index of all the + ** terms that appear in the document set. Each subsequent index in aIndex[] + ** is an index of prefixes of a specific length. */ - int nMaxPendingData; - int nPendingData; - sqlite_int64 iPrevDocid; - Fts3Hash pendingTerms; + int nIndex; /* Size of aIndex[] */ + struct Fts3Index { + int nPrefix; /* Prefix length (0 for main terms index) */ + Fts3Hash hPending; /* Pending terms table for this index */ + } *aIndex; + int nMaxPendingData; /* Max pending data before flush to disk */ + int nPendingData; /* Current bytes of pending data */ + sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ #if defined(SQLITE_DEBUG) /* State variables used for validating that the transaction control ** methods of the virtual table are called at appropriate times. These ** values do not contribution to the FTS computation; they are used for @@ -199,13 +235,14 @@ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ - int desc; /* True to sort in descending order */ + u8 bDesc; /* True to sort in descending order */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ + int nDoc; /* Documents in table */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int nMatchinfo; /* Number of elements in aMatchinfo[] */ char *zMatchinfo; /* Matchinfo specification */ @@ -232,66 +269,90 @@ */ #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ + +struct Fts3Doclist { + char *aAll; /* Array containing doclist (or NULL) */ + int nAll; /* Size of a[] in bytes */ + char *pNextDocid; /* Pointer to next docid */ + + sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ + int bFreeList; /* True if pList should be sqlite3_free()d */ + char *pList; /* Pointer to position list following iDocid */ + int nList; /* Length of position list */ +} doclist; + /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. -** -** The nDocMatch and nMatch variables contain data that may be used by the -** matchinfo() function. They are populated when the full-text index is -** queried for hits on the phrase. If one or more tokens in the phrase -** are deferred, the nDocMatch and nMatch variables are populated based -** on the assumption that the */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ + + /* Variables above this point are populated when the expression is + ** parsed (by code in fts3_expr.c). Below this point the variables are + ** used when evaluating the expression. */ int bFulltext; /* True if full-text index was used */ - Fts3SegReaderCursor *pSegcsr; /* Segment-reader for this token */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ + Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ }; struct Fts3Phrase { - /* Variables populated by fts3_expr.c when parsing a MATCH expression */ + /* Cache of doclist for this phrase. */ + Fts3Doclist doclist; + int bIncr; /* True if doclist is loaded incrementally */ + + /* Variables below this point are populated by fts3_expr.c when parsing + ** a MATCH expression. Everything above is part of the evaluation phase. + */ int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ - int isNot; /* Phrase prefixed by unary not (-) operator */ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* ** A tree of these objects forms the RHS of a MATCH operator. ** -** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded -** is true, then aDoclist points to a malloced buffer, size nDoclist bytes, -** containing the results of the NEAR or phrase query in FTS3 doclist -** format. As usual, the initial "Length" field found in doclists stored -** on disk is omitted from this buffer. +** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist +** points to a malloced buffer, size nDoclist bytes, containing the results +** of this phrase query in FTS3 doclist format. As usual, the initial +** "Length" field found in doclists stored on disk is omitted from this +** buffer. +** +** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global +** matchinfo data. If it is not NULL, it points to an array of size nCol*3, +** where nCol is the number of columns in the queried FTS table. The array +** is populated as follows: +** +** aMI[iCol*3 + 0] = Undefined +** aMI[iCol*3 + 1] = Number of occurrences +** aMI[iCol*3 + 2] = Number of rows containing at least one instance ** -** Variable pCurrent always points to the start of a docid field within -** aDoclist. Since the doclist is usually scanned in docid order, this can -** be used to accelerate seeking to the required docid within the doclist. +** The aMI array is allocated using sqlite3_malloc(). It should be freed +** when the expression node is. */ struct Fts3Expr { int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ - int isLoaded; /* True if aDoclist/nDoclist are initialized. */ - char *aDoclist; /* Buffer containing doclist */ - int nDoclist; /* Size of aDoclist in bytes */ + /* The following are used by the fts3_eval.c module. */ + sqlite3_int64 iDocid; /* Current docid */ + u8 bEof; /* True this expression is at EOF already */ + u8 bStart; /* True if iDocid is valid */ + u8 bDeferred; /* True if this expression is entirely deferred */ - sqlite3_int64 iCurrent; - char *pCurrent; + u32 *aMI; }; /* ** Candidate values for Fts3Query.eType. Note that the order of the first ** four values is in order of precedence when parsing expressions. For @@ -315,35 +376,36 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); -int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**); +int sqlite3Fts3SegReaderPending( + Fts3Table*,int,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); -int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); -int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **); +int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **); int sqlite3Fts3ReadLock(Fts3Table *); -int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); +int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); -char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); void sqlite3Fts3SegmentsClose(Fts3Table *); -#define FTS3_SEGCURSOR_PENDING -1 -#define FTS3_SEGCURSOR_ALL -2 +/* Special values interpreted by sqlite3SegReaderCursor() */ +#define FTS3_SEGCURSOR_PENDING -1 +#define FTS3_SEGCURSOR_ALL -2 -int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*); -int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *); -void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *); +int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); +int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); +void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); + int sqlite3Fts3SegReaderCursor( - Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *); + Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *); /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 @@ -356,21 +418,24 @@ int nTerm; int iCol; int flags; }; -struct Fts3SegReaderCursor { +struct Fts3MultiSegReader { /* Used internally by sqlite3Fts3SegReaderXXX() calls */ Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ int nSegment; /* Size of apSegment array */ int nAdvance; /* How many seg-readers to advance */ Fts3SegFilter *pFilter; /* Pointer to filter object */ char *aBuffer; /* Buffer to merge doclists in */ int nBuffer; /* Allocated size of aBuffer[] in bytes */ - /* Cost of running this iterator. Used by fts3.c only. */ - int nCost; + int iColFilter; /* If >=0, filter for this column */ + + /* Used by fts3.c only. */ + int nCost; /* Cost of running iterator */ + int bLookup; /* True if a lookup of a single entry. */ /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ char *zTerm; /* Pointer to term buffer */ int nTerm; /* Size of zTerm in bytes */ char *aDoclist; /* Pointer to doclist buffer */ @@ -381,15 +446,13 @@ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); +void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); -char *sqlite3Fts3FindPositions(Fts3Cursor *, Fts3Expr *, sqlite3_int64, int); -int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); -int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); -int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); +int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, @@ -414,7 +477,30 @@ int sqlite3Fts3InitTerm(sqlite3 *db); #endif /* fts3_aux.c */ int sqlite3Fts3InitAux(sqlite3 *db); + +int sqlite3Fts3TermSegReaderCursor( + Fts3Cursor *pCsr, /* Virtual table cursor handle */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ +); + +void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); + +int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int); +int sqlite3Fts3EvalNext(Fts3Cursor *pCsr); + +int sqlite3Fts3MsrIncrStart( + Fts3Table*, Fts3MultiSegReader*, int, const char*, int); +int sqlite3Fts3MsrIncrNext( + Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); +char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol); +int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); + +int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); + #endif /* _FTSINT_H */ Index: ext/fts3/fts3_aux.c ================================================================== --- ext/fts3/fts3_aux.c +++ ext/fts3/fts3_aux.c @@ -26,11 +26,11 @@ Fts3Table *pFts3Tab; }; struct Fts3auxCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - Fts3SegReaderCursor csr; /* Must be right after "base" */ + Fts3MultiSegReader csr; /* Must be right after "base" */ Fts3SegFilter filter; char *zStop; int nStop; /* Byte-length of string zStop */ int isEof; /* True if cursor is at EOF */ sqlite3_int64 iRowid; /* Current rowid */ @@ -94,10 +94,11 @@ p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; p->pFts3Tab->db = db; + p->pFts3Tab->nIndex = 1; memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); @@ -374,11 +375,11 @@ pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx])); pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]); if( pCsr->zStop==0 ) return SQLITE_NOMEM; } - rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL, + rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); } Index: ext/fts3/fts3_expr.c ================================================================== --- ext/fts3/fts3_expr.c +++ ext/fts3/fts3_expr.c @@ -79,16 +79,25 @@ #include "fts3Int.h" #include #include +/* +** isNot: +** This variable is used by function getNextNode(). When getNextNode() is +** called, it sets ParseContext.isNot to true if the 'next node' is a +** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the +** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to +** zero. +*/ typedef struct ParseContext ParseContext; struct ParseContext { sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ const char **azCol; /* Array of column names for fts3 table */ int nCol; /* Number of entries in azCol[] */ int iDefaultCol; /* Default column to query */ + int isNot; /* True if getNextNode() sees a unary - */ sqlite3_context *pCtx; /* Write error message here */ int nNest; /* Number of nested brackets */ }; /* @@ -170,11 +179,11 @@ if( iEndpPhrase->aToken[0].isPrefix = 1; iEnd++; } if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ - pRet->pPhrase->isNot = 1; + pParse->isNot = 1; } } nConsumed = iEnd; } @@ -222,71 +231,86 @@ Fts3Expr *p = 0; sqlite3_tokenizer_cursor *pCursor = 0; char *zTemp = 0; int nTemp = 0; + const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); + int nToken = 0; + + /* The final Fts3Expr data structure, including the Fts3Phrase, + ** Fts3PhraseToken structures token buffers are all stored as a single + ** allocation so that the expression can be freed with a single call to + ** sqlite3_free(). Setting this up requires a two pass approach. + ** + ** The first pass, in the block below, uses a tokenizer cursor to iterate + ** through the tokens in the expression. This pass uses fts3ReallocOrFree() + ** to assemble data in two dynamic buffers: + ** + ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase + ** structure, followed by the array of Fts3PhraseToken + ** structures. This pass only populates the Fts3PhraseToken array. + ** + ** Buffer zTemp: Contains copies of all tokens. + ** + ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, + ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase + ** structures. + */ rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); if( rc==SQLITE_OK ){ int ii; pCursor->pTokenizer = pTokenizer; for(ii=0; rc==SQLITE_OK; ii++){ - const char *zToken; - int nToken, iBegin, iEnd, iPos; - rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); + const char *zByte; + int nByte, iBegin, iEnd, iPos; + rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ - int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); - p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); - zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); - if( !p || !zTemp ){ - goto no_mem; - } - if( ii==0 ){ - memset(p, 0, nByte); - p->pPhrase = (Fts3Phrase *)&p[1]; - } - p->pPhrase = (Fts3Phrase *)&p[1]; - memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); - p->pPhrase->nToken = ii+1; - p->pPhrase->aToken[ii].n = nToken; - memcpy(&zTemp[nTemp], zToken, nToken); - nTemp += nToken; - if( iEndpPhrase->aToken[ii].isPrefix = 1; - }else{ - p->pPhrase->aToken[ii].isPrefix = 0; - } + Fts3PhraseToken *pToken; + + p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); + if( !p ) goto no_mem; + + zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); + if( !zTemp ) goto no_mem; + + assert( nToken==ii ); + pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; + memset(pToken, 0, sizeof(Fts3PhraseToken)); + + memcpy(&zTemp[nTemp], zByte, nByte); + nTemp += nByte; + + pToken->n = nByte; + pToken->isPrefix = (iEndxClose(pCursor); pCursor = 0; } if( rc==SQLITE_DONE ){ int jj; - char *zNew = NULL; - int nNew = 0; - int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); - nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); - p = fts3ReallocOrFree(p, nByte + nTemp); - if( !p ){ - goto no_mem; - } - if( zTemp ){ - zNew = &(((char *)p)[nByte]); - memcpy(zNew, zTemp, nTemp); - }else{ - memset(p, 0, nByte+nTemp); - } + char *zBuf = 0; + + p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); + if( !p ) goto no_mem; + memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); + p->eType = FTSQUERY_PHRASE; p->pPhrase = (Fts3Phrase *)&p[1]; + p->pPhrase->iColumn = pParse->iDefaultCol; + p->pPhrase->nToken = nToken; + + zBuf = (char *)&p->pPhrase->aToken[nToken]; + memcpy(zBuf, zTemp, nTemp); + sqlite3_free(zTemp); + for(jj=0; jjpPhrase->nToken; jj++){ - p->pPhrase->aToken[jj].z = &zNew[nNew]; - nNew += p->pPhrase->aToken[jj].n; + p->pPhrase->aToken[jj].z = zBuf; + zBuf += p->pPhrase->aToken[jj].n; } - sqlite3_free(zTemp); - p->eType = FTSQUERY_PHRASE; - p->pPhrase->iColumn = pParse->iDefaultCol; rc = SQLITE_OK; } *ppExpr = p; return rc; @@ -338,10 +362,12 @@ int rc; Fts3Expr *pRet = 0; const char *zInput = z; int nInput = n; + + pParse->isNot = 0; /* Skip over any whitespace before checking for a keyword, an open or ** close bracket, or a quoted string. */ while( nInput>0 && fts3isspace(*zInput) ){ @@ -557,11 +583,11 @@ rc = getNextNode(pParse, zIn, nIn, &p, &nByte); if( rc==SQLITE_OK ){ int isPhrase; if( !sqlite3_fts3_enable_parentheses - && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot + && p->eType==FTSQUERY_PHRASE && pParse->isNot ){ /* Create an implicit NOT operator. */ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); if( !pNot ){ sqlite3Fts3ExprFree(p); @@ -575,11 +601,10 @@ } pNotBranch = pNot; p = pPrev; }else{ int eType = p->eType; - assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); /* The isRequirePhrase variable is set to true if a phrase or ** an expression contained in parenthesis is required. If a ** binary operator (AND, OR, NOT or NEAR) is encounted when @@ -738,13 +763,15 @@ /* ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); - sqlite3_free(p->aDoclist); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); sqlite3_free(p); } } /**************************************************************************** @@ -798,11 +825,11 @@ switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; int i; zBuf = sqlite3_mprintf( - "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); + "%zPHRASE %d 0", zBuf, pPhrase->iColumn); for(i=0; zBuf && inToken; i++){ zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, pPhrase->aToken[i].n, pPhrase->aToken[i].z, (pPhrase->aToken[i].isPrefix?"+":"") ); Index: ext/fts3/fts3_snippet.c ================================================================== --- ext/fts3/fts3_snippet.c +++ ext/fts3/fts3_snippet.c @@ -174,76 +174,24 @@ ){ int iPhrase = 0; /* Variable used as the phrase counter */ return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); } -/* -** The argument to this function is always a phrase node. Its doclist -** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes -** to the left of this one in the query tree have already been loaded. -** -** If this phrase node is part of a series of phrase nodes joined by -** NEAR operators (and is not the left-most of said series), then elements are -** removed from the phrases doclist consistent with the NEAR restriction. If -** required, elements may be removed from the doclists of phrases to the -** left of this one that are part of the same series of NEAR operator -** connected phrases. -** -** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. -*/ -static int fts3ExprNearTrim(Fts3Expr *pExpr){ - int rc = SQLITE_OK; - Fts3Expr *pParent = pExpr->pParent; - - assert( pExpr->eType==FTSQUERY_PHRASE ); - while( rc==SQLITE_OK - && pParent - && pParent->eType==FTSQUERY_NEAR - && pParent->pRight==pExpr - ){ - /* This expression (pExpr) is the right-hand-side of a NEAR operator. - ** Find the expression to the left of the same operator. - */ - int nNear = pParent->nNear; - Fts3Expr *pLeft = pParent->pLeft; - - if( pLeft->eType!=FTSQUERY_PHRASE ){ - assert( pLeft->eType==FTSQUERY_NEAR ); - assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); - pLeft = pLeft->pRight; - } - - rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); - - pExpr = pLeft; - pParent = pExpr->pParent; - } - - return rc; -} - /* ** This is an fts3ExprIterate() callback used while loading the doclists ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also ** fts3ExprLoadDoclists(). */ static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ int rc = SQLITE_OK; + Fts3Phrase *pPhrase = pExpr->pPhrase; LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; UNUSED_PARAMETER(iPhrase); p->nPhrase++; - p->nToken += pExpr->pPhrase->nToken; - - if( pExpr->isLoaded==0 ){ - rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); - pExpr->isLoaded = 1; - if( rc==SQLITE_OK ){ - rc = fts3ExprNearTrim(pExpr); - } - } + p->nToken += pPhrase->nToken; return rc; } /* @@ -413,11 +361,11 @@ SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; char *pCsr; pPhrase->nToken = pExpr->pPhrase->nToken; - pCsr = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->pCsr->iPrevId, p->iCol); + pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); if( pCsr ){ int iFirst = 0; pPhrase->pList = pCsr; fts3GetDeltaPosition(&pCsr, &iFirst); pPhrase->pHead = pCsr; @@ -770,30 +718,10 @@ *ppCollist = pEnd; return nEntry; } -static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){ - char *pCsr = *pp; - while( *pCsr ){ - int nHit; - sqlite3_int64 iCol = 0; - if( *pCsr==0x01 ){ - pCsr++; - pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); - } - nHit = fts3ColumnlistCount(&pCsr); - assert( nHit>0 ); - if( isGlobal ){ - aOut[iCol*3+1]++; - } - aOut[iCol*3] += nHit; - } - pCsr++; - *pp = pCsr; -} - /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ** for a single query. ** ** fts3ExprIterate() callback to load the 'global' elements of a @@ -823,52 +751,13 @@ Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; - Fts3Cursor *pCsr = p->pCursor; - char *pIter; - char *pEnd; - char *pFree = 0; - u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; - - assert( pExpr->isLoaded ); - assert( pExpr->eType==FTSQUERY_PHRASE ); - - if( pCsr->pDeferred ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int ii; - for(ii=0; iinToken; ii++){ - if( pPhrase->aToken[ii].bFulltext ) break; - } - if( iinToken ){ - int nFree = 0; - int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); - if( rc!=SQLITE_OK ) return rc; - pIter = pFree; - pEnd = &pFree[nFree]; - }else{ - int iCol; /* Column index */ - for(iCol=0; iColnCol; iCol++){ - aOut[iCol*3 + 1] = (u32)p->nDoc; - aOut[iCol*3 + 2] = (u32)p->nDoc; - } - return SQLITE_OK; - } - }else{ - pIter = pExpr->aDoclist; - pEnd = &pExpr->aDoclist[pExpr->nDoclist]; - } - - /* Fill in the global hit count matrix row for this phrase. */ - while( pIterpCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] + ); } /* ** fts3ExprIterate() callback used to collect the "local" part of the ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the @@ -881,18 +770,17 @@ ){ MatchInfo *p = (MatchInfo *)pCtx; int iStart = iPhrase * p->nCol * 3; int i; - for(i=0; inCol; i++) p->aMatchinfo[iStart+i*3] = 0; - - if( pExpr->aDoclist ){ + for(i=0; inCol; i++){ char *pCsr; - - pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1); + pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i); if( pCsr ){ - fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); + p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); + }else{ + p->aMatchinfo[iStart+i*3] = 0; } } return SQLITE_OK; } @@ -974,13 +862,12 @@ ** values for a matchinfo() FTS3_MATCHINFO_LCS request. */ typedef struct LcsIterator LcsIterator; struct LcsIterator { Fts3Expr *pExpr; /* Pointer to phrase expression */ - char *pRead; /* Cursor used to iterate through aDoclist */ int iPosOffset; /* Tokens count up to end of this phrase */ - int iCol; /* Current column number */ + char *pRead; /* Cursor used to iterate through aDoclist */ int iPos; /* Current position */ }; /* ** If LcsIterator.iCol is set to the following value, the iterator has @@ -1007,21 +894,14 @@ char *pRead = pIter->pRead; sqlite3_int64 iRead; int rc = 0; pRead += sqlite3Fts3GetVarint(pRead, &iRead); - if( iRead==0 ){ - pIter->iCol = LCS_ITERATOR_FINISHED; + if( iRead==0 || iRead==1 ){ + pRead = 0; rc = 1; }else{ - if( iRead==1 ){ - pRead += sqlite3Fts3GetVarint(pRead, &iRead); - pIter->iCol = (int)iRead; - pIter->iPos = pIter->iPosOffset; - pRead += sqlite3Fts3GetVarint(pRead, &iRead); - rc = 1; - } pIter->iPos += (int)(iRead-2); } pIter->pRead = pRead; return rc; @@ -1049,46 +929,38 @@ **/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + for(i=0; inPhrase; i++){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; - pIter->pRead = sqlite3Fts3FindPositions(pCsr,pIter->pExpr,pCsr->iPrevId,-1); - if( pIter->pRead ){ - pIter->iPos = pIter->iPosOffset; - fts3LcsIteratorAdvance(&aIter[i]); - }else{ - pIter->iCol = LCS_ITERATOR_FINISHED; - } } for(iCol=0; iColnCol; iCol++){ int nLcs = 0; /* LCS value for this column */ int nLive = 0; /* Number of iterators in aIter not at EOF */ - /* Loop through the iterators in aIter[]. Set nLive to the number of - ** iterators that point to a position-list corresponding to column iCol. - */ for(i=0; inPhrase; i++){ - assert( aIter[i].iCol>=iCol ); - if( aIter[i].iCol==iCol ) nLive++; + LcsIterator *pIt = &aIter[i]; + pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol); + if( pIt->pRead ){ + pIt->iPos = pIt->iPosOffset; + fts3LcsIteratorAdvance(&aIter[i]); + nLive++; + } } - /* The following loop runs until all iterators in aIter[] have finished - ** iterating through positions in column iCol. Exactly one of the - ** iterators is advanced each time the body of the loop is run. - */ while( nLive>0 ){ LcsIterator *pAdv = 0; /* The iterator to advance by one position */ int nThisLcs = 0; /* LCS for the current iterator positions */ for(i=0; inPhrase; i++){ LcsIterator *pIter = &aIter[i]; - if( iCol!=pIter->iCol ){ + if( pIter->pRead==0 ){ /* This iterator is already at EOF for this column. */ nThisLcs = 0; }else{ if( pAdv==0 || pIter->iPosiPos ){ pAdv = pIter; @@ -1424,11 +1296,11 @@ int iTerm; /* For looping through nTerm phrase terms */ char *pList; /* Pointer to position list for phrase */ int iPos = 0; /* First position in position-list */ UNUSED_PARAMETER(iPhrase); - pList = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->iDocid, p->iCol); + pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); nTerm = pExpr->pPhrase->nToken; if( pList ){ fts3GetDeltaPosition(&pList, &iPos); assert( iPos>=0 ); } Index: ext/fts3/fts3_term.c ================================================================== --- ext/fts3/fts3_term.c +++ ext/fts3/fts3_term.c @@ -25,16 +25,17 @@ typedef struct Fts3termTable Fts3termTable; typedef struct Fts3termCursor Fts3termCursor; struct Fts3termTable { sqlite3_vtab base; /* Base class used by SQLite core */ + int iIndex; /* Index for Fts3Table.aIndex[] */ Fts3Table *pFts3Tab; }; struct Fts3termCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - Fts3SegReaderCursor csr; /* Must be right after "base" */ + Fts3MultiSegReader csr; /* Must be right after "base" */ Fts3SegFilter filter; int isEof; /* True if cursor is at EOF */ char *pNext; @@ -54,11 +55,11 @@ ** These tables have no persistent representation of their own, so xConnect ** and xCreate are identical operations. */ static int fts3termConnectMethod( sqlite3 *db, /* Database connection */ - void *pUnused, /* Unused */ + void *pCtx, /* Non-zero for an fts4prefix table */ int argc, /* Number of elements in argv array */ const char * const *argv, /* xCreate/xConnect argument array */ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ char **pzErr /* OUT: sqlite3_malloc'd error message */ ){ @@ -67,12 +68,16 @@ int nDb; /* Result of strlen(zDb) */ int nFts3; /* Result of strlen(zFts3) */ int nByte; /* Bytes of space to allocate here */ int rc; /* value returned by declare_vtab() */ Fts3termTable *p; /* Virtual table object to return */ + int iIndex = 0; - UNUSED_PARAMETER(pUnused); + if( argc==5 ){ + iIndex = atoi(argv[4]); + argc--; + } /* The user should specify a single argument - the name of an fts3 table. */ if( argc!=4 ){ *pzErr = sqlite3_mprintf( "wrong number of arguments to fts4term constructor" @@ -95,10 +100,12 @@ p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; p->pFts3Tab->db = db; + p->pFts3Tab->nIndex = iIndex+1; + p->iIndex = iIndex; memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); @@ -242,11 +249,12 @@ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts3termCursor *pCsr = (Fts3termCursor *)pCursor; - Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab; + Fts3termTable *p = (Fts3termTable *)pCursor->pVtab; + Fts3Table *pFts3 = p->pFts3Tab; int rc; UNUSED_PARAMETER(nVal); UNUSED_PARAMETER(idxNum); UNUSED_PARAMETER(idxStr); @@ -260,11 +268,11 @@ memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; pCsr->filter.flags |= FTS3_SEGMENT_SCAN; - rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL, + rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); } ADDED ext/fts3/fts3_test.c Index: ext/fts3/fts3_test.c ================================================================== --- /dev/null +++ ext/fts3/fts3_test.c @@ -0,0 +1,249 @@ +/* +** 2011 Jun 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file is not part of the production FTS code. It is only used for +** testing. It contains a Tcl command that can be used to test if a document +** matches an FTS NEAR expression. +*/ + +#include +#include +#include + +#define NM_MAX_TOKEN 12 + +typedef struct NearPhrase NearPhrase; +typedef struct NearDocument NearDocument; +typedef struct NearToken NearToken; + +struct NearDocument { + int nToken; /* Length of token in bytes */ + NearToken *aToken; /* Token array */ +}; + +struct NearToken { + int n; /* Length of token in bytes */ + const char *z; /* Pointer to token string */ +}; + +struct NearPhrase { + int nNear; /* Preceding NEAR value */ + int nToken; /* Number of tokens in this phrase */ + NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */ +}; + +static int nm_phrase_match( + NearPhrase *p, + NearToken *aToken +){ + int ii; + + for(ii=0; iinToken; ii++){ + NearToken *pToken = &p->aToken[ii]; + if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){ + if( aToken[ii].n<(pToken->n-1) ) return 0; + if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0; + }else{ + if( aToken[ii].n!=pToken->n ) return 0; + if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0; + } + } + + return 1; +} + +static int nm_near_chain( + int iDir, /* Direction to iterate through aPhrase[] */ + NearDocument *pDoc, /* Document to match against */ + int iPos, /* Position at which iPhrase was found */ + int nPhrase, /* Size of phrase array */ + NearPhrase *aPhrase, /* Phrase array */ + int iPhrase /* Index of phrase found */ +){ + int iStart; + int iStop; + int ii; + int nNear; + int iPhrase2; + NearPhrase *p; + NearPhrase *pPrev; + + assert( iDir==1 || iDir==-1 ); + + if( iDir==1 ){ + if( (iPhrase+1)==nPhrase ) return 1; + nNear = aPhrase[iPhrase+1].nNear; + }else{ + if( iPhrase==0 ) return 1; + nNear = aPhrase[iPhrase].nNear; + } + pPrev = &aPhrase[iPhrase]; + iPhrase2 = iPhrase+iDir; + p = &aPhrase[iPhrase2]; + + iStart = iPos - nNear - p->nToken; + iStop = iPos + nNear + pPrev->nToken; + + if( iStart<0 ) iStart = 0; + if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken; + + for(ii=iStart; ii<=iStop; ii++){ + if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ + if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1; + } + } + + return 0; +} + +static int nm_match_count( + NearDocument *pDoc, /* Document to match against */ + int nPhrase, /* Size of phrase array */ + NearPhrase *aPhrase, /* Phrase array */ + int iPhrase /* Index of phrase to count matches for */ +){ + int nOcc = 0; + int ii; + NearPhrase *p = &aPhrase[iPhrase]; + + for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){ + if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ + /* Test forward NEAR chain (i>iPhrase) */ + if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue; + + /* Test reverse NEAR chain (iNM_MAX_TOKEN ){ + Tcl_AppendResult(interp, "Too many tokens in phrase", 0); + rc = TCL_ERROR; + goto near_match_out; + } + for(jj=0; jjz = Tcl_GetStringFromObj(apToken[jj], &pT->n); + } + aPhrase[ii].nToken = nToken; + } + for(ii=1; ii0)); + + near_match_out: + ckfree((char *)aPhrase); + ckfree((char *)doc.aToken); + return rc; +} + +int Sqlitetestfts3_Init(Tcl_Interp *interp){ + Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0); + return TCL_OK; +} + Index: ext/fts3/fts3_write.c ================================================================== --- ext/fts3/fts3_write.c +++ ext/fts3/fts3_write.c @@ -34,18 +34,38 @@ ** it is always safe to read up to two varints from it without risking an ** overread, even if the node data is corrupted. */ #define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) +/* +** Under certain circumstances, b-tree nodes (doclists) can be loaded into +** memory incrementally instead of all at once. This can be a big performance +** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext() +** method before retrieving all query results (as may happen, for example, +** if a query has a LIMIT clause). +** +** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD +** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes. +** The code is written so that the hard lower-limit for each of these values +** is 1. Clearly such small values would be inefficient, but can be useful +** for testing purposes. +** +** TODO: Add a test interface to modify these "constants" from a script for +** this purpose. +*/ +#define FTS3_NODE_CHUNKSIZE (4*1024) +#define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) +/* #define FTS3_NODE_CHUNKSIZE 1 */ +/* #define FTS3_NODE_CHUNK_THRESHOLD 1 */ + typedef struct PendingList PendingList; typedef struct SegmentNode SegmentNode; typedef struct SegmentWriter SegmentWriter; /* -** Data structure used while accumulating terms in the pending-terms hash -** table. The hash table entry maps from term (a string) to a malloc'd -** instance of this structure. +** An instance of the following data structure is used to build doclists +** incrementally. See function fts3PendingListAppend() for details. */ struct PendingList { int nData; char *aData; int nSpace; @@ -72,11 +92,10 @@ ** of type Fts3SegReader* are also used by code in fts3.c to iterate through ** terms when querying the full-text index. See functions: ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() -** sqlite3Fts3SegReaderCost() ** sqlite3Fts3SegReaderIterate() ** ** Methods used to manipulate Fts3SegReader structures: ** ** fts3SegReaderNext() @@ -91,10 +110,13 @@ sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ char *aNode; /* Pointer to node data (or NULL) */ int nNode; /* Size of buffer at aNode (or 0) */ + int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ + sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ + Fts3HashElem **ppNextElem; /* Variables set by fts3SegReaderNext(). These may be read directly ** by the caller. They are valid from the time SegmentReaderNew() returns ** until SegmentReaderNext() returns something other than SQLITE_OK @@ -104,12 +126,15 @@ char *zTerm; /* Pointer to current term */ int nTermAlloc; /* Allocated size of zTerm buffer */ char *aDoclist; /* Pointer to doclist of current entry */ int nDoclist; /* Size of doclist in current entry */ - /* The following variables are used to iterate through the current doclist */ + /* The following variables are used by fts3SegReaderNextDocid() to iterate + ** through the current doclist (aDoclist/nDoclist). + */ char *pOffsetList; + int nOffsetList; /* For descending pending seg-readers only */ sqlite3_int64 iDocid; }; #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) #define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1]) @@ -143,10 +168,18 @@ ** within the fts3SegWriterXXX() family of functions described above. ** ** fts3NodeAddTerm() ** fts3NodeWrite() ** fts3NodeFree() +** +** When a b+tree is written to the database (either as a result of a merge +** or the pending-terms table being flushed), leaves are written into the +** database file as soon as they are completely populated. The interior of +** the tree is assembled in memory and written out only once all leaves have +** been populated and stored. This is Ok, as the b+-tree fanout is usually +** very large, meaning that the interior of the tree consumes relatively +** little memory. */ struct SegmentNode { SegmentNode *pParent; /* Parent node (or NULL for root node) */ SegmentNode *pRight; /* Pointer to right-sibling */ SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ @@ -173,21 +206,26 @@ #define SQL_NEXT_SEGMENT_INDEX 8 #define SQL_INSERT_SEGMENTS 9 #define SQL_NEXT_SEGMENTS_ID 10 #define SQL_INSERT_SEGDIR 11 #define SQL_SELECT_LEVEL 12 -#define SQL_SELECT_ALL_LEVEL 13 +#define SQL_SELECT_LEVEL_RANGE 13 #define SQL_SELECT_LEVEL_COUNT 14 -#define SQL_SELECT_SEGDIR_COUNT_MAX 15 -#define SQL_DELETE_SEGDIR_BY_LEVEL 16 +#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 +#define SQL_DELETE_SEGDIR_LEVEL 16 #define SQL_DELETE_SEGMENTS_RANGE 17 #define SQL_CONTENT_INSERT 18 #define SQL_DELETE_DOCSIZE 19 #define SQL_REPLACE_DOCSIZE 20 #define SQL_SELECT_DOCSIZE 21 #define SQL_SELECT_DOCTOTAL 22 #define SQL_REPLACE_DOCTOTAL 23 + +#define SQL_SELECT_ALL_PREFIX_LEVEL 24 +#define SQL_DELETE_ALL_TERMS_SEGDIR 25 + +#define SQL_DELETE_SEGDIR_RANGE 26 /* ** This function is used to obtain an SQLite prepared statement handle ** for the statement identified by the second argument. If successful, ** *pp is set to the requested statement handle and SQLITE_OK returned. @@ -220,23 +258,29 @@ /* Return segments in order from oldest to newest.*/ /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' ORDER BY level DESC, idx ASC", + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" + "ORDER BY level DESC, idx ASC", /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", -/* 15 */ "SELECT count(*), max(level) FROM %Q.'%q_segdir'", +/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0", /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)", +/* 24 */ "", +/* 25 */ "", + +/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", + }; int rc = SQLITE_OK; sqlite3_stmt *pStmt; assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); @@ -388,18 +432,36 @@ ** 1: start_block ** 2: leaves_end_block ** 3: end_block ** 4: root */ -int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){ +int sqlite3Fts3AllSegdirs( + Fts3Table *p, /* FTS3 table */ + int iIndex, /* Index for p->aIndex[] */ + int iLevel, /* Level to select */ + sqlite3_stmt **ppStmt /* OUT: Compiled statement */ +){ int rc; sqlite3_stmt *pStmt = 0; + + assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); + assert( iLevel=0 && iIndexnIndex ); + if( iLevel<0 ){ - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0); + /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); + sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1); + } }else{ + /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); - if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL); + } } *ppStmt = pStmt; return rc; } @@ -509,10 +571,51 @@ *pp = p; return 1; } return 0; } + +/* +** Free a PendingList object allocated by fts3PendingListAppend(). +*/ +static void fts3PendingListDelete(PendingList *pList){ + sqlite3_free(pList); +} + +/* +** Add an entry to one of the pending-terms hash tables. +*/ +static int fts3PendingTermsAddOne( + Fts3Table *p, + int iCol, + int iPos, + Fts3Hash *pHash, /* Pending terms hash table to add entry to */ + const char *zToken, + int nToken +){ + PendingList *pList; + int rc = SQLITE_OK; + + pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); + if( pList ){ + p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); + } + if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ + if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ + /* Malloc failed while inserting the new entry. This can only + ** happen if there was no previous entry for this token. + */ + assert( 0==fts3HashFind(pHash, zToken, nToken) ); + sqlite3_free(pList); + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + } + return rc; +} /* ** Tokenize the nul-terminated string zText and add all tokens to the ** pending-terms hash-table. The docid used is that currently stored in ** p->iPrevDocid, and the column is specified by argument iCol. @@ -558,12 +661,11 @@ xNext = pModule->xNext; while( SQLITE_OK==rc && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) ){ - PendingList *pList; - + int i; if( iPos>=nWord ) nWord = iPos+1; /* Positions cannot be negative; we use -1 as a terminator internally. ** Tokens must have a non-zero length. */ @@ -570,26 +672,23 @@ if( iPos<0 || !zToken || nToken<=0 ){ rc = SQLITE_ERROR; break; } - pList = (PendingList *)fts3HashFind(&p->pendingTerms, zToken, nToken); - if( pList ){ - p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); - } - if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ - if( pList==fts3HashInsert(&p->pendingTerms, zToken, nToken, pList) ){ - /* Malloc failed while inserting the new entry. This can only - ** happen if there was no previous entry for this token. - */ - assert( 0==fts3HashFind(&p->pendingTerms, zToken, nToken) ); - sqlite3_free(pList); - rc = SQLITE_NOMEM; - } - } - if( rc==SQLITE_OK ){ - p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + /* Add the term to the terms index */ + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken + ); + + /* Add the term to each of the prefix indexes that it is not too + ** short for. */ + for(i=1; rc==SQLITE_OK && inIndex; i++){ + struct Fts3Index *pIndex = &p->aIndex[i]; + if( nTokennPrefix ) continue; + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix + ); } } pModule->xClose(pCsr); *pnWord = nWord; @@ -615,18 +714,23 @@ p->iPrevDocid = iDocid; return SQLITE_OK; } /* -** Discard the contents of the pending-terms hash table. +** Discard the contents of the pending-terms hash tables. */ void sqlite3Fts3PendingTermsClear(Fts3Table *p){ - Fts3HashElem *pElem; - for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){ - sqlite3_free(fts3HashData(pElem)); + int i; + for(i=0; inIndex; i++){ + Fts3HashElem *pElem; + Fts3Hash *pHash = &p->aIndex[i].hPending; + for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ + PendingList *pList = (PendingList *)fts3HashData(pElem); + fts3PendingListDelete(pList); + } + fts3HashClear(pHash); } - fts3HashClear(&p->pendingTerms); p->nPendingData = 0; } /* ** This function is called by the xUpdate() method as part of an INSERT @@ -778,11 +882,11 @@ /* ** Forward declaration to account for the circular dependency between ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). */ -static int fts3SegmentMerge(Fts3Table *, int); +static int fts3SegmentMerge(Fts3Table *, int, int); /* ** This function allocates a new level iLevel index in the segdir table. ** Usually, indexes are allocated within a level sequentially starting ** with 0, so the allocated index is one greater than the value returned @@ -795,19 +899,24 @@ ** allocated index is 0. ** ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK ** returned. Otherwise, an SQLite error code is returned. */ -static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){ +static int fts3AllocateSegdirIdx( + Fts3Table *p, + int iIndex, /* Index for p->aIndex */ + int iLevel, + int *piIdx +){ int rc; /* Return Code */ sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ int iNext = 0; /* Result of query pNextIdx */ /* Set variable iNext to the next available segdir index at level iLevel. */ rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pNextIdx, 1, iLevel); + sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel); if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ iNext = sqlite3_column_int(pNextIdx, 0); } rc = sqlite3_reset(pNextIdx); } @@ -817,11 +926,11 @@ ** full, merge all segments in level iLevel into a single iLevel+1 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. */ if( iNext>=FTS3_MERGE_COUNT ){ - rc = fts3SegmentMerge(p, iLevel); + rc = fts3SegmentMerge(p, iIndex, iLevel); *piIdx = 0; }else{ *piIdx = iNext; } } @@ -858,11 +967,12 @@ */ int sqlite3Fts3ReadBlock( Fts3Table *p, /* FTS3 table handle */ sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ char **paBlob, /* OUT: Blob data in malloc'd buffer */ - int *pnBlob /* OUT: Size of blob data */ + int *pnBlob, /* OUT: Size of blob data */ + int *pnLoad /* OUT: Bytes actually loaded */ ){ int rc; /* Return code */ /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ assert( pnBlob); @@ -879,25 +989,29 @@ ); } if( rc==SQLITE_OK ){ int nByte = sqlite3_blob_bytes(p->pSegments); + *pnBlob = nByte; if( paBlob ){ char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); if( !aByte ){ rc = SQLITE_NOMEM; }else{ + if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ + nByte = FTS3_NODE_CHUNKSIZE; + *pnLoad = nByte; + } rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); memset(&aByte[nByte], 0, FTS3_NODE_PADDING); if( rc!=SQLITE_OK ){ sqlite3_free(aByte); aByte = 0; } } *paBlob = aByte; } - *pnBlob = nByte; } return rc; } @@ -907,17 +1021,59 @@ */ void sqlite3Fts3SegmentsClose(Fts3Table *p){ sqlite3_blob_close(p->pSegments); p->pSegments = 0; } + +static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ + int nRead; /* Number of bytes to read */ + int rc; /* Return code */ + + nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); + rc = sqlite3_blob_read( + pReader->pBlob, + &pReader->aNode[pReader->nPopulate], + nRead, + pReader->nPopulate + ); + + if( rc==SQLITE_OK ){ + pReader->nPopulate += nRead; + memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); + if( pReader->nPopulate==pReader->nNode ){ + sqlite3_blob_close(pReader->pBlob); + pReader->pBlob = 0; + pReader->nPopulate = 0; + } + } + return rc; +} + +static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ + int rc = SQLITE_OK; + assert( !pReader->pBlob + || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) + ); + while( pReader->pBlob && rc==SQLITE_OK + && (pFrom - pReader->aNode + nByte)>pReader->nPopulate + ){ + rc = fts3SegReaderIncrRead(pReader); + } + return rc; +} /* ** Move the iterator passed as the first argument to the next term in the ** segment. If successful, SQLITE_OK is returned. If there is no next term, ** SQLITE_DONE. Otherwise, an SQLite error code. */ -static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){ +static int fts3SegReaderNext( + Fts3Table *p, + Fts3SegReader *pReader, + int bIncr +){ + int rc; /* Return code of various sub-routines */ char *pNext; /* Cursor variable */ int nPrefix; /* Number of bytes in term prefix */ int nSuffix; /* Number of bytes in term suffix */ if( !pReader->aDoclist ){ @@ -925,11 +1081,10 @@ }else{ pNext = &pReader->aDoclist[pReader->nDoclist]; } if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ - int rc; /* Return code from Fts3ReadBlock() */ if( fts3SegReaderIsPending(pReader) ){ Fts3HashElem *pElem = *(pReader->ppNextElem); if( pElem==0 ){ pReader->aNode = 0; @@ -945,10 +1100,12 @@ return SQLITE_OK; } if( !fts3SegReaderIsRootOnly(pReader) ){ sqlite3_free(pReader->aNode); + sqlite3_blob_close(pReader->pBlob); + pReader->pBlob = 0; } pReader->aNode = 0; /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf ** blocks have already been traversed. */ @@ -956,19 +1113,29 @@ if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ return SQLITE_OK; } rc = sqlite3Fts3ReadBlock( - p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode + p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, + (bIncr ? &pReader->nPopulate : 0) ); if( rc!=SQLITE_OK ) return rc; + assert( pReader->pBlob==0 ); + if( bIncr && pReader->nPopulatenNode ){ + pReader->pBlob = p->pSegments; + p->pSegments = 0; + } pNext = pReader->aNode; } + + assert( !fts3SegReaderIsPending(pReader) ); + + rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); + if( rc!=SQLITE_OK ) return rc; /* Because of the FTS3_NODE_PADDING bytes of padding, the following is - ** safe (no risk of overread) even if the node data is corrupted. - */ + ** safe (no risk of overread) even if the node data is corrupted. */ pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix); pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix); if( nPrefix<0 || nSuffix<=0 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] ){ @@ -982,10 +1149,14 @@ return SQLITE_NOMEM; } pReader->zTerm = zNew; pReader->nTermAlloc = nNew; } + + rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); + if( rc!=SQLITE_OK ) return rc; + memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); pReader->nTerm = nPrefix+nSuffix; pNext += nSuffix; pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist); pReader->aDoclist = pNext; @@ -994,11 +1165,11 @@ /* Check that the doclist does not appear to extend past the end of the ** b-tree node. And that the final byte of the doclist is 0x00. If either ** of these statements is untrue, then the data structure is corrupt. */ if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] - || pReader->aDoclist[pReader->nDoclist-1] + || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) ){ return SQLITE_CORRUPT_VTAB; } return SQLITE_OK; } @@ -1005,16 +1176,30 @@ /* ** Set the SegReader to point to the first docid in the doclist associated ** with the current term. */ -static void fts3SegReaderFirstDocid(Fts3SegReader *pReader){ - int n; +static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ + int rc = SQLITE_OK; assert( pReader->aDoclist ); assert( !pReader->pOffsetList ); - n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); - pReader->pOffsetList = &pReader->aDoclist[n]; + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + u8 bEof = 0; + pReader->iDocid = 0; + pReader->nOffsetList = 0; + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, + &pReader->iDocid, &pReader->nOffsetList, &bEof + ); + }else{ + rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); + pReader->pOffsetList = &pReader->aDoclist[n]; + } + } + return rc; } /* ** Advance the SegReader to point to the next docid in the doclist ** associated with the current term. @@ -1023,132 +1208,126 @@ ** *ppOffsetList is set to point to the first column-offset list ** in the doclist entry (i.e. immediately past the docid varint). ** *pnOffsetList is set to the length of the set of column-offset ** lists, not including the nul-terminator byte. For example: */ -static void fts3SegReaderNextDocid( - Fts3SegReader *pReader, - char **ppOffsetList, - int *pnOffsetList +static int fts3SegReaderNextDocid( + Fts3Table *pTab, + Fts3SegReader *pReader, /* Reader to advance to next docid */ + char **ppOffsetList, /* OUT: Pointer to current position-list */ + int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ ){ + int rc = SQLITE_OK; char *p = pReader->pOffsetList; char c = 0; - /* Pointer p currently points at the first byte of an offset list. The - ** following two lines advance it to point one byte past the end of - ** the same offset list. - */ - while( *p | c ) c = *p++ & 0x80; - p++; - - /* If required, populate the output variables with a pointer to and the - ** size of the previous offset-list. - */ - if( ppOffsetList ){ - *ppOffsetList = pReader->pOffsetList; - *pnOffsetList = (int)(p - pReader->pOffsetList - 1); - } - - /* If there are no more entries in the doclist, set pOffsetList to - ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and - ** Fts3SegReader.pOffsetList to point to the next offset list before - ** returning. - */ - if( p>=&pReader->aDoclist[pReader->nDoclist] ){ - pReader->pOffsetList = 0; + assert( p ); + + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + /* A pending-terms seg-reader for an FTS4 table that uses order=desc. + ** Pending-terms doclists are always built up in ascending order, so + ** we have to iterate through them backwards here. */ + u8 bEof = 0; + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = pReader->nOffsetList - 1; + } + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, + &pReader->nOffsetList, &bEof + ); + if( bEof ){ + pReader->pOffsetList = 0; + }else{ + pReader->pOffsetList = p; + } }else{ - sqlite3_int64 iDelta; - pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); - pReader->iDocid += iDelta; - } -} - -/* -** This function is called to estimate the amount of data that will be -** loaded from the disk If SegReaderIterate() is called on this seg-reader, -** in units of average document size. -** -** This can be used as follows: If the caller has a small doclist that -** contains references to N documents, and is considering merging it with -** a large doclist (size X "average documents"), it may opt not to load -** the large doclist if X>N. -*/ -int sqlite3Fts3SegReaderCost( - Fts3Cursor *pCsr, /* FTS3 cursor handle */ - Fts3SegReader *pReader, /* Segment-reader handle */ - int *pnCost /* IN/OUT: Number of bytes read */ + + /* Pointer p currently points at the first byte of an offset list. The + ** following block advances it to point one byte past the end of + ** the same offset list. */ + while( 1 ){ + + /* The following line of code (and the "p++" below the while() loop) is + ** normally all that is required to move pointer p to the desired + ** position. The exception is if this node is being loaded from disk + ** incrementally and pointer "p" now points to the first byte passed + ** the populated part of pReader->aNode[]. + */ + while( *p | c ) c = *p++ & 0x80; + assert( *p==0 ); + + if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; + rc = fts3SegReaderIncrRead(pReader); + if( rc!=SQLITE_OK ) return rc; + } + p++; + + /* If required, populate the output variables with a pointer to and the + ** size of the previous offset-list. + */ + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = (int)(p - pReader->pOffsetList - 1); + } + + /* If there are no more entries in the doclist, set pOffsetList to + ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and + ** Fts3SegReader.pOffsetList to point to the next offset list before + ** returning. + */ + if( p>=&pReader->aDoclist[pReader->nDoclist] ){ + pReader->pOffsetList = 0; + }else{ + rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + sqlite3_int64 iDelta; + pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); + if( pTab->bDescIdx ){ + pReader->iDocid -= iDelta; + }else{ + pReader->iDocid += iDelta; + } + } + } + } + + return SQLITE_OK; +} + + +int sqlite3Fts3MsrOvfl( + Fts3Cursor *pCsr, + Fts3MultiSegReader *pMsr, + int *pnOvfl ){ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; - int rc = SQLITE_OK; /* Return code */ - int nCost = 0; /* Cost in bytes to return */ - int pgsz = p->nPgsz; /* Database page size */ - - /* If this seg-reader is reading the pending-terms table, or if all data - ** for the segment is stored on the root page of the b-tree, then the cost - ** is zero. In this case all required data is already in main memory. - */ - if( p->bHasStat - && !fts3SegReaderIsPending(pReader) - && !fts3SegReaderIsRootOnly(pReader) - ){ - int nBlob = 0; - sqlite3_int64 iBlock; - - if( pCsr->nRowAvg==0 ){ - /* The average document size, which is required to calculate the cost - ** of each doclist, has not yet been determined. Read the required - ** data from the %_stat table to calculate it. - ** - ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 - ** varints, where nCol is the number of columns in the FTS3 table. - ** The first varint is the number of documents currently stored in - ** the table. The following nCol varints contain the total amount of - ** data stored in all rows of each column of the table, from left - ** to right. - */ - sqlite3_stmt *pStmt; - sqlite3_int64 nDoc = 0; - sqlite3_int64 nByte = 0; - const char *pEnd; - const char *a; - - rc = sqlite3Fts3SelectDoctotal(p, &pStmt); - if( rc!=SQLITE_OK ) return rc; - a = sqlite3_column_blob(pStmt, 0); - assert( a ); - - pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; - a += sqlite3Fts3GetVarint(a, &nDoc); - while( anRowAvg = (int)(((nByte / nDoc) + pgsz) / pgsz); - assert( pCsr->nRowAvg>0 ); - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ) return rc; - } - - /* Assume that a blob flows over onto overflow pages if it is larger - ** than (pgsz-35) bytes in size (the file-format documentation - ** confirms this). - */ - for(iBlock=pReader->iStartBlock; iBlock<=pReader->iLeafEndBlock; iBlock++){ - rc = sqlite3Fts3ReadBlock(p, iBlock, 0, &nBlob); - if( rc!=SQLITE_OK ) break; - if( (nBlob+35)>pgsz ){ - int nOvfl = (nBlob + 34)/pgsz; - nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg); - } - } - } - - *pnCost += nCost; + int nOvfl = 0; + int ii; + int rc = SQLITE_OK; + int pgsz = p->nPgsz; + + assert( p->bHasStat ); + assert( pgsz>0 ); + + for(ii=0; rc==SQLITE_OK && iinSegment; ii++){ + Fts3SegReader *pReader = pMsr->apSegment[ii]; + if( !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + int jj; + for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ + int nBlob; + rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); + if( rc!=SQLITE_OK ) break; + if( (nBlob+35)>pgsz ){ + nOvfl += (nBlob + 34)/pgsz; + } + } + } + } + *pnOvfl = nOvfl; return rc; } /* ** Free all allocations associated with the iterator passed as the @@ -1157,10 +1336,11 @@ void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ if( pReader && !fts3SegReaderIsPending(pReader) ){ sqlite3_free(pReader->zTerm); if( !fts3SegReaderIsRootOnly(pReader) ){ sqlite3_free(pReader->aNode); + sqlite3_blob_close(pReader->pBlob); } } sqlite3_free(pReader); } @@ -1233,28 +1413,46 @@ } /* ** This function is used to allocate an Fts3SegReader that iterates through ** a subset of the terms stored in the Fts3Table.pendingTerms array. +** +** If the isPrefixIter parameter is zero, then the returned SegReader iterates +** through each term in the pending-terms table. Or, if isPrefixIter is +** non-zero, it iterates through each term and its prefixes. For example, if +** the pending terms hash table contains the terms "sqlite", "mysql" and +** "firebird", then the iterator visits the following 'terms' (in the order +** shown): +** +** f fi fir fire fireb firebi firebir firebird +** m my mys mysq mysql +** s sq sql sqli sqlit sqlite +** +** Whereas if isPrefixIter is zero, the terms visited are: +** +** firebird mysql sqlite */ int sqlite3Fts3SegReaderPending( Fts3Table *p, /* Virtual table handle */ + int iIndex, /* Index for p->aIndex */ const char *zTerm, /* Term to search for */ int nTerm, /* Size of buffer zTerm */ - int isPrefix, /* True for a term-prefix query */ + int bPrefix, /* True for a prefix iterator */ Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ ){ Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ int nElem = 0; /* Size of array at aElem */ int rc = SQLITE_OK; /* Return Code */ + Fts3Hash *pHash; - if( isPrefix ){ + pHash = &p->aIndex[iIndex].hPending; + if( bPrefix ){ int nAlloc = 0; /* Size of allocated array at aElem */ Fts3HashElem *pE = 0; /* Iterator variable */ - for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){ + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ char *zKey = (char *)fts3HashKey(pE); int nKey = fts3HashKeysize(pE); if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ if( nElem==nAlloc ){ Fts3HashElem **aElem2; @@ -1267,10 +1465,11 @@ nElem = 0; break; } aElem = aElem2; } + aElem[nElem++] = pE; } } /* If more than one term matches the prefix, sort the Fts3HashElem @@ -1280,11 +1479,13 @@ if( nElem>1 ){ qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); } }else{ - Fts3HashElem *pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm); + /* The query is a simple term lookup that matches at most one term in + ** the index. All that is required is a straight hash-lookup. */ + Fts3HashElem *pE = fts3HashFindElem(pHash, zTerm, nTerm); if( pE ){ aElem = &pE; nElem = 1; } } @@ -1300,11 +1501,11 @@ pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); } } - if( isPrefix ){ + if( bPrefix ){ sqlite3_free(aElem); } *ppReader = pReader; return rc; } @@ -1360,10 +1561,22 @@ if( pLhs->iDocid==pRhs->iDocid ){ rc = pRhs->iIdx - pLhs->iIdx; }else{ rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; } + } + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} +static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; + } } assert( pLhs->aNode && pRhs->aNode ); return rc; } @@ -1912,25 +2125,34 @@ } return rc; } /* -** Set *pnSegment to the total number of segments in the database. Set -** *pnMax to the largest segment level in the database (segment levels -** are stored in the 'level' column of the %_segdir table). +** Set *pnMax to the largest segment level in the database for the index +** iIndex. +** +** Segment levels are stored in the 'level' column of the %_segdir table. ** ** Return SQLITE_OK if successful, or an SQLite error code if not. */ -static int fts3SegmentCountMax(Fts3Table *p, int *pnSegment, int *pnMax){ +static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){ sqlite3_stmt *pStmt; int rc; + assert( iIndex>=0 && iIndexnIndex ); - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_COUNT_MAX, &pStmt, 0); + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); + sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1); if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pnSegment = sqlite3_column_int(pStmt, 0); - *pnMax = sqlite3_column_int(pStmt, 1); + *pnMax = sqlite3_column_int(pStmt, 0); } return sqlite3_reset(pStmt); } /* @@ -1947,10 +2169,11 @@ ** ** SQLITE_OK is returned if successful, otherwise an SQLite error code. */ static int fts3DeleteSegdir( Fts3Table *p, /* Virtual table handle */ + int iIndex, /* Index for p->aIndex */ int iLevel, /* Level of %_segdir entries to delete */ Fts3SegReader **apSegment, /* Array of SegReader objects */ int nReader /* Size of array apSegment */ ){ int rc; /* Return Code */ @@ -1969,23 +2192,28 @@ } if( rc!=SQLITE_OK ){ return rc; } + assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); if( iLevel==FTS3_SEGCURSOR_ALL ){ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); - }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ - sqlite3Fts3PendingTermsClear(p); + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL); + sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1); + } }else{ - assert( iLevel>=0 ); - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0); + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int(pDelete, 1, iLevel); - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); + sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel); } } + + if( rc==SQLITE_OK ){ + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } return rc; } /* @@ -2028,14 +2256,124 @@ } *ppList = pList; *pnList = nList; } + +int sqlite3Fts3MsrIncrStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + int iCol, /* Column to match on. */ + const char *zTerm, /* Term to iterate through a doclist for */ + int nTerm /* Number of bytes in zTerm */ +){ + int i; + int nSegment = pCsr->nSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + assert( pCsr->pFilter==0 ); + assert( zTerm && nTerm>0 ); + + /* Advance each segment iterator until it points to the term zTerm/nTerm. */ + for(i=0; iapSegment[i]; + do { + int rc = fts3SegReaderNext(p, pSeg, 1); + if( rc!=SQLITE_OK ) return rc; + }while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ); + } + fts3SegReaderSort(pCsr->apSegment, nSegment, nSegment, fts3SegReaderCmp); + + /* Determine how many of the segments actually point to zTerm/nTerm. */ + for(i=0; iapSegment[i]; + if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ + break; + } + } + pCsr->nAdvance = i; + + /* Advance each of the segments to point to the first docid. */ + for(i=0; inAdvance; i++){ + int rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); + + assert( iCol<0 || iColnColumn ); + pCsr->iColFilter = iCol; + + return SQLITE_OK; +} + +int sqlite3Fts3MsrIncrNext( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + sqlite3_int64 *piDocid, /* OUT: Docid value */ + char **paPoslist, /* OUT: Pointer to position list */ + int *pnPoslist /* OUT: Size of position list in bytes */ +){ + int nMerge = pMsr->nAdvance; + Fts3SegReader **apSegment = pMsr->apSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + if( nMerge==0 ){ + *paPoslist = 0; + return SQLITE_OK; + } + + while( 1 ){ + Fts3SegReader *pSeg; + pSeg = pMsr->apSegment[0]; + + if( pSeg->pOffsetList==0 ){ + *paPoslist = 0; + break; + }else{ + int rc; + char *pList; + int nList; + int j; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + + rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( rc==SQLITE_OK + && jpOffsetList + && apSegment[j]->iDocid==iDocid + ){ + rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + if( rc!=SQLITE_OK ) return rc; + fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); + + if( pMsr->iColFilter>=0 ){ + fts3ColumnFilter(pMsr->iColFilter, &pList, &nList); + } + + if( nList>0 ){ + *piDocid = iDocid; + *paPoslist = pList; + *pnPoslist = nList; + break; + } + } + + } + + return SQLITE_OK; +} int sqlite3Fts3SegReaderStart( Fts3Table *p, /* Virtual table handle */ - Fts3SegReaderCursor *pCsr, /* Cursor object */ + Fts3MultiSegReader *pCsr, /* Cursor object */ Fts3SegFilter *pFilter /* Restrictions on range of iteration */ ){ int i; /* Initialize the cursor object */ @@ -2050,11 +2388,11 @@ for(i=0; inSegment; i++){ int nTerm = pFilter->nTerm; const char *zTerm = pFilter->zTerm; Fts3SegReader *pSeg = pCsr->apSegment[i]; do { - int rc = fts3SegReaderNext(p, pSeg); + int rc = fts3SegReaderNext(p, pSeg, 0); if( rc!=SQLITE_OK ) return rc; }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ); } fts3SegReaderSort( pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp); @@ -2062,11 +2400,11 @@ return SQLITE_OK; } int sqlite3Fts3SegReaderStep( Fts3Table *p, /* Virtual table handle */ - Fts3SegReaderCursor *pCsr /* Cursor object */ + Fts3MultiSegReader *pCsr /* Cursor object */ ){ int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); @@ -2075,10 +2413,13 @@ int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; Fts3SegFilter *pFilter = pCsr->pFilter; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); if( pCsr->nSegment==0 ) return SQLITE_OK; do { int nMerge; @@ -2086,11 +2427,11 @@ /* Advance the first pCsr->nAdvance entries in the apSegment[] array ** forward. Then sort the list in order of current term again. */ for(i=0; inAdvance; i++){ - rc = fts3SegReaderNext(p, apSegment[i]); + rc = fts3SegReaderNext(p, apSegment[i], 0); if( rc!=SQLITE_OK ) return rc; } fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); pCsr->nAdvance = 0; @@ -2125,11 +2466,14 @@ ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); - if( nMerge==1 && !isIgnoreEmpty ){ + if( nMerge==1 + && !isIgnoreEmpty + && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) + ){ pCsr->aDoclist = apSegment[0]->aDoclist; pCsr->nDoclist = apSegment[0]->nDoclist; rc = SQLITE_ROW; }else{ int nDoclist = 0; /* Size of doclist */ @@ -2138,56 +2482,66 @@ /* The current term of the first nMerge entries in the array ** of Fts3SegReader objects is the same. The doclists must be merged ** and a single term returned with the merged doclist. */ for(i=0; ipOffsetList ){ int j; /* Number of segments that share a docid */ char *pList; int nList; int nByte; sqlite3_int64 iDocid = apSegment[0]->iDocid; - fts3SegReaderNextDocid(apSegment[0], &pList, &nList); + fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); j = 1; while( jpOffsetList && apSegment[j]->iDocid==iDocid ){ - fts3SegReaderNextDocid(apSegment[j], 0, 0); + fts3SegReaderNextDocid(p, apSegment[j], 0, 0); j++; } if( isColFilter ){ fts3ColumnFilter(pFilter->iCol, &pList, &nList); } if( !isIgnoreEmpty || nList>0 ){ - nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0); + + /* Calculate the 'docid' delta value to write into the merged + ** doclist. */ + sqlite3_int64 iDelta; + if( p->bDescIdx && nDoclist>0 ){ + iDelta = iPrev - iDocid; + }else{ + iDelta = iDocid - iPrev; + } + assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); + assert( nDoclist>0 || iDelta==iDocid ); + + nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); if( nDoclist+nByte>pCsr->nBuffer ){ char *aNew; pCsr->nBuffer = (nDoclist+nByte)*2; aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } pCsr->aBuffer = aNew; } - nDoclist += sqlite3Fts3PutVarint( - &pCsr->aBuffer[nDoclist], iDocid-iPrev - ); + nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); iPrev = iDocid; if( isRequirePos ){ memcpy(&pCsr->aBuffer[nDoclist], pList, nList); nDoclist += nList; pCsr->aBuffer[nDoclist++] = '\0'; } } - fts3SegReaderSort(apSegment, nMerge, j, fts3SegReaderDoclistCmp); + fts3SegReaderSort(apSegment, nMerge, j, xCmp); } if( nDoclist>0 ){ pCsr->aDoclist = pCsr->aBuffer; pCsr->nDoclist = nDoclist; rc = SQLITE_ROW; @@ -2196,13 +2550,14 @@ pCsr->nAdvance = nMerge; }while( rc==SQLITE_OK ); return rc; } + void sqlite3Fts3SegReaderFinish( - Fts3SegReaderCursor *pCsr /* Cursor object */ + Fts3MultiSegReader *pCsr /* Cursor object */ ){ if( pCsr ){ int i; for(i=0; inSegment; i++){ sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); @@ -2225,47 +2580,60 @@ ** If this function is called with iLevel<0, but there is only one ** segment in the database, SQLITE_DONE is returned immediately. ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, ** an SQLite error code is returned. */ -static int fts3SegmentMerge(Fts3Table *p, int iLevel){ +static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ - int iNewLevel = 0; /* Level to create new segment at */ + int iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ - Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */ + Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ + int bIgnoreEmpty = 0; /* True to ignore empty segments */ + + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevel=0 && iIndexnIndex ); - rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr); + rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the the numerically - ** greatest segment level currently present in the database. The index - ** of the new segment is always 0. */ - int nDummy; /* TODO: Remove this */ + ** greatest segment level currently present in the database for this + ** index. The idx of the new segment is always 0. */ if( csr.nSegment==1 ){ rc = SQLITE_DONE; goto finished; } - rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel); + rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel); + bIgnoreEmpty = 1; + + }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ + iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL; + rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx); }else{ - /* This call is to merge all segments at level iLevel. Find the next + /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. */ - iNewLevel = iLevel+1; - rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx); + rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx); + iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1; } if( rc!=SQLITE_OK ) goto finished; assert( csr.nSegment>0 ); - assert( iNewLevel>=0 ); + assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) ); + assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) ); memset(&filter, 0, sizeof(Fts3SegFilter)); filter.flags = FTS3_SEGMENT_REQUIRE_POS; - filter.flags |= (iLevel==FTS3_SEGCURSOR_ALL ? FTS3_SEGMENT_IGNORE_EMPTY : 0); + filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); while( SQLITE_OK==rc ){ rc = sqlite3Fts3SegReaderStep(p, &csr); if( rc!=SQLITE_ROW ) break; @@ -2273,12 +2641,14 @@ csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); } if( rc!=SQLITE_OK ) goto finished; assert( pWriter ); - rc = fts3DeleteSegdir(p, iLevel, csr.apSegment, csr.nSegment); - if( rc!=SQLITE_OK ) goto finished; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment); + if( rc!=SQLITE_OK ) goto finished; + } rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); finished: fts3SegWriterFree(pWriter); sqlite3Fts3SegReaderFinish(&csr); @@ -2285,14 +2655,21 @@ return rc; } /* -** Flush the contents of pendingTerms to a level 0 segment. +** Flush the contents of pendingTerms to level 0 segments. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ - return fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING); + int rc = SQLITE_OK; + int i; + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + sqlite3Fts3PendingTermsClear(p); + return rc; } /* ** Encode N integers as varints into a blob. */ @@ -2438,10 +2815,27 @@ sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC); sqlite3_step(pStmt); *pRC = sqlite3_reset(pStmt); sqlite3_free(a); } + +static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ + int i; + int bSeenDone = 0; + int rc = SQLITE_OK; + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL); + if( rc==SQLITE_DONE ){ + bSeenDone = 1; + rc = SQLITE_OK; + } + } + sqlite3Fts3SegmentsClose(p); + sqlite3Fts3PendingTermsClear(p); + + return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; +} /* ** Handle a 'special' INSERT of the form: ** ** "INSERT INTO tbl(tbl) VALUES()" @@ -2455,16 +2849,11 @@ int nVal = sqlite3_value_bytes(pVal); if( !zVal ){ return SQLITE_NOMEM; }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ - rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL); - if( rc==SQLITE_DONE ){ - rc = SQLITE_OK; - }else{ - sqlite3Fts3PendingTermsClear(p); - } + rc = fts3DoOptimize(p, 0); #ifdef SQLITE_TEST }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ p->nNodeSize = atoi(&zVal[9]); rc = SQLITE_OK; }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ @@ -2473,61 +2862,23 @@ #endif }else{ rc = SQLITE_ERROR; } - sqlite3Fts3SegmentsClose(p); return rc; } -/* -** Return the deferred doclist associated with deferred token pDeferred. -** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already -** been called to allocate and populate the doclist. -*/ -char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){ - if( pDeferred->pList ){ - *pnByte = pDeferred->pList->nData; - return pDeferred->pList->aData; - } - *pnByte = 0; - return 0; -} - -/* -** Helper fucntion for FreeDeferredDoclists(). This function removes all -** references to deferred doclists from within the tree of Fts3Expr -** structures headed by -*/ -static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ - if( pExpr ){ - fts3DeferredDoclistClear(pExpr->pLeft); - fts3DeferredDoclistClear(pExpr->pRight); - if( pExpr->isLoaded ){ - sqlite3_free(pExpr->aDoclist); - pExpr->isLoaded = 0; - pExpr->aDoclist = 0; - pExpr->nDoclist = 0; - pExpr->pCurrent = 0; - pExpr->iCurrent = 0; - } - } -} - /* ** Delete all cached deferred doclists. Deferred doclists are cached ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. */ void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ Fts3DeferredToken *pDef; for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ - sqlite3_free(pDef->pList); + fts3PendingListDelete(pDef->pList); pDef->pList = 0; } - if( pCsr->pDeferred ){ - fts3DeferredDoclistClear(pCsr->pExpr); - } } /* ** Free all entries in the pCsr->pDeffered list. Entries are added to ** this list using sqlite3Fts3DeferToken(). @@ -2535,11 +2886,11 @@ void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ Fts3DeferredToken *pDef; Fts3DeferredToken *pNext; for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ pNext = pDef->pNext; - sqlite3_free(pDef->pList); + fts3PendingListDelete(pDef->pList); sqlite3_free(pDef); } pCsr->pDeferred = 0; } @@ -2599,10 +2950,37 @@ } } return rc; } + +int sqlite3Fts3DeferredTokenList( + Fts3DeferredToken *p, + char **ppData, + int *pnData +){ + char *pRet; + int nSkip; + sqlite3_int64 dummy; + + *ppData = 0; + *pnData = 0; + + if( p->pList==0 ){ + return SQLITE_OK; + } + + pRet = (char *)sqlite3_malloc(p->pList->nData); + if( !pRet ) return SQLITE_NOMEM; + + nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); + *pnData = p->pList->nData - nSkip; + *ppData = pRet; + + memcpy(pRet, &p->pList->aData[nSkip], *pnData); + return SQLITE_OK; +} /* ** Add an entry for token pToken to the pCsr->pDeferred list. */ int sqlite3Fts3DeferToken( @@ -2674,11 +3052,11 @@ ){ Fts3Table *p = (Fts3Table *)pVtab; int rc = SQLITE_OK; /* Return Code */ int isRemove = 0; /* True for an UPDATE or DELETE */ sqlite3_int64 iRemove = 0; /* Rowid removed by UPDATE or DELETE */ - u32 *aSzIns; /* Sizes of inserted documents */ + u32 *aSzIns = 0; /* Sizes of inserted documents */ u32 *aSzDel; /* Sizes of deleted documents */ int nChng = 0; /* Net change in number of documents */ int bInsertDone = 0; assert( p->pSegments==0 ); @@ -2689,16 +3067,20 @@ */ if( nArg>1 && sqlite3_value_type(apVal[0])==SQLITE_NULL && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){ - return fts3SpecialInsert(p, apVal[p->nColumn+2]); + rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); + goto update_out; } /* Allocate space to hold the change in document sizes */ aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 ); - if( aSzIns==0 ) return SQLITE_NOMEM; + if( aSzIns==0 ){ + rc = SQLITE_NOMEM; + goto update_out; + } aSzDel = &aSzIns[p->nColumn+1]; memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2); /* If this is an INSERT operation, or an UPDATE that modifies the rowid ** value, then this operation requires constraint handling. @@ -2744,12 +3126,11 @@ bInsertDone = 1; } } } if( rc!=SQLITE_OK ){ - sqlite3_free(aSzIns); - return rc; + goto update_out; } /* If this is a DELETE or UPDATE operation, remove the old record. */ if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); @@ -2778,10 +3159,11 @@ if( p->bHasStat ){ fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); } + update_out: sqlite3_free(aSzIns); sqlite3Fts3SegmentsClose(p); return rc; } @@ -2792,16 +3174,14 @@ */ int sqlite3Fts3Optimize(Fts3Table *p){ int rc; rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); if( rc==SQLITE_OK ){ - rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL); - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); - if( rc==SQLITE_OK ){ - sqlite3Fts3PendingTermsClear(p); - } + rc = fts3DoOptimize(p, 1); + if( rc==SQLITE_OK || rc==SQLITE_DONE ){ + int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); + if( rc2!=SQLITE_OK ) rc = rc2; }else{ sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); } } Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -218,10 +218,11 @@ # Source code to the test files. # TESTSRC = \ + $(TOP)/ext/fts3/fts3_test.c \ $(TOP)/src/test1.c \ $(TOP)/src/test2.c \ $(TOP)/src/test3.c \ $(TOP)/src/test4.c \ $(TOP)/src/test5.c \ Index: src/tclsqlite.c ================================================================== --- src/tclsqlite.c +++ src/tclsqlite.c @@ -3582,10 +3582,14 @@ extern int Sqlitemultiplex_Init(Tcl_Interp*); extern int SqliteSuperlock_Init(Tcl_Interp*); extern int SqlitetestSyscall_Init(Tcl_Interp*); extern int Sqlitetestfuzzer_Init(Tcl_Interp*); extern int Sqlitetestwholenumber_Init(Tcl_Interp*); + +#ifdef SQLITE_ENABLE_FTS3 + extern int Sqlitetestfts3_Init(Tcl_Interp *interp); +#endif #ifdef SQLITE_ENABLE_ZIPVFS extern int Zipvfs_Init(Tcl_Interp*); Zipvfs_Init(interp); #endif @@ -3622,10 +3626,14 @@ Sqlitemultiplex_Init(interp); SqliteSuperlock_Init(interp); SqlitetestSyscall_Init(interp); Sqlitetestfuzzer_Init(interp); Sqlitetestwholenumber_Init(interp); + +#ifdef SQLITE_ENABLE_FTS3 + Sqlitetestfts3_Init(interp); +#endif Tcl_CreateObjCommand(interp,"load_testfixture_extensions",init_all_cmd,0,0); #ifdef SQLITE_SSE Sqlitetestsse_Init(interp); ADDED test/fts3auto.test Index: test/fts3auto.test ================================================================== --- /dev/null +++ test/fts3auto.test @@ -0,0 +1,535 @@ +# 2011 June 10 +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If this build does not include FTS3, skip the tests in this file. +# +ifcapable !fts3 { finish_test ; return } +source $testdir/fts3_common.tcl +source $testdir/malloc_common.tcl + +set testprefix fts3auto +set sfep $sqlite_fts3_enable_parentheses +set sqlite_fts3_enable_parentheses 1 + +#-------------------------------------------------------------------------- +# Start of Tcl infrastructure used by tests. The entry points are: +# +# do_fts3query_test +# fts3_make_deferrable +# fts3_zero_long_segments +# + +# +# do_fts3query_test TESTNAME ?OPTIONS? TABLE MATCHEXPR +# +# This proc runs several test cases on FTS3/4 table $TABLE using match +# expression $MATCHEXPR. All documents in $TABLE must be formatted so that +# they can be "tokenized" using the Tcl list commands (llength, lindex etc.). +# The name and column names used by $TABLE must not require any quoting or +# escaping when used in SQL statements. +# +# $MATCHINFO may be any expression accepted by the FTS4 MATCH operator, +# except that the ":token" syntax is not supported. Tcl list +# commands are used to tokenize the expression. Any parenthesis must appear +# either as separate list elements, or as the first (for opening) or last +# (for closing) character of a list element. i.e. the expression "(a OR b)c" +# will not be parsed correctly, but "( a OR b) c" will. +# +# Available OPTIONS are: +# +# -deferred TOKENLIST +# +# If the "deferred" option is supplied, it is passed a list of tokens that +# are deferred by FTS and result in the relevant matchinfo() stats being an +# approximation. +# +set sqlite_fts3_enable_parentheses 1 +proc do_fts3query_test {tn args} { + + set nArg [llength $args] + if {$nArg < 2 || ($nArg % 2)} { + set cmd do_fts3query_test + error "wrong # args: should be \"$cmd ?-deferred LIST? TABLE MATCHEXPR\"" + } + set tbl [lindex $args [expr $nArg-2]] + set match [lindex $args [expr $nArg-1]] + set deferred [list] + + foreach {k v} [lrange $args 0 [expr $nArg-3]] { + switch -- $k { + -deferred { + set deferred $v + } + default { + error "bad option \"$k\": must be -deferred" + } + } + } + + get_near_results $tbl $match $deferred aMatchinfo + + set matchinfo_asc [list] + foreach docid [lsort -integer -incr [array names aMatchinfo]] { + lappend matchinfo_asc $docid $aMatchinfo($docid) + } + set matchinfo_desc [list] + foreach docid [lsort -integer -decr [array names aMatchinfo]] { + lappend matchinfo_desc $docid $aMatchinfo($docid) + } + + set title "(\"$match\" -> [llength [array names aMatchinfo]] rows)" + + do_execsql_test $tn$title.1 " + SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid ASC + " [lsort -integer -incr [array names aMatchinfo]] + + do_execsql_test $tn$title.2 " + SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid DESC + " [lsort -integer -decr [array names aMatchinfo]] + + do_execsql_test $tn$title.3 " + SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl + WHERE $tbl MATCH '$match' ORDER BY docid DESC + " $matchinfo_desc + + do_execsql_test $tn$title.4 " + SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl + WHERE $tbl MATCH '$match' ORDER BY docid ASC + " $matchinfo_asc +} + +# fts3_make_deferrable TABLE TOKEN +# +proc fts3_make_deferrable {tbl token} { + + set stmt [sqlite3_prepare db "SELECT * FROM $tbl" -1 dummy] + set name [sqlite3_column_name $stmt 0] + sqlite3_finalize $stmt + + set nRow [db one "SELECT count(*) FROM $tbl"] + set pgsz [db one "PRAGMA page_size"] + execsql BEGIN + for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} { + set doc [string repeat "$token " 100] + execsql "INSERT INTO $tbl ($name) VALUES(\$doc)" + } + execsql "INSERT INTO $tbl ($name) VALUES('aaaaaaa ${token}aaaaa')" + execsql COMMIT + + return [expr $nRow*$pgsz] +} + +# fts3_zero_long_segments TABLE ?LIMIT? +# +proc fts3_zero_long_segments {tbl limit} { + execsql " + UPDATE ${tbl}_segments + SET block = zeroblob(length(block)) + WHERE length(block)>$limit + " + return [db changes] +} + + +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit + +proc fix_near_expr {expr} { + set out [list] + lappend out [lindex $expr 0] + foreach {a b} [lrange $expr 1 end] { + if {[string match -nocase near $a]} { set a 10 } + if {[string match -nocase near/* $a]} { set a [string range $a 5 end] } + lappend out $a + lappend out $b + } + return $out +} + +proc get_single_near_results {tbl expr deferred arrayvar nullvar} { + upvar $arrayvar aMatchinfo + upvar $nullvar nullentry + catch {array unset aMatchinfo} + + set expr [fix_near_expr $expr] + + # Calculate the expected results using [fts3_near_match]. The following + # loop populates the "hits" and "counts" arrays as follows: + # + # 1. For each document in the table that matches the NEAR expression, + # hits($docid) is set to 1. The set of docids that match the expression + # can therefore be found using [array names hits]. + # + # 2. For each column of each document in the table, counts($docid,$iCol) + # is set to the -phrasecountvar output. + # + set res [list] + catch { array unset hits } + db eval "SELECT docid, * FROM $tbl" d { + set iCol 0 + foreach col [lrange $d(*) 1 end] { + set docid $d(docid) + set hit [fts3_near_match $d($col) $expr -p counts($docid,$iCol)] + if {$hit} { set hits($docid) 1 } + incr iCol + } + } + set nPhrase [expr ([llength $expr]+1)/2] + set nCol $iCol + + # This block populates the nHit and nDoc arrays. For each phrase/column + # in the query/table, array elements are set as follows: + # + # nHit($iPhrase,$iCol) - Total number of hits for phrase $iPhrase in + # column $iCol. + # + # nDoc($iPhrase,$iCol) - Number of documents with at least one hit for + # phrase $iPhrase in column $iCol. + # + for {set iPhrase 0} {$iPhrase < $nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol < $nCol} {incr iCol} { + set nHit($iPhrase,$iCol) 0 + set nDoc($iPhrase,$iCol) 0 + } + } + foreach key [array names counts] { + set iCol [lindex [split $key ,] 1] + set iPhrase 0 + foreach c $counts($key) { + if {$c>0} { incr nDoc($iPhrase,$iCol) 1 } + incr nHit($iPhrase,$iCol) $c + incr iPhrase + } + } + + if {[llength $deferred] && [llength $expr]==1} { + set phrase [lindex $expr 0] + set rewritten [list] + set partial 0 + foreach tok $phrase { + if {[lsearch $deferred $tok]>=0} { + lappend rewritten * + } else { + lappend rewritten $tok + set partial 1 + } + } + if {$partial==0} { + set tblsize [db one "SELECT count(*) FROM $tbl"] + for {set iCol 0} {$iCol < $nCol} {incr iCol} { + set nHit(0,$iCol) $tblsize + set nDoc(0,$iCol) $tblsize + } + } elseif {$rewritten != $phrase} { + while {[lindex $rewritten end] == "*"} { + set rewritten [lrange $rewritten 0 end-1] + } + while {[lindex $rewritten 0] == "*"} { + set rewritten [lrange $rewritten 1 end] + } + get_single_near_results $tbl [list $rewritten] {} aRewrite nullentry + foreach docid [array names hits] { + set aMatchinfo($docid) $aRewrite($docid) + } + return + } + } + + # Set up the aMatchinfo array. For each document, set aMatchinfo($docid) to + # contain the output of matchinfo('x') for the document. + # + foreach docid [array names hits] { + set mi [list] + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol<$nCol} {incr iCol} { + lappend mi [lindex $counts($docid,$iCol) $iPhrase] + lappend mi $nHit($iPhrase,$iCol) + lappend mi $nDoc($iPhrase,$iCol) + } + } + set aMatchinfo($docid) $mi + } + + # Set up the nullentry output. + # + set nullentry [list] + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol<$nCol} {incr iCol} { + lappend nullentry 0 $nHit($iPhrase,$iCol) $nDoc($iPhrase,$iCol) + } + } +} + + +proc matching_brackets {expr} { + if {[string range $expr 0 0]!="(" || [string range $expr end end] !=")"} { + return 0 + } + + set iBracket 1 + set nExpr [string length $expr] + for {set i 1} {$iBracket && $i < $nExpr} {incr i} { + set c [string range $expr $i $i] + if {$c == "("} {incr iBracket} + if {$c == ")"} {incr iBracket -1} + } + + return [expr ($iBracket==0 && $i==$nExpr)] +} + +proc get_near_results {tbl expr deferred arrayvar {nullvar ""}} { + upvar $arrayvar aMatchinfo + if {$nullvar != ""} { upvar $nullvar nullentry } + + set expr [string trim $expr] + while { [matching_brackets $expr] } { + set expr [string trim [string range $expr 1 end-1]] + } + + set prec(NOT) 1 + set prec(AND) 2 + set prec(OR) 3 + + set currentprec 0 + set iBracket 0 + set expr_length [llength $expr] + for {set i 0} {$i < $expr_length} {incr i} { + set op [lindex $expr $i] + if {$iBracket==0 && [info exists prec($op)] && $prec($op)>=$currentprec } { + set opidx $i + set currentprec $prec($op) + } else { + for {set j 0} {$j < [string length $op]} {incr j} { + set c [string range $op $j $j] + if {$c == "("} { incr iBracket +1 } + if {$c == ")"} { incr iBracket -1 } + } + } + } + if {$iBracket!=0} { error "mismatched brackets in: $expr" } + + if {[info exists opidx]==0} { + get_single_near_results $tbl $expr $deferred aMatchinfo nullentry + } else { + set eLeft [lrange $expr 0 [expr $opidx-1]] + set eRight [lrange $expr [expr $opidx+1] end] + + get_near_results $tbl $eLeft $deferred aLeft nullleft + get_near_results $tbl $eRight $deferred aRight nullright + + switch -- [lindex $expr $opidx] { + "NOT" { + foreach hit [array names aLeft] { + if {0==[info exists aRight($hit)]} { + set aMatchinfo($hit) $aLeft($hit) + } + } + set nullentry $nullleft + } + + "AND" { + foreach hit [array names aLeft] { + if {[info exists aRight($hit)]} { + set aMatchinfo($hit) [concat $aLeft($hit) $aRight($hit)] + } + } + set nullentry [concat $nullleft $nullright] + } + + "OR" { + foreach hit [array names aLeft] { + if {[info exists aRight($hit)]} { + set aMatchinfo($hit) [concat $aLeft($hit) $aRight($hit)] + unset aRight($hit) + } else { + set aMatchinfo($hit) [concat $aLeft($hit) $nullright] + } + } + foreach hit [array names aRight] { + set aMatchinfo($hit) [concat $nullleft $aRight($hit)] + } + + set nullentry [concat $nullleft $nullright] + } + } + } +} + + +# End of test procs. Actual tests are below this line. +#-------------------------------------------------------------------------- + +#-------------------------------------------------------------------------- +# The following test cases - fts3auto-1.* - focus on testing the Tcl +# command [fts3_near_match], which is used by other tests in this file. +# +proc test_fts3_near_match {tn doc expr res} { + fts3_near_match $doc $expr -phrasecountvar p + uplevel do_test [list $tn] [list [list set {} $p]] [list $res] +} + +test_fts3_near_match 1.1.1 {a b c a b} a {2} +test_fts3_near_match 1.1.2 {a b c a b} {a 5 b 6 c} {2 2 1} +test_fts3_near_match 1.1.3 {a b c a b} {"a b"} {2} +test_fts3_near_match 1.1.4 {a b c a b} {"b c"} {1} +test_fts3_near_match 1.1.5 {a b c a b} {"c c"} {0} + +test_fts3_near_match 1.2.1 "a b c d e f g" {b 2 f} {0 0} +test_fts3_near_match 1.2.2 "a b c d e f g" {b 3 f} {1 1} +test_fts3_near_match 1.2.3 "a b c d e f g" {f 2 b} {0 0} +test_fts3_near_match 1.2.4 "a b c d e f g" {f 3 b} {1 1} +test_fts3_near_match 1.2.5 "a b c d e f g" {"a b" 2 "f g"} {0 0} +test_fts3_near_match 1.2.6 "a b c d e f g" {"a b" 3 "f g"} {1 1} + +set A "a b c d e f g h i j k l m n o p q r s t u v w x y z" +test_fts3_near_match 1.3.1 $A {"c d" 5 "i j" 1 "e f"} {0 0 0} +test_fts3_near_match 1.3.2 $A {"c d" 5 "i j" 2 "e f"} {1 1 1} + +#-------------------------------------------------------------------------- +# Test cases fts3auto-2.* run some simple tests using the +# [do_fts3query_test] proc. +# +foreach {tn create} { + 1 "fts4(a, b)" + 2 "fts4(a, b, order=DESC)" + 3 "fts4(a, b, order=ASC)" + 4 "fts4(a, b, prefix=1)" + 5 "fts4(a, b, order=DESC, prefix=1)" + 6 "fts4(a, b, order=ASC, prefix=1)" +} { + do_test 2.$tn.1 { + catchsql { DROP TABLE t1 } + execsql "CREATE VIRTUAL TABLE t1 USING $create" + for {set i 0} {$i<32} {incr i} { + set doc [list] + if {$i&0x01} {lappend doc one} + if {$i&0x02} {lappend doc two} + if {$i&0x04} {lappend doc three} + if {$i&0x08} {lappend doc four} + if {$i&0x10} {lappend doc five} + execsql { INSERT INTO t1 VALUES($doc, null) } + } + } {} + + foreach {tn2 expr} { + 1 {one} + 2 {one NEAR/1 five} + 3 {t*} + 4 {t* NEAR/0 five} + 5 {o* NEAR/1 f*} + 6 {one NEAR five NEAR two NEAR four NEAR three} + 7 {one NEAR xyz} + 8 {one OR two} + 9 {one AND two} + 10 {one NOT two} + 11 {one AND two OR three} + 12 {three OR one AND two} + 13 {(three OR one) AND two} + 14 {(three OR one) AND two NOT (five NOT four)} + 15 {"one two"} + 16 {"one two" NOT "three four"} + } { + do_fts3query_test 2.$tn.2.$tn2 t1 $expr + } +} + +#-------------------------------------------------------------------------- +# Some test cases involving deferred tokens. +# + +foreach {tn create} { + 1 "fts4(x)" + 2 "fts4(x, order=DESC)" +} { + catchsql { DROP TABLE t1 } + execsql "CREATE VIRTUAL TABLE t1 USING $create" + do_execsql_test 3.$tn.1 { + INSERT INTO t1(docid, x) VALUES(-2, 'a b c d e f g h i j k'); + INSERT INTO t1(docid, x) VALUES(-1, 'b c d e f g h i j k a'); + INSERT INTO t1(docid, x) VALUES(0, 'c d e f g h i j k a b'); + INSERT INTO t1(docid, x) VALUES(1, 'd e f g h i j k a b c'); + INSERT INTO t1(docid, x) VALUES(2, 'e f g h i j k a b c d'); + INSERT INTO t1(docid, x) VALUES(3, 'f g h i j k a b c d e'); + INSERT INTO t1(docid, x) VALUES(4, 'a c e g i k'); + INSERT INTO t1(docid, x) VALUES(5, 'a d g j'); + INSERT INTO t1(docid, x) VALUES(6, 'c a b'); + } + + set limit [fts3_make_deferrable t1 c] + + do_fts3query_test 3.$tn.2.1 t1 {a OR c} + + do_test 3.$tn.3 { + fts3_zero_long_segments t1 $limit + } {1} + + foreach {tn2 expr def} { + 1 {a NEAR c} {} + 2 {a AND c} c + 3 {"a c"} c + 4 {"c a"} c + 5 {"a c" NEAR/1 g} {} + 6 {"a c" NEAR/0 g} {} + } { + do_fts3query_test 3.$tn.4.$tn2 -deferred $def t1 $expr + } +} + +#-------------------------------------------------------------------------- +# +foreach {tn create} { + 1 "fts4(x, y)" + 2 "fts4(x, y, order=DESC)" + 3 "fts4(x, y, order=DESC, prefix=2)" +} { + + execsql [subst { + DROP TABLE t1; + CREATE VIRTUAL TABLE t1 USING $create; + INSERT INTO t1 VALUES('one two five four five', ''); + INSERT INTO t1 VALUES('', 'one two five four five'); + INSERT INTO t1 VALUES('one two', 'five four five'); + }] + + do_fts3query_test 4.$tn.1.1 t1 {one AND five} + do_fts3query_test 4.$tn.1.2 t1 {one NEAR five} + do_fts3query_test 4.$tn.1.3 t1 {one NEAR/1 five} + do_fts3query_test 4.$tn.1.4 t1 {one NEAR/2 five} + do_fts3query_test 4.$tn.1.5 t1 {one NEAR/3 five} + + do_test 4.$tn.2 { + set limit [fts3_make_deferrable t1 five] + execsql { INSERT INTO t1(t1) VALUES('optimize') } + expr {[fts3_zero_long_segments t1 $limit]>0} + } {1} + + do_fts3query_test 4.$tn.3.1 -deferred five t1 {one AND five} + do_fts3query_test 4.$tn.3.2 -deferred five t1 {one NEAR five} + do_fts3query_test 4.$tn.3.3 -deferred five t1 {one NEAR/1 five} + do_fts3query_test 4.$tn.3.4 -deferred five t1 {one NEAR/2 five} + do_fts3query_test 4.$tn.3.5 -deferred five t1 {one NEAR/3 five} + + do_fts3query_test 4.$tn.4.1 -deferred fi* t1 {on* AND fi*} + do_fts3query_test 4.$tn.4.2 -deferred fi* t1 {on* NEAR fi*} + do_fts3query_test 4.$tn.4.3 -deferred fi* t1 {on* NEAR/1 fi*} + do_fts3query_test 4.$tn.4.4 -deferred fi* t1 {on* NEAR/2 fi*} + do_fts3query_test 4.$tn.4.5 -deferred fi* t1 {on* NEAR/3 fi*} +} + +set sqlite_fts3_enable_parentheses $sfep +finish_test + Index: test/fts3defer.test ================================================================== --- test/fts3defer.test +++ test/fts3defer.test @@ -18,10 +18,12 @@ return } set sqlite_fts3_enable_parentheses 1 +set fts3_simple_deferred_tokens_only 1 + set ::testprefix fts3defer #-------------------------------------------------------------------------- # Test cases fts3defer-1.* are the "warm body" cases. The database contains # one row with 15000 instances of the token "a". This makes the doclist for @@ -255,11 +257,10 @@ SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf' } {13 29 40 47 48 52 63 92} do_select_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh' } {100} -if {$tn==3} breakpoint do_select_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf' } {7 70 98} do_select_test 1.4 { SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk' @@ -280,17 +281,20 @@ SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf' } {7 70 98} do_select_test 1.10 { SELECT rowid FROM t1 WHERE t1 MATCH 'z* vgsld' } {10 13 17 31 35 51 58 88 89 90 93 100} - do_select_test 1.11 { - SELECT rowid FROM t1 - WHERE t1 MATCH '( - zdu OR zexh OR zf OR zhbrzadb OR zidhxhbtv OR - zk OR zkhdvkw OR zm OR zsmhnf - ) vgsld' - } {10 13 17 31 35 51 58 88 89 90 93 100} + + if { $fts3_simple_deferred_tokens_only==0 } { + do_select_test 1.11 { + SELECT rowid FROM t1 + WHERE t1 MATCH '( + zdu OR zexh OR zf OR zhbrzadb OR zidhxhbtv OR + zk OR zkhdvkw OR zm OR zsmhnf + ) vgsld' + } {10 13 17 31 35 51 58 88 89 90 93 100} + } do_select_test 2.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm agmckuiu"' } {3 24 52 53} do_select_test 2.2 { @@ -362,10 +366,11 @@ # zeroed. Change this by messing with the [set dmt_modes] commands above. # foreach DO_MALLOC_TEST $dmt_modes { # Phrase search. + # do_select_test 5.$DO_MALLOC_TEST.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"' } {8 15 36 64 67 72} # Multiple tokens search. @@ -414,13 +419,15 @@ SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk"' } {8} do_select_test 6.2.2 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"' } {15 26 92 96} - do_select_test 6.2.3 { - SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' - } {8 15 26 92 96} + if {$fts3_simple_deferred_tokens_only==0} { + do_select_test 6.2.3 { + SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' + } {8 15 26 92 96} + } } set testprefix fts3defer do_execsql_test 3.1 { Index: test/fts3defer2.test ================================================================== --- test/fts3defer2.test +++ test/fts3defer2.test @@ -46,10 +46,14 @@ do_execsql_test 1.1.4 { SELECT count(*) FROM t1_segments WHERE length(block)>10000; UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } {2} +do_execsql_test 1.2.0 { + SELECT content FROM t1 WHERE t1 MATCH 'f (e a)'; +} {{a b c d e f a x y}} + do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.2 { @@ -56,20 +60,20 @@ SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ - [list 3 1 1 1 1 1 8 8 1 8 8 8 5001 9] + [list 3 1 1 1 1 1 1 1 1 1 1 8 5001 9] ] do_execsql_test 1.2.3 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)'; } [list \ {[a] b c d [e] [f] [a] x y} \ {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1} \ - [list 3 1 1 1 1 1 8 8 2 8 8 8 5001 9] + [list 3 1 1 1 1 1 1 1 2 2 1 8 5001 9] ] do_execsql_test 1.3.1 { DROP TABLE t1 } #----------------------------------------------------------------------------- @@ -97,12 +101,18 @@ } [list \ [list 2 1 1 54 54 1 3 3 54 372 8] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] - set sqlite_fts3_enable_parentheses 1 do_execsql_test 2.2.$tn.2 { + SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g z'; + } [list \ + [list 1 2 2 1 54 54] \ + ] + + set sqlite_fts3_enable_parentheses 1 + do_execsql_test 2.2.$tn.3 { SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g OR (g z)'; } [list \ [list 1 2 2 1 2 2 1 54 54] \ [list 1 2 2 1 2 2 0 54 54] \ ] Index: test/fts3matchinfo.test ================================================================== --- test/fts3matchinfo.test +++ test/fts3matchinfo.test @@ -66,11 +66,15 @@ SELECT mtchinfo FROM t3; } {{Beside the lake, beneath the trees}} do_execsql_test 3.2 { CREATE VIRTUAL TABLE xx USING FTS4; +} +do_execsql_test 3.3 { SELECT * FROM xx WHERE xx MATCH 'abc'; +} +do_execsql_test 3.4 { SELECT * FROM xx WHERE xx MATCH 'a b c'; } #-------------------------------------------------------------------------- @@ -238,13 +242,17 @@ do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; -do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { - x {{5 8 2 5 5 5} {3 8 2 3 5 5}} - s {2 1} +# It used to be that the second 'a' token would be deferred. That doesn't +# work any longer. +if 0 { + do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { + x {{5 8 2 5 5 5} {3 8 2 3 5 5}} + s {2 1} + } } do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } ADDED test/fts3prefix.test Index: test/fts3prefix.test ================================================================== --- /dev/null +++ test/fts3prefix.test @@ -0,0 +1,203 @@ +# 2011 May 04 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts3prefix + +ifcapable !fts3 { + finish_test + return +} + +# This proc tests that the prefixes index appears to represent the same content +# as the terms index. +# +proc fts3_terms_and_prefixes {db tbl prefixlengths} { + + set iIndex 0 + foreach len $prefixlengths { + incr iIndex + $db eval { + DROP TABLE IF EXISTS fts3check1; + DROP TABLE IF EXISTS fts3check2; + } + $db eval "CREATE VIRTUAL TABLE fts3check1 USING fts4term($tbl, 0);" + $db eval "CREATE VIRTUAL TABLE fts3check2 USING fts4term($tbl, $iIndex);" + + $db eval { + DROP TABLE IF EXISTS temp.terms; + DROP TABLE IF EXISTS temp.prefixes; + CREATE TEMP TABLE terms AS SELECT * FROM fts3check1; + CREATE TEMP TABLE prefixes AS SELECT * FROM fts3check2; + CREATE INDEX temp.idx ON prefixes(term); + DROP TABLE fts3check1; + DROP TABLE fts3check2; + } + + set nExpect 0 + $db eval { SELECT term, docid, col, pos FROM temp.terms } a { + if {[string length $a(term)]<$len} continue + incr nExpect + set prefix [string range $a(term) 0 [expr $len-1]] + set r [$db one { + SELECT count(*) FROM temp.prefixes WHERE + term = $prefix AND docid = $a(docid) AND col = $a(col) AND pos = $a(pos) + }] + if {$r != 1} { + error "$t, $a(docid), $a(col), $a(pos)" + } + } + + set nCount [$db one {SELECT count(*) FROM temp.prefixes}] + if {$nCount != $nExpect} { + error "prefixes.count(*) is $nCount expected $nExpect" + } + + execsql { DROP TABLE temp.prefixes } + execsql { DROP TABLE temp.terms } + + set list [list] + $db eval " + SELECT sum( 1 << (16*(level%1024)) ) AS total, (level/1024) AS tree + FROM ${tbl}_segdir GROUP BY tree + " { + lappend list [list $total $tree] + } + + if { [lsort -integer -index 0 $list] != [lsort -integer -index 1 $list] } { + error "inconsistent tree structures: $list" + } + } + + return "" +} +proc fts3_tap_test {tn db tbl lens} { + uplevel [list do_test $tn [list fts3_terms_and_prefixes $db $tbl $lens] ""] +} + +#------------------------------------------------------------------------- +# Test cases 1.* are a sanity check. They test that the prefixes index is +# being constructed correctly for the simplest possible case. +# +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4(prefix='1,3,6'); + + CREATE VIRTUAL TABLE p1 USING fts4term(t1, 1); + CREATE VIRTUAL TABLE p2 USING fts4term(t1, 2); + CREATE VIRTUAL TABLE p3 USING fts4term(t1, 3); + CREATE VIRTUAL TABLE terms USING fts4term(t1); +} +do_execsql_test 1.2 { + INSERT INTO t1 VALUES('sqlite mysql firebird'); +} +do_execsql_test 1.3.1 { SELECT term FROM p1 } {f m s} +do_execsql_test 1.3.2 { SELECT term FROM p2 } {fir mys sql} +do_execsql_test 1.3.3 { SELECT term FROM p3 } {firebi sqlite} +do_execsql_test 1.4 { + SELECT term FROM terms; +} {firebird mysql sqlite} + +fts3_tap_test 1.5 db t1 {1 3 6} + +#------------------------------------------------------------------------- +# A slightly more complicated dataset. This test also verifies that DELETE +# operations do not corrupt the prefixes index. +# +do_execsql_test 2.1 { + INSERT INTO t1 VALUES('FTS3 and FTS4 are an SQLite virtual table modules'); + INSERT INTO t1 VALUES('that allows users to perform full-text searches on'); + INSERT INTO t1 VALUES('a set of documents. The most common (and'); + INSERT INTO t1 VALUES('effective) way to describe full-text searches is'); + INSERT INTO t1 VALUES('"what Google, Yahoo and Altavista do with'); + INSERT INTO t1 VALUES('documents placed on the World Wide Web". Users'); + INSERT INTO t1 VALUES('input a term, or series of terms, perhaps'); + INSERT INTO t1 VALUES('connected by a binary operator or grouped together'); + INSERT INTO t1 VALUES('into a phrase, and the full-text query system'); + INSERT INTO t1 VALUES('finds the set of documents that best matches those'); + INSERT INTO t1 VALUES('terms considering the operators and groupings the'); + INSERT INTO t1 VALUES('user has specified. This article describes the'); + INSERT INTO t1 VALUES('deployment and usage of FTS3 and FTS4.'); + INSERT INTO t1 VALUES('FTS1 and FTS2 are obsolete full-text search'); + INSERT INTO t1 VALUES('modules for SQLite. There are known issues with'); + INSERT INTO t1 VALUES('these older modules and their use should be'); + INSERT INTO t1 VALUES('avoided. Portions of the original FTS3 code were'); + INSERT INTO t1 VALUES('contributed to the SQLite project by Scott Hess of'); + INSERT INTO t1 VALUES('Google. It is now developed and maintained as part'); + INSERT INTO t1 VALUES('of SQLite. '); +} +fts3_tap_test 2.2 db t1 {1 3 6} +do_execsql_test 2.3 { DELETE FROM t1 WHERE docid%2; } +fts3_tap_test 2.4 db t1 {1 3 6} + +do_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') } +fts3_tap_test 2.6 db t1 {1 3 6} + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t2 USING fts4(prefix='1,2,3'); + INSERT INTO t2 VALUES('On 12 September the wind direction turned and'); + INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the'); + INSERT INTO t2 VALUES('fleet was forced to take shelter at'); + INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind'); + INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet'); + INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey'); + INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved'); + INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he'); + INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of'); + INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland'); + INSERT INTO t2 VALUES('and waited for Harold''s return from the north.'); + INSERT INTO t2 VALUES('On 12 September the wind direction turned and'); + INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the'); + INSERT INTO t2 VALUES('fleet was forced to take shelter at'); + INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind'); + INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet'); + INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey'); + INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved'); + INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he'); + INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of'); + INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland'); + INSERT INTO t2 VALUES('and waited for Harold''s return from the north.'); +} + +fts3_tap_test 3.2 db t2 {1 2 3} +do_execsql_test 3.3 { SELECT optimize(t2) FROM t2 LIMIT 1 } {{Index optimized}} +fts3_tap_test 3.4 db t2 {1 2 3} + + +#------------------------------------------------------------------------- +# Simple tests for reading the prefix-index. +# +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE t3 USING fts4(prefix="1,4"); + INSERT INTO t3 VALUES('one two three'); + INSERT INTO t3 VALUES('four five six'); + INSERT INTO t3 VALUES('seven eight nine'); +} +do_execsql_test 4.2 { + SELECT * FROM t3 WHERE t3 MATCH 'f*' +} {{four five six}} +do_execsql_test 4.3 { + SELECT * FROM t3 WHERE t3 MATCH 'four*' +} {{four five six}} +do_execsql_test 4.4 { + SELECT * FROM t3 WHERE t3 MATCH 's*' +} {{four five six} {seven eight nine}} +do_execsql_test 4.5 { + SELECT * FROM t3 WHERE t3 MATCH 'sev*' +} {{seven eight nine}} +do_execsql_test 4.6 { + SELECT * FROM t3 WHERE t3 MATCH 'one*' +} {{one two three}} + +finish_test Index: test/fts3rnd.test ================================================================== --- test/fts3rnd.test +++ test/fts3rnd.test @@ -160,19 +160,21 @@ set ret } # This [proc] is used to test the FTS3 matchinfo() function. # -proc simple_token_matchinfo {zToken} { +proc simple_token_matchinfo {zToken bDesc} { set nDoc(0) 0 set nDoc(1) 0 set nDoc(2) 0 set nHit(0) 0 set nHit(1) 0 set nHit(2) 0 + set dir -inc + if {$bDesc} { set dir -dec } foreach key [array names ::t1] { set value $::t1($key) set a($key) [list] foreach i {0 1 2} col $value { @@ -182,11 +184,11 @@ if {$hit>0} { incr nDoc($i) } } } set ret [list] - foreach docid [lsort -integer [array names a]] { + foreach docid [lsort -integer $dir [array names a]] { if { [lindex [lsort -integer $a($docid)] end] } { set matchinfo [list 1 3] foreach i {0 1 2} hit $a($docid) { lappend matchinfo $hit $nHit($i) $nDoc($i) } @@ -260,34 +262,51 @@ set scan(bigEndian) I* binary scan $blob $scan($::tcl_platform(byteOrder)) r return $r } db func mit mit - set sqlite_fts3_enable_parentheses 1 -foreach nodesize {50 500 1000 2000} { +proc do_orderbydocid_test {tn sql res} { + uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res] + uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \ + [lsort -int -dec $res] + ] +} + +set NUM_TRIALS 100 + +foreach {nodesize order} { + 50 DESC + 50 ASC + 500 ASC + 1000 DESC + 2000 ASC +} { catch { array unset ::t1 } + set testname "$nodesize/$order" # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. # db transaction { catchsql { DROP TABLE t1 } - execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)" + execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)" execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" for {set i 0} {$i < 100} {incr i} { insert_row $i } } - for {set iTest 1} {$iTest <= 100} {incr iTest} { + for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} { catchsql COMMIT set DO_MALLOC_TEST 0 set nRep 10 if {$iTest==100 && $nodesize==50} { set DO_MALLOC_TEST 1 set nRep 2 } + + set ::testprefix fts3rnd-1.$testname.$iTest # Delete one row, update one row and insert one row. # set rows [array names ::t1] set nRow [llength $rows] @@ -305,53 +324,58 @@ update_row $iUpdate delete_row $iDelete if {0==($iTest%2)} { execsql COMMIT } if {0==($iTest%2)} { - do_test fts3rnd-1.$nodesize.$iTest.0 { fts3_integrity_check t1 } ok + #do_test 0 { fts3_integrity_check t1 } ok } # Pick 10 terms from the vocabulary. Check that the results of querying # the database for the set of documents containing each of these terms # is the same as the result obtained by scanning the contents of the Tcl # array for each term. # for {set i 0} {$i < 10} {incr i} { set term [random_term] - do_select_test fts3rnd-1.$nodesize.$iTest.1.$i { + do_select_test 1.$i.asc { + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term + ORDER BY docid ASC + } [simple_token_matchinfo $term 0] + do_select_test 1.$i.desc { SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term - } [simple_token_matchinfo $term] + ORDER BY docid DESC + } [simple_token_matchinfo $term 1] } # This time, use the first two characters of each term as a term prefix # to query for. Test that querying the Tcl array produces the same results # as querying the FTS3 table for the prefix. # for {set i 0} {$i < $nRep} {incr i} { set prefix [string range [random_term] 0 end-1] set match "${prefix}*" - do_select_test fts3rnd-1.$nodesize.$iTest.2.$i { + do_orderbydocid_test 2.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_phrase $match] } # Similar to the above, except for phrase queries. # for {set i 0} {$i < $nRep} {incr i} { set term [list [random_term] [random_term]] set match "\"$term\"" - do_select_test fts3rnd-1.$nodesize.$iTest.3.$i { + do_orderbydocid_test 3.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_phrase $term] } # Three word phrases. # for {set i 0} {$i < $nRep} {incr i} { set term [list [random_term] [random_term] [random_term]] set match "\"$term\"" - do_select_test fts3rnd-1.$nodesize.$iTest.4.$i { + do_orderbydocid_test 4.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_phrase $term] } # Three word phrases made up of term-prefixes. @@ -360,21 +384,23 @@ set query "[string range [random_term] 0 end-1]* " append query "[string range [random_term] 0 end-1]* " append query "[string range [random_term] 0 end-1]*" set match "\"$query\"" - do_select_test fts3rnd-1.$nodesize.$iTest.5.$i { + do_orderbydocid_test 5.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_phrase $query] } - # A NEAR query with terms as the arguments. + # A NEAR query with terms as the arguments: + # + # ... MATCH '$term1 NEAR $term2' ... # for {set i 0} {$i < $nRep} {incr i} { set terms [list [random_term] [random_term]] set match [join $terms " NEAR "] - do_select_test fts3rnd-1.$nodesize.$iTest.6.$i { + do_orderbydocid_test 6.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_near $terms 10] } # A 3-way NEAR query with terms as the arguments. @@ -381,11 +407,11 @@ # for {set i 0} {$i < $nRep} {incr i} { set terms [list [random_term] [random_term] [random_term]] set nNear 11 set match [join $terms " NEAR/$nNear "] - do_select_test fts3rnd-1.$nodesize.$iTest.7.$i { + do_orderbydocid_test 7.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [simple_near $terms $nNear] } # Set operations on simple term queries. @@ -397,30 +423,30 @@ } { for {set i 0} {$i < $nRep} {incr i} { set term1 [random_term] set term2 [random_term] set match "$term1 $op $term2" - do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i { + do_orderbydocid_test $tn.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [$proc [simple_phrase $term1] [simple_phrase $term2]] } } # Set operations on NEAR queries. # foreach {tn op proc} { - 8 OR setop_or - 9 NOT setop_not - 10 AND setop_and + 11 OR setop_or + 12 NOT setop_not + 13 AND setop_and } { for {set i 0} {$i < $nRep} {incr i} { set term1 [random_term] set term2 [random_term] set term3 [random_term] set term4 [random_term] set match "$term1 NEAR $term2 $op $term3 NEAR $term4" - do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i { + do_orderbydocid_test $tn.$i { SELECT docid FROM t1 WHERE t1 MATCH $match } [$proc \ [simple_near [list $term1 $term2] 10] \ [simple_near [list $term3 $term4] 10] ] Index: test/fts3sort.test ================================================================== --- test/fts3sort.test +++ test/fts3sort.test @@ -19,21 +19,20 @@ ifcapable !fts3 { finish_test return } -set testprefix fts3sort -proc build_database {nRow} { +proc build_database {nRow param} { db close forcedelete test.db sqlite3 db test.db set vocab [list aa ab ac ba bb bc ca cb cc da] expr srand(0) - execsql { CREATE VIRTUAL TABLE t1 USING fts4 } + execsql "CREATE VIRTUAL TABLE t1 USING fts4($param)" for {set i 0} {$i < $nRow} {incr i} { set v [expr int(rand()*1000000)] set doc [list] for {set div 1} {$div < 1000000} {set div [expr $div*10]} { lappend doc [lindex $vocab [expr ($v/$div) % 10]] @@ -40,17 +39,28 @@ } execsql { INSERT INTO t1 VALUES($doc) } } } -set nRow 1000 -do_test 1.0 { - build_database $nRow - execsql { SELECT count(*) FROM t1 } -} $nRow +set testprefix fts3sort + +unset -nocomplain CONTROL +foreach {t param} { + 1 "" + 2 "order=asc" + 3 "order=desc" +} { + + set testprefix fts3sort-1.$t -foreach {tn query} { + set nRow 1000 + do_test 1.0 { + build_database $nRow $param + execsql { SELECT count(*) FROM t1 } + } $nRow + + foreach {tn query} { 1 "SELECT docid, * FROM t1" 2 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa'" 3 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a*'" 4 "SELECT docid, quote(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'a*'" 5 "SELECT docid, quote(matchinfo(t1,'pcnxals')) FROM t1 WHERE t1 MATCH 'b*'" @@ -57,52 +67,96 @@ 6 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a* b* c*'" 7 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa OR da'" 8 "SELECT docid, * FROM t1 WHERE t1 MATCH 'nosuchtoken'" 9 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'aa OR da'" 10 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'aa OR nosuchtoken'" -} { - - unset -nocomplain A B C D - set A_list [list] - set B_list [list] - set C_list [list] - set D_list [list] - - unset -nocomplain X - db eval "$query ORDER BY rowid ASC" X { - set A($X(docid)) [array get X] - lappend A_list $X(docid) - } - unset -nocomplain X - db eval "$query ORDER BY rowid DESC" X { - set B($X(docid)) [array get X] - lappend B_list $X(docid) - } - unset -nocomplain X - db eval "$query ORDER BY docid ASC" X { - set C($X(docid)) [array get X] - lappend C_list $X(docid) - } - unset -nocomplain X - db eval "$query ORDER BY docid DESC" X { - set D($X(docid)) [array get X] - lappend D_list $X(docid) - } - - do_test 1.$tn.1 { set A_list } [lsort -integer -increasing $A_list] - do_test 1.$tn.2 { set B_list } [lsort -integer -decreasing $B_list] - do_test 1.$tn.3 { set C_list } [lsort -integer -increasing $C_list] - do_test 1.$tn.4 { set D_list } [lsort -integer -decreasing $D_list] - - unset -nocomplain DATA - unset -nocomplain X - db eval "$query" X { - set DATA($X(docid)) [array get X] - } - - do_test 1.$tn.5 { lsort [array get A] } [lsort [array get DATA]] - do_test 1.$tn.6 { lsort [array get B] } [lsort [array get DATA]] - do_test 1.$tn.7 { lsort [array get C] } [lsort [array get DATA]] - do_test 1.$tn.8 { lsort [array get D] } [lsort [array get DATA]] -} - -finish_test + 11 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'aa NEAR bb'" + 12 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH '\"aa bb\"'" + 13 "SELECT docid, content FROM t1 WHERE t1 MATCH 'aa NEAR/2 bb NEAR/3 cc'" + 14 "SELECT docid, content FROM t1 WHERE t1 MATCH '\"aa bb cc\"'" + } { + + unset -nocomplain A B C D + set A_list [list] + set B_list [list] + set C_list [list] + set D_list [list] + + unset -nocomplain X + db eval "$query ORDER BY rowid ASC" X { + set A($X(docid)) [array get X] + lappend A_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY rowid DESC" X { + set B($X(docid)) [array get X] + lappend B_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY docid ASC" X { + set C($X(docid)) [array get X] + lappend C_list $X(docid) + } + unset -nocomplain X + db eval "$query ORDER BY docid DESC" X { + set D($X(docid)) [array get X] + lappend D_list $X(docid) + } + + do_test $tn.1 { set A_list } [lsort -integer -increasing $A_list] + do_test $tn.2 { set B_list } [lsort -integer -decreasing $B_list] + do_test $tn.3 { set C_list } [lsort -integer -increasing $C_list] + do_test $tn.4 { set D_list } [lsort -integer -decreasing $D_list] + + unset -nocomplain DATA + unset -nocomplain X + db eval "$query" X { + set DATA($X(docid)) [array get X] + } + + do_test $tn.5 { lsort [array get A] } [lsort [array get DATA]] + do_test $tn.6 { lsort [array get B] } [lsort [array get DATA]] + do_test $tn.7 { lsort [array get C] } [lsort [array get DATA]] + do_test $tn.8 { lsort [array get D] } [lsort [array get DATA]] + + if {[info exists CONTROL($tn)]} { + do_test $tn.9 { set CONTROL($tn) } [lsort [array get DATA]] + } else { + set CONTROL($tn) [lsort [array get DATA]] + } + } +} +unset -nocomplain CONTROL + +set testprefix fts3sort + +#------------------------------------------------------------------------- +# Tests for parsing the "order=asc" and "order=desc" directives. +# +foreach {tn param res} { + 1 "order=asc" {0 {}} + 2 "order=desc" {0 {}} + 3 "order=dec" {1 {unrecognized order: dec}} + 4 "order=xxx, order=asc" {1 {unrecognized order: xxx}} + 5 "order=desc, order=asc" {0 {}} +} { + execsql { DROP TABLE IF EXISTS t1 } + do_catchsql_test 2.1.$tn " + CREATE VIRTUAL TABLE t1 USING fts4(a, b, $param) + " $res +} + +do_execsql_test 2.2 { + BEGIN; + CREATE VIRTUAL TABLE t2 USING fts4(order=desc); + INSERT INTO t2 VALUES('aa bb'); + INSERT INTO t2 VALUES('bb cc'); + INSERT INTO t2 VALUES('cc aa'); + SELECT docid FROM t2 WHERE t2 MATCH 'aa'; + END; +} {3 1} +do_execsql_test 2.3 { + SELECT docid FROM t2 WHERE t2 MATCH 'aa'; +} {3 1} + +finish_test + Index: test/hook.test ================================================================== --- test/hook.test +++ test/hook.test @@ -272,10 +272,38 @@ SELECT * FROM t1 GROUP BY b; } set ::update_hook } [list] } + +do_test hook-4.4 { + execsql { + CREATE TABLE t4(a UNIQUE, b); + INSERT INTO t4 VALUES(1, 'a'); + INSERT INTO t4 VALUES(2, 'b'); + } + set ::update_hook [list] + execsql { + REPLACE INTO t4 VALUES(1, 'c'); + } + set ::update_hook +} [list INSERT main t4 3 ] +do_execsql_test hook-4.4.1 { + SELECT * FROM t4 ORDER BY a; +} {1 c 2 b} +do_test hook-4.4.2 { + set ::update_hook [list] + execsql { + PRAGMA recursive_triggers = on; + REPLACE INTO t4 VALUES(1, 'd'); + } + set ::update_hook +} [list INSERT main t4 4 ] +do_execsql_test hook-4.4.3 { + SELECT * FROM t4 ORDER BY a; +} {1 d 2 b} + db update_hook {} # #---------------------------------------------------------------------------- #---------------------------------------------------------------------------- Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -177,14 +177,15 @@ fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test fts3near.test fts3query.test fts3shared.test fts3snippet.test + fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test - fts3aux1.test fts3comp1.test + fts3aux1.test fts3comp1.test fts3auto.test } lappend ::testsuitelist xxx #------------------------------------------------------------------------- Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -372,15 +372,15 @@ } } proc do_execsql_test {testname sql {result {}}} { fix_testname testname - uplevel do_test $testname [list "execsql {$sql}"] [list [list {*}$result]] + uplevel do_test [list $testname] [list "execsql {$sql}"] [list [list {*}$result]] } proc do_catchsql_test {testname sql result} { fix_testname testname - uplevel do_test $testname [list "catchsql {$sql}"] [list $result] + uplevel do_test [list $testname] [list "catchsql {$sql}"] [list $result] } proc do_eqp_test {name sql res} { uplevel do_execsql_test $name [list "EXPLAIN QUERY PLAN $sql"] [list $res] } Index: test/trace2.test ================================================================== --- test/trace2.test +++ test/trace2.test @@ -139,14 +139,14 @@ do_trace_test 2.3 { INSERT INTO x1(x1) VALUES('optimize'); } { "INSERT INTO x1(x1) VALUES('optimize');" - "-- SELECT idx, start_block, leaves_end_block, end_block, root FROM 'main'.'x1_segdir' ORDER BY level DESC, idx ASC" - "-- SELECT count(*), max(level) FROM 'main'.'x1_segdir'" + "-- SELECT idx, start_block, leaves_end_block, end_block, root FROM 'main'.'x1_segdir' WHERE level BETWEEN ? AND ?ORDER BY level DESC, idx ASC" + "-- SELECT max(level) FROM 'main'.'x1_segdir' WHERE level BETWEEN ? AND ?" "-- SELECT coalesce((SELECT max(blockid) FROM 'main'.'x1_segments') + 1, 1)" - "-- DELETE FROM 'main'.'x1_segdir'" + "-- DELETE FROM 'main'.'x1_segdir' WHERE level BETWEEN ? AND ?" "-- INSERT INTO 'main'.'x1_segdir' VALUES(?,?,?,?,?,?)" } } finish_test