Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch fts5-perf Excluding Merge-Ins
This is equivalent to a diff from 3be336aa89 to b4ac61aeee
2016-01-26
| ||
20:19 | Performance improvements for fts5, particularly detail=col mode. (check-in: a3d7b8ac53 user: dan tags: trunk) | |
20:08 | Further minor performance improvements and code-size reductions related to fts5 column filters on detail=col tables. (Leaf check-in: b4ac61aeee user: dan tags: fts5-perf) | |
19:30 | Improve the performance of fts5 column filters on detail=col tables. (check-in: 249a2d070c user: dan tags: fts5-perf) | |
2016-01-23
| ||
18:24 | Changes to spellfix to try to get it to use stack space instead of heap space in cases where that makes sense. (check-in: dfcebc7393 user: drh tags: trunk) | |
16:20 | Merge trunk changes (including fixes for warnings in fts5) with this branch. (check-in: ceccc9ad78 user: dan tags: fts5-perf) | |
15:57 | Fix some signed/unsigned comparison compiler warnings in fts5. (check-in: 3be336aa89 user: dan tags: trunk) | |
14:05 | Remove an assert() that can be false if compiled with SQLITE_USE_ALLOCA. (check-in: f0a551edf8 user: drh tags: trunk) | |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
25 26 27 28 29 30 31 | typedef unsigned char u8; typedef unsigned int u32; typedef unsigned short u16; typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; | | | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | typedef unsigned char u8; typedef unsigned int u32; typedef unsigned short u16; typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; #define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0]))) #define testcase(x) #define ALWAYS(x) 1 #define NEVER(x) 0 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) #define MAX(x,y) (((x) > (y)) ? (x) : (y)) |
︙ | ︙ | |||
311 312 313 314 315 316 317 318 319 320 321 322 323 324 | ** Interface to code in fts5_index.c. fts5_index.c contains contains code ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ | > > > > > > | 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | ** Interface to code in fts5_index.c. fts5_index.c contains contains code ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; struct Fts5IndexIter { i64 iRowid; const u8 *pData; int nData; }; /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ |
︙ | ︙ | |||
378 379 380 381 382 383 384 | ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). */ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); | < < | 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). */ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter*); /* |
︙ | ︙ | |||
465 466 467 468 469 470 471 | int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); | < < | 469 470 471 472 473 474 475 476 477 478 479 480 481 482 | int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_varint.c. */ |
︙ | ︙ |
Changes to ext/fts5/fts5_aux.c.
︙ | ︙ | |||
540 541 542 543 544 545 546 | { "snippet", 0, fts5SnippetFunction, 0 }, { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ | | | 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 | { "snippet", 0, fts5SnippetFunction, 0 }, { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ rc = pApi->xCreateFunction(pApi, aBuiltin[i].zFunc, aBuiltin[i].pUserData, aBuiltin[i].xFunc, aBuiltin[i].xDestroy ); } |
︙ | ︙ |
Changes to ext/fts5/fts5_buffer.c.
︙ | ︙ | |||
318 319 320 321 322 323 324 | const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; *pbPresent = 0; if( p ){ int i; | | | | 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 | const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; *pbPresent = 0; if( p ){ int i; u32 hash = 13; Fts5TermsetEntry *pEntry; /* Calculate a hash value for this term. This is the same hash checksum ** used by the fts5_hash.c module. This is not important for correct ** operation of the module, but is necessary to ensure that some tests ** designed to produce hash table collisions really do work. */ for(i=nTerm-1; i>=0; i--){ hash = (hash << 3) ^ hash ^ pTerm[i]; } hash = (hash << 3) ^ hash ^ iIdx; hash = hash % ArraySize(p->apHash); for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ if( pEntry->iIdx==iIdx && pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){ *pbPresent = 1; break; } } if( pEntry==0 ){ pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
302 303 304 305 306 307 308 | ** Argument pTerm must be a synonym iterator. */ static int fts5ExprSynonymList( Fts5ExprTerm *pTerm, int bCollist, Fts5Colset *pColset, i64 iRowid, | | < < < < < < < < < < < | | < < > | | < < | | < | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | ** Argument pTerm must be a synonym iterator. */ static int fts5ExprSynonymList( Fts5ExprTerm *pTerm, int bCollist, Fts5Colset *pColset, i64 iRowid, Fts5Buffer *pBuf, /* Use this buffer for space if required */ u8 **pa, int *pn ){ Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int nIter = 0; int nAlloc = 4; int rc = SQLITE_OK; Fts5ExprTerm *p; assert( pTerm->pSynonym ); for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ if( pIter->nData==0 ) continue; if( nIter==nAlloc ){ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( aNew==0 ){ rc = SQLITE_NOMEM; goto synonym_poslist_out; } memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); nAlloc = nAlloc*2; if( aIter!=aStatic ) sqlite3_free(aIter); aIter = aNew; } sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); assert( aIter[nIter].bEof==0 ); nIter++; } } if( nIter==1 ){ *pa = (u8*)aIter[0].a; *pn = aIter[0].n; }else{ Fts5PoslistWriter writer = {0}; i64 iPrev = -1; fts5BufferZero(pBuf); while( 1 ){ int i; i64 iMin = FTS5_LARGEST_INT64; for(i=0; i<nIter; i++){ if( aIter[i].bEof==0 ){ if( aIter[i].iPos==iPrev ){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; } if( aIter[i].iPos<iMin ){ iMin = aIter[i].iPos; } } } if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); iPrev = iMin; } if( rc==SQLITE_OK ){ *pa = pBuf->p; *pn = pBuf->n; } } synonym_poslist_out: if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } |
︙ | ︙ | |||
413 414 415 416 417 418 419 | int i; int rc = SQLITE_OK; fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ | | < | > | > > > > > > | < | 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | int i; int rc = SQLITE_OK; fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>ArraySize(aStatic) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; int n = 0; int bFlag = 0; u8 *a = 0; if( pTerm->pSynonym ){ Fts5Buffer buf = {0, 0, 0}; rc = fts5ExprSynonymList( pTerm, 0, pColset, pNode->iRowid, &buf, &a, &n ); if( rc ){ sqlite3_free(a); goto ismatch_out; } if( a==buf.p ) bFlag = 1; }else{ a = (u8*)pTerm->pIter->pData; n = pTerm->pIter->nData; } sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); aIter[i].bFlag = (u8)bFlag; if( aIter[i].bEof ) goto ismatch_out; } while( 1 ){ int bMatch; |
︙ | ︙ | |||
549 550 551 552 553 554 555 | int rc = *pRc; int bMatch; assert( pNear->nPhrase>1 ); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ | | | 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 | int rc = *pRc; int bMatch; assert( pNear->nPhrase>1 ); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pNear->nPhrase>ArraySize(aStatic) ){ int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); }else{ memset(aStatic, 0, sizeof(aStatic)); } if( rc!=SQLITE_OK ){ *pRc = rc; |
︙ | ︙ | |||
771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ Fts5ExprTerm *pTerm; Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; pPhrase->poslist.n = 0; for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ Fts5IndexIter *pIter = pTerm->pIter; if( sqlite3Fts5IterEof(pIter)==0 ){ int n; i64 iRowid; rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid); if( rc!=SQLITE_OK ){ *pRc = rc; return 0; }else if( iRowid==pNode->iRowid && n>0 ){ pPhrase->poslist.n = 1; } } } return pPhrase->poslist.n; }else{ int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); } } *pRc = rc; if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ return 1; } return 0; } } static int fts5ExprTokenTest( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ ){ /* As this "NEAR" object is actually a single phrase that consists ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR ** expressions. */ | > > > > > > > > > < | < < | | > | < > > | | 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ Fts5ExprTerm *pTerm; Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; pPhrase->poslist.n = 0; for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ Fts5IndexIter *pIter = pTerm->pIter; if( sqlite3Fts5IterEof(pIter)==0 ){ #if 0 int n; i64 iRowid; rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid); if( rc!=SQLITE_OK ){ *pRc = rc; return 0; }else if( iRowid==pNode->iRowid && n>0 ){ pPhrase->poslist.n = 1; } #endif if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ pPhrase->poslist.n = 1; } } } return pPhrase->poslist.n; }else{ int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); #if 0 rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); #endif } } *pRc = rc; if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ return 1; } return 0; } } static int fts5ExprTokenTest( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ ){ /* As this "NEAR" object is actually a single phrase that consists ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR ** expressions. */ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; assert( pNode->eType==FTS5_TERM ); assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); assert( pPhrase->aTerm[0].pSynonym==0 ); pPhrase->poslist.n = pIter->nData; if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ pPhrase->poslist.p = (u8*)pIter->pData; } pNode->iRowid = pIter->iRowid; pNode->bNomatch = (pPhrase->poslist.n==0); return SQLITE_OK; } /* ** All individual term iterators in pNear are guaranteed to be valid when ** this function is called. This function checks if all term iterators ** point to the same rowid, and if not, advances them until they do. ** If an EOF is reached before this happens, *pbEof is set to true before |
︙ | ︙ | |||
1366 1367 1368 1369 1370 1371 1372 | int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); sqlite3Fts5IterClose(pTerm->pIter); | < > | 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 | int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; sqlite3Fts5IterClose(pSyn->pIter); fts5BufferFree((Fts5Buffer*)&pSyn[1]); sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } |
︙ | ︙ | |||
1457 1458 1459 1460 1461 1462 1463 | /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; | | | | 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 | /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ){ rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, nByte); pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); memcpy(pSyn->zTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } }else{ Fts5ExprTerm *pTerm; if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ |
︙ | ︙ | |||
2242 2243 2244 2245 2246 2247 2248 | { "fts5_isalnum", fts5ExprIsAlnum }, { "fts5_fold", fts5ExprFold }, }; int i; int rc = SQLITE_OK; void *pCtx = (void*)pGlobal; | | | 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 | { "fts5_isalnum", fts5ExprIsAlnum }, { "fts5_fold", fts5ExprFold }, }; int i; int rc = SQLITE_OK; void *pCtx = (void*)pGlobal; for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){ struct Fts5ExprFunc *p = &aFunc[i]; rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); } /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */ #ifndef NDEBUG (void)sqlite3Fts5ParserTrace; |
︙ | ︙ | |||
2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 | int *pnCollist ){ Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; Fts5ExprNode *pNode = pPhrase->pNode; int rc = SQLITE_OK; assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid && pPhrase->poslist.n>0 ){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; if( pTerm->pSynonym ){ | > > | < | < < < < | | < < | | 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 | int *pnCollist ){ Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; Fts5ExprNode *pNode = pPhrase->pNode; int rc = SQLITE_OK; assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid && pPhrase->poslist.n>0 ){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; if( pTerm->pSynonym ){ Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; rc = fts5ExprSynonymList( pTerm, 1, 0, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist ); }else{ *ppCollist = pPhrase->aTerm[0].pIter->pData; *pnCollist = pPhrase->aTerm[0].pIter->nData; } }else{ *ppCollist = 0; *pnCollist = 0; } return rc; } |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
257 258 259 260 261 262 263 264 265 266 267 268 269 270 | #define FTS5_DATA_ZERO_PADDING 8 #define FTS5_DATA_PADDING 20 typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; | > | 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | #define FTS5_DATA_ZERO_PADDING 8 #define FTS5_DATA_PADDING 20 typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5Iter Fts5Iter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; |
︙ | ︙ | |||
499 500 501 502 503 504 505 | ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. ** ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ | | > > > > > > | 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 | ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. ** ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ struct Fts5Iter { Fts5IndexIter base; /* Base class containing output vars */ Fts5Index *pIndex; /* Index that owns this iterator */ Fts5Structure *pStruct; /* Database structure for this iterator */ Fts5Buffer poslist; /* Buffer containing current poslist */ Fts5Colset *pColset; /* Restrict matches to these columns */ /* Invoked to set output variables. */ void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ u8 bSkipEmpty; /* True to skip deleted entries */ u8 bEof; /* True at EOF */ u8 bFiltered; /* True if column-filter already applied */ |
︙ | ︙ | |||
1748 1749 1750 1751 1752 1753 1754 | } /* ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ | | | 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 | } /* ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } /* ** Advance iterator pIter to the next entry. ** |
︙ | ︙ | |||
2402 2403 2404 2405 2406 2407 2408 | /* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. */ static void fts5AssertComparisonResult( | | | 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 | /* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. */ static void fts5AssertComparisonResult( Fts5Iter *pIter, Fts5SegIter *p1, Fts5SegIter *p2, Fts5CResult *pRes ){ int i1 = p1 - pIter->aSeg; int i2 = p2 - pIter->aSeg; |
︙ | ︙ | |||
2443 2444 2445 2446 2447 2448 2449 | /* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ | | | 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 | /* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int i; assert( (pFirst->pLeaf==0)==pIter->bEof ); /* Check that pIter->iSwitchRowid is set correctly. */ |
︙ | ︙ | |||
2488 2489 2490 2491 2492 2493 2494 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. */ | | | 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. */ static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ int i1; /* Index of left-hand Fts5SegIter */ int i2; /* Index of right-hand Fts5SegIter */ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ Fts5CResult *pRes = &pIter->aFirst[iOut]; |
︙ | ︙ | |||
2634 2635 2636 2637 2638 2639 2640 | }while( p->rc==SQLITE_OK ); } /* ** Free the iterator object passed as the second argument. */ | | | | 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 | }while( p->rc==SQLITE_OK ); } /* ** Free the iterator object passed as the second argument. */ static void fts5MultiIterFree(Fts5Index *p, Fts5Iter *pIter){ if( pIter ){ int i; for(i=0; i<pIter->nSeg; i++){ fts5SegIterClear(&pIter->aSeg[i]); } fts5StructureRelease(pIter->pStruct); fts5BufferFree(&pIter->poslist); sqlite3_free(pIter); } } static void fts5MultiIterAdvanced( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ int iChanged, /* Index of sub-iterator just advanced */ int iMinset /* Minimum entry in aFirst[] to set */ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ |
︙ | ︙ | |||
2676 2677 2678 2679 2680 2681 2682 | ** ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() ** on the iterator instead. That function does the same as this one, except ** that it deals with more complicated cases as well. */ static int fts5MultiIterAdvanceRowid( Fts5Index *p, /* FTS5 backend to iterate within */ | | | > | 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 | ** ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() ** on the iterator instead. That function does the same as this one, except ** that it deals with more complicated cases as well. */ static int fts5MultiIterAdvanceRowid( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ int iChanged, /* Index of sub-iterator just advanced */ Fts5SegIter **ppFirst ){ Fts5SegIter *pNew = &pIter->aSeg[iChanged]; if( pNew->iRowid==pIter->iSwitchRowid || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev ){ int i; |
︙ | ︙ | |||
2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 | pRes->iFirst = (u16)(pNew - pIter->aSeg); if( i==1 ) break; pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; } } return 0; } /* ** Set the pIter->bEof variable based on the state of the sub-iterators. */ | > | | < | < > | | | | | | | | | | | | | | > > | > | > > > > | | < | | > | > | | | > | | | | 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 | pRes->iFirst = (u16)(pNew - pIter->aSeg); if( i==1 ) break; pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; } } *ppFirst = pNew; return 0; } /* ** Set the pIter->bEof variable based on the state of the sub-iterators. */ static void fts5MultiIterSetEof(Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; pIter->bEof = pSeg->pLeaf==0; pIter->iSwitchRowid = pSeg->iRowid; } /* ** Move the iterator to the next entry. ** ** If an error occurs, an error code is left in Fts5Index.rc. It is not ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called. */ static void fts5MultiIterNext( Fts5Index *p, Fts5Iter *pIter, int bFrom, /* True if argument iFrom is valid */ i64 iFrom /* Advance at least as far as this */ ){ int bUseFrom = bFrom; while( p->rc==SQLITE_OK ){ int iFirst = pIter->aFirst[1].iFirst; int bNewTerm = 0; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; assert( p->rc==SQLITE_OK ); if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ pSeg->xNext(p, pSeg, &bNewTerm); } if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); fts5MultiIterSetEof(pIter); pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; if( pSeg->pLeaf==0 ) return; } fts5AssertMultiIterSetup(p, pIter); assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); if( pIter->bSkipEmpty==0 || pSeg->nPos ){ pIter->xSetOutputs(pIter, pSeg); return; } bUseFrom = 0; } } static void fts5MultiIterNext2( Fts5Index *p, Fts5Iter *pIter, int *pbNewTerm /* OUT: True if *might* be new term */ ){ assert( pIter->bSkipEmpty ); if( p->rc==SQLITE_OK ){ do { int iFirst = pIter->aFirst[1].iFirst; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; int bNewTerm = 0; assert( p->rc==SQLITE_OK ); pSeg->xNext(p, pSeg, &bNewTerm); if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); fts5MultiIterSetEof(pIter); *pbNewTerm = 1; }else{ *pbNewTerm = 0; } fts5AssertMultiIterSetup(p, pIter); }while( fts5MultiIterIsEmpty(p, pIter) ); } } static void fts5IterSetOutputs_Noop(Fts5Iter *pIter, Fts5SegIter *pSeg){ } static Fts5Iter *fts5MultiIterAlloc( Fts5Index *p, /* FTS5 backend to iterate within */ int nSeg ){ Fts5Iter *pNew; int nSlot; /* Power of two >= nSeg */ for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); pNew = fts5IdxMalloc(p, sizeof(Fts5Iter) + /* pNew */ sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ ); if( pNew ){ pNew->nSeg = nSlot; pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->pIndex = p; pNew->xSetOutputs = fts5IterSetOutputs_Noop; } return pNew; } /* ** Allocate a new Fts5Iter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. ** ** The iterator initially points to the first term/rowid entry in the ** iterated data. */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5Iter **ppOut /* New object */ ){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5Buffer buf = {0,0,0}; /* Buffer used by fts5SegIterSeekInit() */ Fts5StructureLevel *pLvl; Fts5Iter *pNew; assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); |
︙ | ︙ | |||
2913 2914 2915 2916 2917 2918 2919 | fts5MultiIterFree(p, pNew); *ppOut = 0; } fts5BufferFree(&buf); } /* | | | | | 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 | fts5MultiIterFree(p, pNew); *ppOut = 0; } fts5BufferFree(&buf); } /* ** Create an Fts5Iter that iterates through the doclist provided ** as the second argument. */ static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ int bDesc, /* True for descending rowid order */ Fts5Iter **ppOut /* New object */ ){ Fts5Iter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pNew->bFiltered = 1; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->szLeaf>0 ){ |
︙ | ︙ | |||
2957 2958 2959 2960 2961 2962 2963 | fts5DataRelease(pData); } /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ | | | | | | 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 | fts5DataRelease(pData); } /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ assert( p->rc || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof ); return (p->rc || pIter->bEof); } /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5Iter *pIter){ assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* ** Move the iterator to the next entry at or following iMatch. */ static void fts5MultiIterNextFrom( Fts5Index *p, Fts5Iter *pIter, i64 iMatch ){ while( 1 ){ i64 iRowid; fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); if( pIter->bRev==0 && iRowid>=iMatch ) break; if( pIter->bRev!=0 && iRowid<=iMatch ) break; } } /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } static void fts5ChunkIterate( Fts5Index *p, /* Index object */ |
︙ | ︙ | |||
3578 3579 3580 3581 3582 3583 3584 | } /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. */ | | | 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 | } /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. */ static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ int i; Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; i<pIter->nSeg; i++){ Fts5SegIter *pSeg = &pIter->aSeg[i]; if( pSeg->pSeg==0 ){ /* no-op */ |
︙ | ︙ | |||
3656 3657 3658 3659 3660 3661 3662 | Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; | | | 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 | Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; Fts5Iter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ int eDetail = p->pConfig->eDetail; |
︙ | ︙ | |||
4338 4339 4340 4341 4342 4343 4344 | } return p - (*pa); } static int fts5AppendRowid( Fts5Index *p, i64 iDelta, | | | 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 | } return p - (*pa); } static int fts5AppendRowid( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, Fts5Colset *pColset, Fts5Buffer *pBuf ){ fts5BufferAppendVarint(&p->rc, pBuf, iDelta); return 0; } |
︙ | ︙ | |||
4363 4364 4365 4366 4367 4368 4369 | ** even iDelta). ** ** If an error occurs, an error code is left in p->rc. */ static int fts5AppendPoslist( Fts5Index *p, i64 iDelta, | | | 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 | ** even iDelta). ** ** If an error occurs, an error code is left in p->rc. */ static int fts5AppendPoslist( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); assert( pSeg->nPos>0 ); |
︙ | ︙ | |||
4641 4642 4643 4644 4645 4646 4647 | static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ | | | | | 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 | static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); int (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Colset*, Fts5Buffer*); if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ xMerge = fts5MergeRowidLists; xAppend = fts5AppendRowid; }else{ xMerge = fts5MergePrefixLists; xAppend = fts5AppendPoslist; } aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN; int i; i64 iLastRowid = 0; Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; Fts5Buffer doclist; int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; |
︙ | ︙ | |||
4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 | ); } } return rc; } /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5Colset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < > < > > > > > | > > > > > > > > > | > | | | | > | > | 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 | ); } } return rc; } static int fts5IndexExtractColset ( Fts5Colset *pColset, /* Colset to filter on */ const u8 *pPos, int nPos, /* Position list */ Fts5Buffer *pBuf /* Output buffer */ ){ int rc = SQLITE_OK; int i; fts5BufferZero(pBuf); for(i=0; i<pColset->nCol; i++){ const u8 *pSub = pPos; int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); if( nSub ){ fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); } } return rc; } /* ** xSetOutputs callback used by detail=none tables. */ static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); pIter->base.iRowid = pSeg->iRowid; pIter->base.nData = pSeg->nPos; } /* ** xSetOutputs callback used by detail=full and detail=col tables when no ** column filters are specified. */ static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ pIter->base.iRowid = pSeg->iRowid; pIter->base.nData = pSeg->nPos; assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); assert( pIter->pColset==0 || pIter->bFiltered ); if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ /* All data is stored on the current page. Populate the output ** variables to point into the body of the page object. */ pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ /* The data is distributed over two or more pages. Copy it into the ** Fts5Iter.poslist buffer and then set the output pointer to point ** to this buffer. */ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); pIter->base.pData = pIter->poslist.p; } } /* ** xSetOutputs callback used by detail=col when there is a column filter ** and there are 100 or more columns. Also called as a fallback from ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. */ static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); pIter->base.iRowid = pSeg->iRowid; pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } /* ** xSetOutputs callback used when: ** ** * detail=col, ** * there is a column filter, and ** * the table contains 100 or fewer columns. ** ** The last point is to ensure all column numbers are stored as ** single-byte varints. */ static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); assert( pIter->pColset ); if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ fts5IterSetOutputs_Col(pIter, pSeg); }else{ u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; u8 *pEnd = (u8*)&a[pSeg->nPos]; int iPrev = 0; int *aiCol = pIter->pColset->aiCol; int *aiColEnd = &aiCol[pIter->pColset->nCol]; u8 *aOut = pIter->poslist.p; int iPrevOut = 0; pIter->base.iRowid = pSeg->iRowid; while( a<pEnd ){ iPrev += (int)a++[0] - 2; while( *aiCol<iPrev ){ aiCol++; if( aiCol==aiColEnd ) goto setoutputs_col_out; } if( *aiCol==iPrev ){ *aOut++ = (iPrev - iPrevOut) + 2; iPrevOut = iPrev; } } setoutputs_col_out: pIter->base.pData = pIter->poslist.p; pIter->base.nData = aOut - pIter->poslist.p; } } /* ** xSetOutputs callback used by detail=full when there is a column filter. */ static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ Fts5Colset *pColset = pIter->pColset; pIter->base.iRowid = pSeg->iRowid; assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); assert( pColset ); if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ /* All data is stored on the current page. Populate the output ** variables to point into the body of the page object. */ const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; if( pColset->nCol==1 ){ pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); pIter->base.pData = a; }else{ fts5BufferZero(&pIter->poslist); fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist); pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } }else{ /* The data is distributed over two or more pages. Copy it into the ** Fts5Iter.poslist buffer and then set the output pointer to point ** to this buffer. */ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } } static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ Fts5Config *pConfig = pIter->pIndex->pConfig; if( pConfig->eDetail==FTS5_DETAIL_NONE ){ pIter->xSetOutputs = fts5IterSetOutputs_None; } else if( pIter->pColset==0 || pIter->bFiltered ){ pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; } else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ pIter->xSetOutputs = fts5IterSetOutputs_Full; } else{ assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); if( pConfig->nCol<=100 ){ pIter->xSetOutputs = fts5IterSetOutputs_Col100; sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); }else{ pIter->xSetOutputs = fts5IterSetOutputs_Col; } } } /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5Colset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; Fts5Iter *pRet = 0; Fts5Buffer buf = {0, 0, 0}; /* If the QUERY_SCAN flag is set, all other flags must be clear. */ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. If this ** is a prefix query for which there is no prefix index, set iIdx to ** greater than pConfig->nPrefix to indicate that the query will be ** satisfied by scanning multiple terms in the main index. ** ** If the QUERY_TEST_NOIDX flag was specified, then this must be a ** prefix-query. Instead of using a prefix-index (if one exists), ** evaluate the prefix query using the main FTS index. This is used ** for internal sanity checking by the integrity-check in debug ** mode only. */ #ifdef SQLITE_DEBUG if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); if( pStruct ){ fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet); fts5StructureRelease(pStruct); } }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); } if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; pRet->pColset = pColset; fts5IterSetOutputCb(&p->rc, pRet); if( p->rc==SQLITE_OK && pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); } if( p->rc ){ sqlite3Fts5IterClose(&pRet->base); pRet = 0; fts5CloseReader(p); } *ppIter = &pRet->base; sqlite3Fts5BufferFree(&buf); } return fts5IndexReturn(p); } /* ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ assert( ((Fts5Iter*)pIter)->pIndex->rc==SQLITE_OK ); return ((Fts5Iter*)pIter)->bEof; } /* ** Move to the next matching rowid. */ int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching term/rowid. Used by the fts5vocab module. */ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *p = pIter->pIndex; assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(p, pIter, 0, 0); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
︙ | ︙ | |||
5035 5036 5037 5038 5039 5040 5041 | } /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ | | > | | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | | > | 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 | } /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); return fts5IndexReturn(pIter->pIndex); } /* ** Return the current rowid. */ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIndexIter){ return fts5MultiIterRowid((Fts5Iter*)pIndexIter); } /* ** Return the current term. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ int n; const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); *pn = n-1; return &z[1]; } /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ if( pIndexIter ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; fts5MultiIterFree(pIter->pIndex, pIter); fts5CloseReader(pIndex); } } /* |
︙ | ︙ | |||
5324 5325 5326 5327 5328 5329 5330 | const char *z, /* Index key to query for */ int n, /* Size of index key in bytes */ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; | | < | | | < < | | | | | | | | < | | < | 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 | const char *z, /* Index key to query for */ int n, /* Size of index key in bytes */ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; Fts5IndexIter *pIter = 0; int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){ i64 rowid = sqlite3Fts5IterRowid(pIter); if( eDetail==FTS5_DETAIL_NONE ){ cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); }else{ Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); sReader.bEof==0; sqlite3Fts5PoslistReaderNext(&sReader) ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IterNext(pIter); } } sqlite3Fts5IterClose(pIter); *pCksum = cksum; return rc; } /* |
︙ | ︙ | |||
5657 5658 5659 5660 5661 5662 5663 | ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ int eDetail = p->pConfig->eDetail; u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ | | | 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 | ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ int eDetail = p->pConfig->eDetail; u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5Iter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ #ifdef SQLITE_DEBUG /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ #endif |
︙ | ︙ |
Changes to ext/fts5/fts5_main.c.
︙ | ︙ | |||
534 535 536 537 538 539 540 | aColMap[1] = pConfig->nCol; aColMap[2] = pConfig->nCol+1; /* Set idxFlags flags for all WHERE clause terms that will be used. */ for(i=0; i<pInfo->nConstraint; i++){ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; int j; | | | 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 | aColMap[1] = pConfig->nCol; aColMap[2] = pConfig->nCol+1; /* Set idxFlags flags for all WHERE clause terms that will be used. */ for(i=0; i<pInfo->nConstraint; i++){ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; int j; for(j=0; j<ArraySize(aConstraint); j++){ struct Constraint *pC = &aConstraint[j]; if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){ if( p->usable ){ pC->iConsIndex = i; idxFlags |= pC->fts5op; }else if( j==0 ){ /* As there exists an unusable MATCH constraint this is an |
︙ | ︙ | |||
581 582 583 584 585 586 587 | pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0; }else{ pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0; } /* Assign argvIndex values to each constraint in use. */ iNext = 1; | | | 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0; }else{ pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0; } /* Assign argvIndex values to each constraint in use. */ iNext = 1; for(i=0; i<ArraySize(aConstraint); i++){ struct Constraint *pC = &aConstraint[i]; if( pC->iConsIndex>=0 ){ pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++; pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit; } } |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
334 335 336 337 338 339 340 | */ int sqlite3Fts5StorageClose(Fts5Storage *p){ int rc = SQLITE_OK; if( p ){ int i; /* Finalize all SQL statements */ | | | 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | */ int sqlite3Fts5StorageClose(Fts5Storage *p){ int rc = SQLITE_OK; if( p ){ int i; /* Finalize all SQL statements */ for(i=0; i<ArraySize(p->aStmt); i++){ sqlite3_finalize(p->aStmt[i]); } sqlite3_free(p); } return rc; } |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
1216 1217 1218 1219 1220 1221 1222 | { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ | | | 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 | { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ rc = pApi->xCreateTokenizer(pApi, aBuiltin[i].zName, (void*)pApi, &aBuiltin[i].x, 0 ); } |
︙ | ︙ |
Changes to ext/fts5/fts5_vocab.c.
︙ | ︙ | |||
180 181 182 183 184 185 186 | const char *zType = bDb ? argv[5] : argv[4]; int nDb = (int)strlen(zDb)+1; int nTab = (int)strlen(zTab)+1; int eType = 0; rc = fts5VocabTableType(zType, pzErr, &eType); if( rc==SQLITE_OK ){ | | | 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | const char *zType = bDb ? argv[5] : argv[4]; int nDb = (int)strlen(zDb)+1; int nTab = (int)strlen(zTab)+1; int eType = 0; rc = fts5VocabTableType(zType, pzErr, &eType); if( rc==SQLITE_OK ){ assert( eType>=0 && eType<ArraySize(azSchema) ); rc = sqlite3_declare_vtab(db, azSchema[eType]); } nByte = sizeof(Fts5VocabTable) + nDb + nTab; pRet = sqlite3Fts5MallocZero(&rc, nByte); if( pRet ){ pRet->pGlobal = (Fts5Global*)pAux; |
︙ | ︙ | |||
403 404 405 406 407 408 409 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); memset(pCsr->aCnt, 0, nCol * sizeof(i64)); memset(pCsr->aDoc, 0, nCol * sizeof(i64)); pCsr->iCol = 0; assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ | < > > > | < | | | | | | | | | | | | | | | | | < < < < | | | | | | | | < < | 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); memset(pCsr->aCnt, 0, nCol * sizeof(i64)); memset(pCsr->aDoc, 0, nCol * sizeof(i64)); pCsr->iCol = 0; assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ pPos = pCsr->pIter->pData; nPos = pCsr->pIter->nData; switch( pCsr->pConfig->eDetail ){ case FTS5_DETAIL_FULL: pPos = pCsr->pIter->pData; nPos = pCsr->pIter->nData; if( pTab->eType==FTS5_VOCAB_ROW ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ pCsr->aCnt[0]++; } pCsr->aDoc[0]++; }else{ int iCol = -1; while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ int ii = FTS5_POS2COLUMN(iPos); pCsr->aCnt[ii]++; if( iCol!=ii ){ if( ii>=nCol ){ rc = FTS5_CORRUPT; break; } pCsr->aDoc[ii]++; iCol = ii; } } } break; case FTS5_DETAIL_COLUMNS: if( pTab->eType==FTS5_VOCAB_ROW ){ pCsr->aDoc[0]++; }else{ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ assert_nc( iPos>=0 && iPos<nCol ); if( iPos>=nCol ){ rc = FTS5_CORRUPT; break; } pCsr->aDoc[iPos]++; } } break; default: assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); pCsr->aDoc[0]++; break; |
︙ | ︙ |
Changes to ext/fts5/test/fts5_common.tcl.
︙ | ︙ | |||
44 45 46 47 48 49 50 | for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { $cmd xPhraseForeach $i c o { lappend res $i.$c.$o } } | | > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { $cmd xPhraseForeach $i c o { lappend res $i.$c.$o } } #set res sort_poslist $res } proc fts5_test_collist {cmd} { set res [list] for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { $cmd xPhraseColumnForeach $i c { lappend res $i.$c } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ac.test.
︙ | ︙ | |||
154 155 156 157 158 159 160 | do_execsql_test 1.$tn2.integrity { INSERT INTO xx(xx) VALUES('integrity-check'); } #------------------------------------------------------------------------- # foreach {tn expr} { | < > | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | do_execsql_test 1.$tn2.integrity { INSERT INTO xx(xx) VALUES('integrity-check'); } #------------------------------------------------------------------------- # foreach {tn expr} { 1.1 "a AND b" 1.2 "a OR b" 1.3 "o" 1.4 "b q" 1.5 "e a e" 1.6 "m d g q q b k b w f q q p p" 1.7 "l o o l v v k" 1.8 "a" 1.9 "b" |
︙ | ︙ | |||
245 246 247 248 249 250 251 | } set res [fts5_query_data $expr xx] do_execsql_test 1.$tn2.$tn.[llength $res].asc { SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) FROM xx WHERE xx match $expr } $res | < | 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | } set res [fts5_query_data $expr xx] do_execsql_test 1.$tn2.$tn.[llength $res].asc { SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) FROM xx WHERE xx match $expr } $res set res [fts5_query_data $expr xx DESC] do_execsql_test 1.$tn2.$tn.[llength $res].desc { SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) FROM xx WHERE xx match $expr ORDER BY 1 DESC } $res } |
︙ | ︙ |
Added ext/fts5/test/fts5simple3.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | # 2015 September 05 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5simple3 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } fts5_aux_test_functions db do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, detail=col); INSERT INTO t1 VALUES('a', 'b', 'c'); INSERT INTO t1 VALUES('x', 'x', 'x'); } do_execsql_test 1.1 { SELECT rowid, fts5_test_collist(t1) FROM t1('a:a'); } {1 0.0} do_execsql_test 1.2 { SELECT rowid, fts5_test_collist(t1) FROM t1('b:x'); } {2 0.1} do_execsql_test 1.3 { SELECT rowid, fts5_test_collist(t1) FROM t1('b:a'); } {} finish_test |
Changes to ext/fts5/test/fts5synonym2.test.
︙ | ︙ | |||
23 24 25 26 27 28 29 30 31 32 33 34 35 36 | foreach tok {query document} { foreach_detail_mode $testprefix { fts5_tclnum_register db fts5_aux_test_functions db proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] } sqlite3_fts5_create_function db fts5_rowid fts5_rowid do_execsql_test 1.$tok.0.1 " CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize='tclnum $tok', detail=%DETAIL%); INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()'); | > > > > > > > > > > > > > > > | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | foreach tok {query document} { foreach_detail_mode $testprefix { fts5_tclnum_register db fts5_aux_test_functions db proc fts5_test_bothlist {cmd} { for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { set bFirst 1 $cmd xPhraseColumnForeach $i c { lappend CL $i.$c if {$bFirst} { $cmd xPhraseForeach $i c o { lappend PL $i.$c.$o } } set bFirst 0 } } list [sort_poslist $PL] $CL } sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] } sqlite3_fts5_create_function db fts5_rowid fts5_rowid do_execsql_test 1.$tok.0.1 " CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize='tclnum $tok', detail=%DETAIL%); INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()'); |
︙ | ︙ | |||
85 86 87 88 89 90 91 92 93 94 95 96 97 98 | INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii'); INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4'); INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii'); INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one'); } foreach {tn expr} { 1.1 "one" 1.2 "two" 1.3 "three" 1.4 "four" 1.5 "v" 1.6 "vi" 1.7 "vii" 1.8 "viii" 1.9 "9" 1.10 "0" 1.11 "1" 1.12 "2" 2.1 "one OR two OR three OR four" 2.2 "(one AND two) OR (three AND four)" 2.3 "(one AND two) OR (three AND four) NOT five" | > > | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii'); INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4'); INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii'); INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one'); } foreach {tn expr} { 2.1 "one OR two OR three OR four" 1.1 "one" 1.2 "two" 1.3 "three" 1.4 "four" 1.5 "v" 1.6 "vi" 1.7 "vii" 1.8 "viii" 1.9 "9" 1.10 "0" 1.11 "1" 1.12 "2" 2.1 "one OR two OR three OR four" 2.2 "(one AND two) OR (three AND four)" 2.3 "(one AND two) OR (three AND four) NOT five" |
︙ | ︙ | |||
109 110 111 112 113 114 115 | if {[fts5_expr_ok $expr ss]==0} { do_test 1.$tok.$tn.OMITTED { list } [list] continue } set res [fts5_query_data $expr ss ASC ::tclnum_syn] do_execsql_test 1.$tok.$tn.[llength $res].asc.1 { | | > > > > > > > > > > > > > > > > > > | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | if {[fts5_expr_ok $expr ss]==0} { do_test 1.$tok.$tn.OMITTED { list } [list] continue } set res [fts5_query_data $expr ss ASC ::tclnum_syn] do_execsql_test 1.$tok.$tn.[llength $res].asc.1 { SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr) } $res do_execsql_test 1.$tok.$tn.[llength $res].asc.2 { SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr) } $res do_execsql_test 1.$tok.$tn.[llength $res].asc.2 { SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr) ORDER BY rank ASC } $res set res2 [list] foreach {a b c} $res { lappend res2 $a $c $b } do_execsql_test 1.$tok.$tn.[llength $res].asc.3 { SELECT rowid, fts5_test_collist(ss), fts5_test_poslist2(ss) FROM ss($expr) } $res2 set res3 [list] foreach {a b c} $res { lappend res3 $a [list $b $c] } do_execsql_test 1.$tok.$tn.[llength $res].asc.3 { SELECT rowid, fts5_test_bothlist(ss) FROM ss($expr) } $res3 } } } finish_test |
Changes to ext/fts5/tool/fts5speed.tcl.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | set Q { {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"} {25 "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"} {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} } proc usage {} { global Q puts stderr "Usage: $::argv0 DATABASE QUERY" puts stderr "" for {set i 1} {$i <= [llength $Q]} {incr i} { | > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | set Q { {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"} {25 "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"} {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"} } proc usage {} { global Q puts stderr "Usage: $::argv0 DATABASE QUERY" puts stderr "" for {set i 1} {$i <= [llength $Q]} {incr i} { |
︙ | ︙ |
Changes to ext/fts5/tool/fts5txt2db.tcl.
1 2 |
| > > > | > | | > > > > > > > > > > | | > > > > > > | > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < | < < < < < < < < < < < < < < < < < < < < < < < | | < | < | | | < < < < < < | | | | | > > > > > > > > | | | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | #------------------------------------------------------------------------- # Command line options processor. # proc command_line_error {O E {msg ""}} { if {$msg != ""} { puts stderr "Error: $msg" puts stderr "" } set L [list] foreach o $O { if {[llength $o]==1} { lappend L [string toupper $o] } } puts stderr "Usage: $::argv0 ?SWITCHES? $L" puts stderr "" puts stderr "Switches are:" foreach o $O { if {[llength $o]==3} { foreach {a b c} $o {} puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b] } elseif {[llength $o]==2} { foreach {a b} $o {} puts stderr [format " -%-15s %s" $a $b] } } puts stderr "" puts stderr $E exit -1 } proc process_command_line {avar lArgs O E} { upvar $avar A set zTrailing "" ;# True if ... is present in $O set lPosargs [list] # Populate A() with default values. Also, for each switch in the command # line spec, set an entry in the idx() array as follows: # # {tblname t1 "table name to use"} # -> [set idx(-tblname) {tblname t1 "table name to use"} # # For each position parameter, append its name to $lPosargs. If the ... # specifier is present, set $zTrailing to the name of the prefix. # foreach o $O { set nm [lindex $o 0] set nArg [llength $o] switch -- $nArg { 1 { if {[string range $nm end-2 end]=="..."} { set zTrailing [string range $nm 0 end-3] } else { lappend lPosargs $nm } } 2 { set A($nm) 0 set idx(-$nm) $o } 3 { set A($nm) [lindex $o 1] set idx(-$nm) $o } default { error "Error in command line specification" } } } # Set explicitly specified option values # set nArg [llength $lArgs] for {set i 0} {$i < $nArg} {incr i} { set opt [lindex $lArgs $i] if {[string range $opt 0 0]!="-" || $opt=="--"} break set c [array names idx "${opt}*"] if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"} if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"} if {[llength $idx($c)]==3} { if {$i==[llength $lArgs]-1} { command_line_error $O $E "Option requires argument: $c" } incr i set A([lindex $idx($c) 0]) [lindex $lArgs $i] } else { set A([lindex $idx($c) 0]) 1 } } # Deal with position arguments. # set nPosarg [llength $lPosargs] set nRem [expr $nArg - $i] if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { command_line_error $O $E } for {set j 0} {$j < $nPosarg} {incr j} { set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]] } if {$zTrailing!=""} { set A($zTrailing) [lrange $lArgs [expr $j+$i] end] } } # End of command line options processor. #------------------------------------------------------------------------- process_command_line A $argv { {fts5 "use fts5"} {fts4 "use fts4"} {colsize "10 10 10" "list of column sizes"} {tblname "t1" "table name to create"} {detail "full" "Fts5 detail mode to use"} {repeat 1 "Load each file this many times"} database file... } { This script is designed to create fts4/5 tables with more than one column. The -colsize option should be set to a Tcl list of integer values, one for each column in the table. Each value is the number of tokens that will be inserted into the column value for each row. For example, setting the -colsize option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10 tokens per row in each, respectively. Each "FILE" argument should be a text file. The contents of these text files is split on whitespace characters to form a list of tokens. The first N1 tokens are used for the first column of the first row, where N1 is the first element of the -colsize list. The next N2 are used for the second column of the first row, and so on. Rows are added to the table until the entire list of tokens is exhausted. } if {$A(fts4)} { set A(fts) fts4 } else { set A(fts) fts5 } sqlite3 db $A(database) # Create the FTS table in the db. Return a list of the table columns. # proc create_table {} { global A set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z] set nCol [llength $A(colsize)] set cols [lrange $cols 0 [expr $nCol-1]] set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) (" append sql [join $cols ,] if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" } db eval $sql return $cols } # Return a list of tokens from the named file. # proc readfile {file} { set fd [open $file] set data [read $fd] close $fd split $data } proc repeat {L n} { set res [list] for {set i 0} {$i < $n} {incr i} { set res [concat $res $L] } set res } # Load all the data into a big list of tokens. # set tokens [list] foreach f $A(file) { set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]] } set N [llength $tokens] set i 0 set cols [create_table] set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])" foreach c [lrange $cols 1 end] { append sql ", \$R($c)" } append sql ")" db eval BEGIN while {$i < $N} { foreach c $cols s $A(colsize) { set R($c) [lrange $tokens $i [expr $i+$s-1]] incr i $s } db eval $sql } db eval COMMIT |