/ Changes On Branch fts5-perf
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch fts5-perf Excluding Merge-Ins

This is equivalent to a diff from 3be336aa89 to b4ac61aeee

2016-01-26
20:19
Performance improvements for fts5, particularly detail=col mode. (check-in: a3d7b8ac53 user: dan tags: trunk)
20:08
Further minor performance improvements and code-size reductions related to fts5 column filters on detail=col tables. (Leaf check-in: b4ac61aeee user: dan tags: fts5-perf)
19:30
Improve the performance of fts5 column filters on detail=col tables. (check-in: 249a2d070c user: dan tags: fts5-perf)
2016-01-23
18:24
Changes to spellfix to try to get it to use stack space instead of heap space in cases where that makes sense. (check-in: dfcebc7393 user: drh tags: trunk)
16:20
Merge trunk changes (including fixes for warnings in fts5) with this branch. (check-in: ceccc9ad78 user: dan tags: fts5-perf)
15:57
Fix some signed/unsigned comparison compiler warnings in fts5. (check-in: 3be336aa89 user: dan tags: trunk)
14:05
Remove an assert() that can be false if compiled with SQLITE_USE_ALLOCA. (check-in: f0a551edf8 user: drh tags: trunk)

Changes to ext/fts5/fts5Int.h.

25
26
27
28
29
30
31
32

33
34
35
36
37
38
39
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39







-
+








typedef unsigned char  u8;
typedef unsigned int   u32;
typedef unsigned short u16;
typedef sqlite3_int64 i64;
typedef sqlite3_uint64 u64;

#define ArraySize(x) (sizeof(x) / sizeof(x[0]))
#define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0])))

#define testcase(x)
#define ALWAYS(x) 1
#define NEVER(x) 0

#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
311
312
313
314
315
316
317






318
319
320
321
322
323
324
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330







+
+
+
+
+
+







** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
*/

typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

struct Fts5IndexIter {
  i64 iRowid;                     /* Rowid of the entry the iterator points at */
  const u8 *pData;                /* Position-list (or column-list) data for entry */
  int nData;                      /* Size of pData in bytes; 0 means no data */
};

/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX     0x0001   /* Prefix query */
#define FTS5INDEX_QUERY_DESC       0x0002   /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004   /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN       0x0008   /* Scan query (fts5vocab) */
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
384
385
386
387
388
389
390


391
392
393
394
395
396
397







-
-







** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*,Fts5Colset*, const u8**, int*, i64*);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
469
470
471
472
473
474
475


476
477
478
479
480
481
482







-
-








int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);

int sqlite3Fts5IndexLoadConfig(Fts5Index *p);

int sqlite3Fts5IterCollist(Fts5IndexIter*, const u8 **, int*);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_varint.c. 
*/

Changes to ext/fts5/fts5_aux.c.

540
541
542
543
544
545
546
547

548
549
550
551
552
553
554
540
541
542
543
544
545
546

547
548
549
550
551
552
553
554







-
+







    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function,    0 },
  };
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){
  for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
    rc = pApi->xCreateFunction(pApi,
        aBuiltin[i].zFunc,
        aBuiltin[i].pUserData,
        aBuiltin[i].xFunc,
        aBuiltin[i].xDestroy
    );
  }

Changes to ext/fts5/fts5_buffer.c.

318
319
320
321
322
323
324
325

326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342

343
344
345
346
347
348
349
318
319
320
321
322
323
324

325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341

342
343
344
345
346
347
348
349







-
+
















-
+







  const char *pTerm, int nTerm, 
  int *pbPresent
){
  int rc = SQLITE_OK;
  *pbPresent = 0;
  if( p ){
    int i;
    int hash = 13;
    u32 hash = 13;
    Fts5TermsetEntry *pEntry;

    /* Calculate a hash value for this term. This is the same hash checksum
    ** used by the fts5_hash.c module. This is not important for correct
    ** operation of the module, but is necessary to ensure that some tests
    ** designed to produce hash table collisions really do work.  */
    for(i=nTerm-1; i>=0; i--){
      hash = (hash << 3) ^ hash ^ pTerm[i];
    }
    hash = (hash << 3) ^ hash ^ iIdx;
    hash = hash % ArraySize(p->apHash);

    for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
      if( pEntry->iIdx==iIdx 
          && pEntry->nTerm==nTerm 
          && memcmp(pEntry->pTerm, pTerm, nTerm)==0 
        ){
      ){
        *pbPresent = 1;
        break;
      }
    }

    if( pEntry==0 ){
      pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);

Changes to ext/fts5/fts5_expr.c.

302
303
304
305
306
307
308
309

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

335
336
337
338
339
340
341
342
343
344
345
346
347

348
349
350
351
352
353
354
355
356
357
358
359
360

361
362
363
364
365
366
367
368
369
370
371
372
373
374
375

376
377
378

379
380
381
382


383
384
385
386
387
388
389
390
302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
319
320
321
322












323
324
325
326
327
328
329
330
331
332
333
334
335

336
337
338
339
340
341

342
343
344
345
346

347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362

363
364
365

366




367
368

369
370
371
372
373
374
375







-
+













-
-
-
-
-
-
-
-
-
-
-
-
+












-
+





-





-

+














-
+


-
+
-
-
-
-
+
+
-







** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymList(
  Fts5ExprTerm *pTerm, 
  int bCollist, 
  Fts5Colset *pColset,
  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  Fts5Buffer *pBuf,               /* Use this buffer for space if required */
  u8 **pa, int *pn
){
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int nIter = 0;
  int nAlloc = 4;
  int rc = SQLITE_OK;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;

      if( bCollist ){
        rc = sqlite3Fts5IterCollist(pIter, &a, &n);
      }else{
        i64 dummy;
        rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy);
      }

      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
      if( n==0 ) continue;
      if( pIter->nData==0 ) continue;
      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
        memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
        nAlloc = nAlloc*2;
        if( aIter!=aStatic ) sqlite3_free(aIter);
        aIter = aNew;
      }
      sqlite3Fts5PoslistReaderInit(a, n, &aIter[nIter]);
      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
      assert( aIter[nIter].bEof==0 );
      nIter++;
    }
  }

  assert( *pbDel==0 );
  if( nIter==1 ){
    *pa = (u8*)aIter[0].a;
    *pn = aIter[0].n;
  }else{
    Fts5PoslistWriter writer = {0};
    Fts5Buffer buf = {0,0,0};
    i64 iPrev = -1;
    fts5BufferZero(pBuf);
    while( 1 ){
      int i;
      i64 iMin = FTS5_LARGEST_INT64;
      for(i=0; i<nIter; i++){
        if( aIter[i].bEof==0 ){
          if( aIter[i].iPos==iPrev ){
            if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
          }
          if( aIter[i].iPos<iMin ){
            iMin = aIter[i].iPos;
          }
        }
      }
      if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
      rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
      rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
      iPrev = iMin;
    }
    if( rc ){
    if( rc==SQLITE_OK ){
      sqlite3_free(buf.p);
    }else{
      *pa = buf.p;
      *pn = buf.n;
      *pa = pBuf->p;
      *pn = pBuf->n;
      *pbDel = 1;
    }
  }

 synonym_poslist_out:
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
428
429
430
431
432
433

434

435
436

437





438

439

440
441
442
443
444
445
446
447
448
398
399
400
401
402
403
404

405
406
407
408
409
410
411
412
413
414

415
416

417
418
419
420

421
422
423
424
425
426
427
428
429

430
431

432
433
434
435
436
437
438







-
+









-


-
+

+

-
+

+
+
+
+
+

+
-
+

-







  int i;
  int rc = SQLITE_OK;
  
  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(int)ArraySize(aStatic) ){
  if( pPhrase->nTerm>ArraySize(aStatic) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }
  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    u8 *a = 0;
    if( pTerm->pSynonym ){
      Fts5Buffer buf = {0, 0, 0};
      rc = fts5ExprSynonymList(
          pTerm, 0, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n
          pTerm, 0, pColset, pNode->iRowid, &buf, &a, &n
      );
      if( rc ){
        sqlite3_free(a);
        goto ismatch_out;
      }
      if( a==buf.p ) bFlag = 1;
    }else{
      a = (u8*)pTerm->pIter->pData;
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy);
      n = pTerm->pIter->nData;
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    aIter[i].bFlag = (u8)bFlag;
    if( aIter[i].bEof ) goto ismatch_out;
  }

  while( 1 ){
    int bMatch;
549
550
551
552
553
554
555
556

557
558
559
560
561
562
563
539
540
541
542
543
544
545

546
547
548
549
550
551
552
553







-
+







  int rc = *pRc;
  int bMatch;

  assert( pNear->nPhrase>1 );

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pNear->nPhrase>(int)ArraySize(aStatic) ){
  if( pNear->nPhrase>ArraySize(aStatic) ){
    int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
    a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
  }else{
    memset(aStatic, 0, sizeof(aStatic));
  }
  if( rc!=SQLITE_OK ){
    *pRc = rc;
771
772
773
774
775
776
777

778
779
780
781
782
783
784
785
786




787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802



803
804
805

806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827

828
829
830
831
832
833

834
835
836
837



838


839
840

841
842
843
844
845
846
847
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824


825
826


827
828

829
830
831


832
833
834

835
836
837

838
839
840
841
842
843
844
845







+









+
+
+
+
















+
+
+



+




















-
-
+

-
-


-
+


-
-
+
+
+
-
+
+

-
+







  if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
    Fts5ExprTerm *pTerm;
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    pPhrase->poslist.n = 0;
    for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
      Fts5IndexIter *pIter = pTerm->pIter;
      if( sqlite3Fts5IterEof(pIter)==0 ){
#if 0
        int n;
        i64 iRowid;
        rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid);
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return 0;
        }else if( iRowid==pNode->iRowid && n>0 ){
          pPhrase->poslist.n = 1;
        }
#endif
        if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
          pPhrase->poslist.n = 1;
        }
      }
    }
    return pPhrase->poslist.n;
  }else{
    int i;

    /* Check that each phrase in the nearset matches the current row.
    ** Populate the pPhrase->poslist buffers at the same time. If any
    ** phrase is not a match, break out of the loop early.  */
    for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
        int bMatch = 0;
        rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
        if( bMatch==0 ) break;
      }else{
        Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
        fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
#if 0
        rc = sqlite3Fts5IterPoslistBuffer(
            pPhrase->aTerm[0].pIter, &pPhrase->poslist
        );
#endif
      }
    }

    *pRc = rc;
    if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
      return 1;
    }
    return 0;
  }
}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
){
  /* As this "NEAR" object is actually a single phrase that consists 
  ** of a single term only, grab pointers into the poslist managed by the
  ** fts5_index.c iterator object. This is much faster than synthesizing 
  ** a new poslist the way we have to for more complicated phrase or NEAR
  ** expressions.  */
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
  Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
  Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
  Fts5Colset *pColset = pNear->pColset;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, pColset, 
      (const u8**)&pPhrase->poslist.p, (int*)&pPhrase->poslist.n, &pNode->iRowid
  pPhrase->poslist.n = pIter->nData;
  if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
    pPhrase->poslist.p = (u8*)pIter->pData;
  );
  }
  pNode->iRowid = pIter->iRowid;
  pNode->bNomatch = (pPhrase->poslist.n==0);
  return rc;
  return SQLITE_OK;
}

/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, *pbEof is set to true before
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376

1377
1378
1379
1380
1381
1382
1383
1364
1365
1366
1367
1368
1369
1370

1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381







-



+







    int i;
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pSyn;
      Fts5ExprTerm *pNext;
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      sqlite3Fts5IterClose(pTerm->pIter);

      for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
        pNext = pSyn->pSynonym;
        sqlite3Fts5IterClose(pSyn->pIter);
        fts5BufferFree((Fts5Buffer*)&pSyn[1]);
        sqlite3_free(pSyn);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}
1457
1458
1459
1460
1461
1462
1463
1464

1465
1466
1467
1468
1469
1470

1471
1472
1473
1474
1475
1476
1477
1455
1456
1457
1458
1459
1460
1461

1462
1463
1464
1465
1466
1467

1468
1469
1470
1471
1472
1473
1474
1475







-
+





-
+








  /* If an error has already occurred, this is a no-op */
  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;

  assert( pPhrase==0 || pPhrase->nTerm>0 );
  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
    Fts5ExprTerm *pSyn;
    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
    int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
    if( pSyn==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pSyn, 0, nByte);
      pSyn->zTerm = (char*)&pSyn[1];
      pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
      memcpy(pSyn->zTerm, pToken, nToken);
      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
    }
  }else{
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
2242
2243
2244
2245
2246
2247
2248
2249

2250
2251
2252
2253
2254
2255
2256
2240
2241
2242
2243
2244
2245
2246

2247
2248
2249
2250
2251
2252
2253
2254







-
+







    { "fts5_isalnum",  fts5ExprIsAlnum },
    { "fts5_fold",     fts5ExprFold },
  };
  int i;
  int rc = SQLITE_OK;
  void *pCtx = (void*)pGlobal;

  for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aFunc); i++){
  for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
    struct Fts5ExprFunc *p = &aFunc[i];
    rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
  }

  /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */
#ifndef NDEBUG
  (void)sqlite3Fts5ParserTrace;
2480
2481
2482
2483
2484
2485
2486


2487
2488
2489
2490
2491
2492
2493

2494
2495
2496

2497
2498
2499
2500
2501
2502
2503


2504
2505
2506

2507
2508
2509
2510
2511
2512
2513
2514
2515
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492

2493

2494

2495
2496






2497
2498



2499
2500
2501
2502
2503
2504
2505
2506
2507
2508







+
+






-
+
-

-
+

-
-
-
-
-
-
+
+
-
-
-
+









  int *pnCollist
){
  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
  Fts5ExprNode *pNode = pPhrase->pNode;
  int rc = SQLITE_OK;

  assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
  assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );

  if( pNode->bEof==0 
   && pNode->iRowid==pExpr->pRoot->iRowid 
   && pPhrase->poslist.n>0
  ){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
    if( pTerm->pSynonym ){
      int bDel = 0;
      Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
      u8 *a;
      rc = fts5ExprSynonymList(
          pTerm, 1, 0, pNode->iRowid, &bDel, &a, pnCollist
          pTerm, 1, 0, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
      );
      if( bDel ){
        sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, *pnCollist, a);
        *ppCollist = pPhrase->poslist.p;
        sqlite3_free(a);
      }else{
        *ppCollist = a;
    }else{
      *ppCollist = pPhrase->aTerm[0].pIter->pData;
      }
    }else{
      sqlite3Fts5IterCollist(pPhrase->aTerm[0].pIter, ppCollist, pnCollist);
      *pnCollist = pPhrase->aTerm[0].pIter->nData;
    }
  }else{
    *ppCollist = 0;
    *pnCollist = 0;
  }

  return rc;
}

Changes to ext/fts5/fts5_index.c.

257
258
259
260
261
262
263

264
265
266
267
268
269
270
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271







+







#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
499
500
501
502
503
504
505
506



507
508
509




510
511
512
513
514
515
516
500
501
502
503
504
505
506

507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523







-
+
+
+



+
+
+
+







** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused. 
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Structure *pStruct;         /* Database structure for this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */
  u8 bEof;                        /* True at EOF */
  u8 bFiltered;                   /* True if column-filter already applied */

1748
1749
1750
1751
1752
1753
1754
1755

1756
1757
1758
1759
1760
1761
1762
1755
1756
1757
1758
1759
1760
1761

1762
1763
1764
1765
1766
1767
1768
1769







-
+







}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
}

/*
** Advance iterator pIter to the next entry.
**
2402
2403
2404
2405
2406
2407
2408
2409

2410
2411
2412
2413
2414
2415
2416
2409
2410
2411
2412
2413
2414
2415

2416
2417
2418
2419
2420
2421
2422
2423







-
+







/*
** This function is used as part of the big assert() procedure implemented by
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
** in *pRes is the correct result of comparing the current positions of the
** two iterators.
*/
static void fts5AssertComparisonResult(
  Fts5IndexIter *pIter, 
  Fts5Iter *pIter, 
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  int i1 = p1 - pIter->aSeg;
  int i2 = p2 - pIter->aSeg;

2443
2444
2445
2446
2447
2448
2449
2450

2451
2452
2453
2454
2455
2456
2457
2450
2451
2452
2453
2454
2455
2456

2457
2458
2459
2460
2461
2462
2463
2464







-
+








/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert() 
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int i;

    assert( (pFirst->pLeaf==0)==pIter->bEof );

    /* Check that pIter->iSwitchRowid is set correctly. */
2488
2489
2490
2491
2492
2493
2494
2495

2496
2497
2498
2499
2500
2501
2502
2495
2496
2497
2498
2499
2500
2501

2502
2503
2504
2505
2506
2507
2508
2509







-
+







** Do the comparison necessary to populate pIter->aFirst[iOut].
**
** If the returned value is non-zero, then it is the index of an entry
** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
** to a key that is a duplicate of another, higher priority, 
** segment-iterator in the pIter->aSeg[] array.
*/
static int fts5MultiIterDoCompare(Fts5IndexIter *pIter, int iOut){
static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
  int i1;                         /* Index of left-hand Fts5SegIter */
  int i2;                         /* Index of right-hand Fts5SegIter */
  int iRes;
  Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
  Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
  Fts5CResult *pRes = &pIter->aFirst[iOut];

2634
2635
2636
2637
2638
2639
2640
2641

2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655

2656
2657
2658
2659
2660
2661
2662
2641
2642
2643
2644
2645
2646
2647

2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661

2662
2663
2664
2665
2666
2667
2668
2669







-
+













-
+







  }while( p->rc==SQLITE_OK );
}


/*
** Free the iterator object passed as the second argument.
*/
static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
static void fts5MultiIterFree(Fts5Index *p, Fts5Iter *pIter){
  if( pIter ){
    int i;
    for(i=0; i<pIter->nSeg; i++){
      fts5SegIterClear(&pIter->aSeg[i]);
    }
    fts5StructureRelease(pIter->pStruct);
    fts5BufferFree(&pIter->poslist);
    sqlite3_free(pIter);
  }
}

static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5IndexIter *pIter,           /* Iterator to update aFirst[] array for */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int i;
  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
2676
2677
2678
2679
2680
2681
2682
2683
2684



2685
2686
2687
2688
2689
2690
2691
2683
2684
2685
2686
2687
2688
2689


2690
2691
2692
2693
2694
2695
2696
2697
2698
2699







-
-
+
+
+







**
** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
** on the iterator instead. That function does the same as this one, except
** that it deals with more complicated cases as well.
*/ 
static int fts5MultiIterAdvanceRowid(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5IndexIter *pIter,           /* Iterator to update aFirst[] array for */
  int iChanged                    /* Index of sub-iterator just advanced */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  Fts5SegIter **ppFirst
){
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];

  if( pNew->iRowid==pIter->iSwitchRowid
   || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
  ){
    int i;
2710
2711
2712
2713
2714
2715
2716

2717
2718
2719
2720
2721
2722
2723

2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738

2739
2740
2741
2742
2743

2744
2745
2746
2747
2748
2749
2750
2751
2752
2753










2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
















2764
2765
2766
2767
2768
2769
2770

2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783

2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796

2797
2798



2799
2800
2801
2802

2803
2804
2805
2806
2807

2808
2809
2810
2811
2812
2813
2814

2815
2816
2817
2818
2819
2820

2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838

2839
2840
2841
2842
2843
2844
2845

2846
2847
2848
2849
2850
2851
2852
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731

2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746

2747
2748
2749
2750


2751










2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762









2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778

2779
2780
2781
2782
2783

2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796

2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811


2812
2813
2814
2815
2816
2817

2818
2819
2820
2821
2822

2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836

2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854

2855
2856
2857
2858
2859
2860
2861

2862
2863
2864
2865
2866
2867
2868
2869







+






-
+














-
+



-
-
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-





-
+












-
+













+
-
-
+
+
+



-
+




-
+







+





-
+

















-
+






-
+







      pRes->iFirst = (u16)(pNew - pIter->aSeg);
      if( i==1 ) break;

      pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
    }
  }

  *ppFirst = pNew;
  return 0;
}

/*
** Set the pIter->bEof variable based on the state of the sub-iterators.
*/
static void fts5MultiIterSetEof(Fts5IndexIter *pIter){
static void fts5MultiIterSetEof(Fts5Iter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  pIter->bEof = pSeg->pLeaf==0;
  pIter->iSwitchRowid = pSeg->iRowid;
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
** considered an error if the iterator reaches EOF, or if it is already at 
** EOF when this function is called.
*/
static void fts5MultiIterNext(
  Fts5Index *p, 
  Fts5IndexIter *pIter,
  Fts5Iter *pIter,
  int bFrom,                      /* True if argument iFrom is valid */
  i64 iFrom                       /* Advance at least as far as this */
){
  if( p->rc==SQLITE_OK ){
    int bUseFrom = bFrom;
  int bUseFrom = bFrom;
    do {
      int iFirst = pIter->aFirst[1].iFirst;
      int bNewTerm = 0;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
      assert( p->rc==SQLITE_OK );
      if( bUseFrom && pSeg->pDlidx ){
        fts5SegIterNextFrom(p, pSeg, iFrom);
      }else{
        pSeg->xNext(p, pSeg, &bNewTerm);
      }
  while( p->rc==SQLITE_OK ){
    int iFirst = pIter->aFirst[1].iFirst;
    int bNewTerm = 0;
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    assert( p->rc==SQLITE_OK );
    if( bUseFrom && pSeg->pDlidx ){
      fts5SegIterNextFrom(p, pSeg, iFrom);
    }else{
      pSeg->xNext(p, pSeg, &bNewTerm);
    }

      if( pSeg->pLeaf==0 || bNewTerm 
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
        fts5MultiIterSetEof(pIter);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    if( pSeg->pLeaf==0 || bNewTerm 
     || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg)
    ){
      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      fts5MultiIterSetEof(pIter);
      pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
      if( pSeg->pLeaf==0 ) return;
    }

    fts5AssertMultiIterSetup(p, pIter);
    assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
    if( pIter->bSkipEmpty==0 || pSeg->nPos ){
      pIter->xSetOutputs(pIter, pSeg);
      return;
    }
    bUseFrom = 0;
    }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
  }
}

static void fts5MultiIterNext2(
  Fts5Index *p, 
  Fts5IndexIter *pIter,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc==SQLITE_OK ){
    do {
      int iFirst = pIter->aFirst[1].iFirst;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
      int bNewTerm = 0;

      assert( p->rc==SQLITE_OK );
      pSeg->xNext(p, pSeg, &bNewTerm);
      if( pSeg->pLeaf==0 || bNewTerm 
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst, &pSeg)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
        fts5MultiIterSetEof(pIter);
        *pbNewTerm = 1;
      }else{
        *pbNewTerm = 0;
      }
      fts5AssertMultiIterSetup(p, pIter);

    }while( fts5MultiIterIsEmpty(p, pIter) );
  }
}

static void fts5IterSetOutputs_Noop(Fts5Iter *pIter, Fts5SegIter *pSeg){

static Fts5IndexIter *fts5MultiIterAlloc(
}

static Fts5Iter *fts5MultiIterAlloc(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  int nSeg
){
  Fts5IndexIter *pNew;
  Fts5Iter *pNew;
  int nSlot;                      /* Power of two >= nSeg */

  for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  pNew = fts5IdxMalloc(p, 
      sizeof(Fts5IndexIter) +             /* pNew */
      sizeof(Fts5Iter) +                  /* pNew */
      sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
  );
  if( pNew ){
    pNew->nSeg = nSlot;
    pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
    pNew->pIndex = p;
    pNew->xSetOutputs = fts5IterSetOutputs_Noop;
  }
  return pNew;
}

/*
** Allocate a new Fts5IndexIter object.
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the 
** iterated data.
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int bSkipEmpty,                 /* True to ignore delete-keys */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5IndexIter **ppOut           /* New object */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* */
  int iSeg;                       /* Used to iterate through segments */
  Fts5Buffer buf = {0,0,0};       /* Buffer used by fts5SegIterSeekInit() */
  Fts5StructureLevel *pLvl;
  Fts5IndexIter *pNew;
  Fts5Iter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
2913
2914
2915
2916
2917
2918
2919
2920

2921
2922
2923
2924
2925
2926
2927

2928
2929

2930
2931
2932
2933
2934
2935
2936
2930
2931
2932
2933
2934
2935
2936

2937
2938
2939
2940
2941
2942
2943

2944
2945

2946
2947
2948
2949
2950
2951
2952
2953







-
+






-
+

-
+







    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
  fts5BufferFree(&buf);
}

/*
** Create an Fts5IndexIter that iterates through the doclist provided
** Create an Fts5Iter that iterates through the doclist provided
** as the second argument.
*/
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5IndexIter **ppOut           /* New object */
  Fts5Iter **ppOut                /* New object */
){
  Fts5IndexIter *pNew;
  Fts5Iter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pNew->bFiltered = 1;
    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
2957
2958
2959
2960
2961
2962
2963
2964

2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976

2977
2978
2979
2980
2981
2982
2983
2984
2985
2986

2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003

3004
3005
3006
3007
3008
3009
3010
2974
2975
2976
2977
2978
2979
2980

2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992

2993
2994
2995
2996
2997
2998
2999
3000
3001
3002

3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019

3020
3021
3022
3023
3024
3025
3026
3027







-
+











-
+









-
+
















-
+







  fts5DataRelease(pData);
}

/*
** Return true if the iterator is at EOF or if an error has occurred. 
** False otherwise.
*/
static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
  assert( p->rc 
      || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof 
  );
  return (p->rc || pIter->bEof);
}

/*
** Return the rowid of the entry that the iterator currently points
** to. If the iterator points to EOF when this function is called the
** results are undefined.
*/
static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){
static i64 fts5MultiIterRowid(Fts5Iter *pIter){
  assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
  return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
}

/*
** Move the iterator to the next entry at or following iMatch.
*/
static void fts5MultiIterNextFrom(
  Fts5Index *p, 
  Fts5IndexIter *pIter, 
  Fts5Iter *pIter, 
  i64 iMatch
){
  while( 1 ){
    i64 iRowid;
    fts5MultiIterNext(p, pIter, 1, iMatch);
    if( fts5MultiIterEof(p, pIter) ) break;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev==0 && iRowid>=iMatch ) break;
    if( pIter->bRev!=0 && iRowid<=iMatch ) break;
  }
}

/*
** Return a pointer to a buffer containing the term associated with the 
** entry that the iterator currently points to.
*/
static const u8 *fts5MultiIterTerm(Fts5IndexIter *pIter, int *pn){
static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  *pn = p->term.n;
  return p->term.p;
}

static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
3578
3579
3580
3581
3582
3583
3584
3585

3586
3587
3588
3589
3590
3591
3592
3595
3596
3597
3598
3599
3600
3601

3602
3603
3604
3605
3606
3607
3608
3609







-
+







}

/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
3656
3657
3658
3659
3660
3661
3662
3663

3664
3665
3666
3667
3668
3669
3670
3673
3674
3675
3676
3677
3678
3679

3680
3681
3682
3683
3684
3685
3686
3687







-
+







  Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5IndexIter *pIter = 0;       /* Iterator to read input data */
  Fts5Iter *pIter = 0;       /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
4338
4339
4340
4341
4342
4343
4344
4345

4346
4347
4348
4349
4350
4351
4352
4355
4356
4357
4358
4359
4360
4361

4362
4363
4364
4365
4366
4367
4368
4369







-
+







  }
  return p - (*pa);
}

static int fts5AppendRowid(
  Fts5Index *p,
  i64 iDelta,
  Fts5IndexIter *pMulti,
  Fts5Iter *pMulti,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
  return 0;
}

4363
4364
4365
4366
4367
4368
4369
4370

4371
4372
4373
4374
4375
4376
4377
4380
4381
4382
4383
4384
4385
4386

4387
4388
4389
4390
4391
4392
4393
4394







-
+







** even iDelta).
**
** If an error occurs, an error code is left in p->rc. 
*/
static int fts5AppendPoslist(
  Fts5Index *p,
  i64 iDelta,
  Fts5IndexIter *pMulti,
  Fts5Iter *pMulti,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    assert( pSeg->nPos>0 );
4641
4642
4643
4644
4645
4646
4647
4648

4649
4650
4651
4652
4653
4654
4655

4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671

4672
4673
4674
4675
4676
4677
4678
4658
4659
4660
4661
4662
4663
4664

4665
4666
4667
4668
4669
4670
4671

4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687

4688
4689
4690
4691
4692
4693
4694
4695







-
+






-
+















-
+








static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset,            /* Restrict matches to these columns */
  Fts5IndexIter **ppIter          /* OUT: New iterator */
  Fts5Iter **ppIter          /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;
  const int nBuf = 32;

  void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
  int (*xAppend)(Fts5Index*, i64, Fts5IndexIter*, Fts5Colset*, Fts5Buffer*);
  int (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Colset*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN;
    int i;
    i64 iLastRowid = 0;
    Fts5IndexIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;
    int bNewTerm = 1;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
4928
4929
4930
4931
4932
4933
4934













































































































































































4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947

4948
4949
4950
4951
4952
4953
4954

4955
4956
4957
4958






4959
4960
4961
4962

4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975

4976
4977
4978
4979
4980
4981
4982

4983
4984
4985
4986
4987






4988
4989

4990
4991
4992

4993

4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004


5005
5006
5007
5008
5009
5010


5011
5012
5013
5014
5015
5016
5017
5018
5019


5020
5021
5022
5023
5024
5025
5026
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136

5137

5138
5139
5140
5141
5142
5143
5144
5145
5146


5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191

5192
5193
5194
5195
5196

5197
5198
5199
5200
5201
5202
5203
5204
5205
5206


5207
5208
5209
5210
5211
5212
5213

5214
5215
5216
5217
5218
5219
5220
5221
5222
5223

5224
5225
5226
5227
5228
5229
5230
5231
5232







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+












-
+
-






+


-
-
+
+
+
+
+
+




+













+







+





+
+
+
+
+
+

-
+



+
-
+









-
-
+
+





-
+
+








-
+
+







      );
    }
  }

  return rc;
}


/*
** Rebuild buffer pBuf so that it holds only the parts of position list
** (pPos/nPos) that belong to the columns listed in pColset.
**
** pBuf is emptied first. Then, for each column in pColset->aiCol[] in
** array order, fts5IndexExtractCol() is asked for that column's slice of
** the position list and any non-empty slice is appended to pBuf.
**
** Returns SQLITE_OK, or the error code left in rc by the buffer-append
** routine if one occurs.
*/
static int fts5IndexExtractColset (
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int iCol = 0;                   /* Index into pColset->aiCol[] */

  fts5BufferZero(pBuf);
  while( iCol<pColset->nCol ){
    /* Each extraction starts again from the beginning of the input */
    const u8 *pColData = pPos;
    int nColData = fts5IndexExtractCol(&pColData, nPos, pColset->aiCol[iCol]);
    if( nColData!=0 ){
      fts5BufferAppendBlob(&rc, pBuf, nColData, pColData);
    }
    iCol++;
  }
  return rc;
}

/*
** xSetOutputs callback installed for tables that use detail=none.
**
** Copies the rowid and the nPos value of segment iterator pSeg into the
** public part of the iterator (pIter->base). No position-list pointer is
** set by this callback.
*/
static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
  pIter->base.nData = pSeg->nPos;
  pIter->base.iRowid = pSeg->iRowid;
}

/*
** xSetOutputs callback for detail=full and detail=col tables when no
** column filtering has to be applied by the callback: either there is no
** colset at all, or the iterator is flagged bFiltered.
**
** The rowid and size outputs are copied from pSeg. The data pointer is
** aimed directly at the leaf page when the whole list lies on it;
** otherwise the list is gathered into pIter->poslist first.
*/
static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  pIter->base.nData = pSeg->nPos;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
  assert( pIter->pColset==0 || pIter->bFiltered );

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* The list runs past the end of the current leaf. Collect it into
    ** the Fts5Iter.poslist buffer and publish a pointer to that copy. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }else{
    /* Everything is on the current leaf, so no copy is required. The
    ** output pointer refers straight into the page image. */
    pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }
}

/*
** xSetOutputs callback used by detail=col when there is a column filter
** and the table has more than 100 columns (fts5IterSetOutputCb() selects
** fts5IterSetOutputs_Col100 when nCol<=100). Also called as a fallback
** from fts5IterSetOutputs_Col100 if the column-list spans more than one
** page.
**
** The filtered list is assembled in pIter->poslist by
** fts5SegiterPoslist(), and the iterator outputs (rowid, data pointer and
** size) are then set from pSeg and from that buffer.
*/
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  /* Start from an empty buffer, then let fts5SegiterPoslist() fill it
  ** using the iterator's column filter. */
  fts5BufferZero(&pIter->poslist);
  fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
  pIter->base.iRowid = pSeg->iRowid;
  pIter->base.pData = pIter->poslist.p;
  pIter->base.nData = pIter->poslist.n;
}

/*
** xSetOutputs callback used when: 
**
**   * detail=col,
**   * there is a column filter, and
**   * the table contains 100 or fewer columns. 
**
** The last point is to ensure all column numbers are stored as 
** single-byte varints.
**
** If the column list does not fit entirely on the current leaf, the work
** is delegated to fts5IterSetOutputs_Col(). Otherwise the list is
** filtered byte-by-byte into pIter->poslist.p, which
** fts5IterSetOutputCb() sizes to nCol bytes when it installs this
** callback. Note that only base.pData/base.nData are updated; poslist.n
** is not modified by this function.
*/
static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  assert( pIter->pColset );

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* List continues beyond this leaf: use the general (copying) path */
    fts5IterSetOutputs_Col(pIter, pSeg);
  }else{
    /* a..pEnd delimit the nPos input bytes on the current leaf */
    u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
    u8 *pEnd = (u8*)&a[pSeg->nPos]; 
    int iPrev = 0;                /* Column number decoded from the input */
    int *aiCol = pIter->pColset->aiCol;
    int *aiColEnd = &aiCol[pIter->pColset->nCol];

    u8 *aOut = pIter->poslist.p;  /* Next output byte to write */
    int iPrevOut = 0;             /* Last column number written to output */

    pIter->base.iRowid = pSeg->iRowid;

    while( a<pEnd ){
      /* Each input byte holds (delta from previous column) + 2 */
      iPrev += (int)a++[0] - 2;
      /* Skip filter columns smaller than the current input column. Once
      ** the filter is exhausted nothing further can match.
      ** NOTE(review): this walk presumably relies on aiCol[] being sorted
      ** in ascending order and non-empty - confirm against the colset
      ** construction code. */
      while( *aiCol<iPrev ){
        aiCol++;
        if( aiCol==aiColEnd ) goto setoutputs_col_out;
      }
      if( *aiCol==iPrev ){
        /* Column is in the filter: emit it using the same delta+2 form,
        ** relative to the previously emitted column. */
        *aOut++ = (iPrev - iPrevOut) + 2;
        iPrevOut = iPrev;
      }
    }

setoutputs_col_out:
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = aOut - pIter->poslist.p;
  }
}

/*
** xSetOutputs callback used by detail=full when there is a column filter.
**
** Sets pIter->base.iRowid/pData/nData for the entry that pSeg currently
** points at, restricted to the columns in pIter->pColset:
**
**   * If the position list lies entirely on the current leaf and the
**     filter names a single column, the outputs point straight into the
**     page (no copy).
**
**   * If it lies on the current leaf and the filter names several
**     columns, the matching slices are concatenated into pIter->poslist.
**
**   * Otherwise the list spans pages and fts5SegiterPoslist() is used to
**     gather and filter it into pIter->poslist.
**
** This function returns void, so an error reported by
** fts5IndexExtractColset() (it returns an SQLite error code if appending
** to the buffer fails) is recorded in Fts5Index.rc rather than discarded.
*/
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
  assert( pColset );

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output 
    ** variables to point into the body of the page object. */
    const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    if( pColset->nCol==1 ){
      pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
      pIter->base.pData = a;
    }else{
      int rc;
      fts5BufferZero(&pIter->poslist);
      rc = fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist);
      if( rc!=SQLITE_OK && pIter->pIndex->rc==SQLITE_OK ){
        /* Do not lose the failure: leave it where callers look for it */
        pIter->pIndex->rc = rc;
      }
      pIter->base.pData = pIter->poslist.p;
      pIter->base.nData = pIter->poslist.n;
    }
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer.  */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}

/*
** Choose the xSetOutputs callback for iterator pIter, based on the
** table's detail mode and on whether a column filter must be applied:
**
**   detail=none                         -> fts5IterSetOutputs_None
**   no colset, or iterator bFiltered    -> fts5IterSetOutputs_Nocolset
**   detail=full                         -> fts5IterSetOutputs_Full
**   detail=col, nCol<=100               -> fts5IterSetOutputs_Col100
**   detail=col, nCol>100                -> fts5IterSetOutputs_Col
**
** For the Col100 case the iterator's poslist buffer is also sized to
** nCol bytes; any error from that request is reported through *pRc.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig = pIter->pIndex->pConfig;
  const int eDetail = pConfig->eDetail;

  if( eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
    return;
  }

  if( pIter->pColset==0 || pIter->bFiltered ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    return;
  }

  if( eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
    return;
  }

  /* Only detail=col with an active column filter remains */
  assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  if( pConfig->nCol>100 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Col;
  }else{
    pIter->xSetOutputs = fts5IterSetOutputs_Col100;
    sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
  }
}

/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5IndexIter *pRet = 0;
  Fts5Iter *pRet = 0;
  int iIdx = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );

  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */
    memcpy(&buf.p[1], pToken, nToken);

#ifdef SQLITE_DEBUG
    /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to
    ** greater than pConfig->nPrefix to indicate that the query will be
    ** satisfied by scanning multiple terms in the main index.
    **
    ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
    ** prefix-query. Instead of using a prefix-index (if one exists), 
    ** evaluate the prefix query using the main FTS index. This is used
    ** for internal sanity checking by the integrity-check in debug 
    ** mode only.  */
#ifdef SQLITE_DEBUG
    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
        if( pConfig->aPrefix[iIdx-1]==nChar ) break;
      }
    }

    if( iIdx<=pConfig->nPrefix ){
      /* Straight index lookup */
      Fts5Structure *pStruct = fts5StructureRead(p);
      buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet);
        fts5StructureRelease(pStruct);
      }
    }else{
      /* Scan multiple terms in the main index */
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
    }

    if( p->rc==SQLITE_OK ){
      Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
      pRet->pColset = pColset;
      fts5IterSetOutputCb(&p->rc, pRet);
      if( p->rc==SQLITE_OK && pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
    }
    if( p->rc ){
      sqlite3Fts5IterClose(pRet);
      sqlite3Fts5IterClose(&pRet->base);
      pRet = 0;
      fts5CloseReader(p);
    }

    *ppIter = pRet;
    *ppIter = &pRet->base;
    sqlite3Fts5BufferFree(&buf);
  }
  return fts5IndexReturn(p);
}

/*
** Return true if the iterator passed as the only argument is at EOF.
*/
int sqlite3Fts5IterEof(Fts5IndexIter *pIter){
  assert( pIter->pIndex->rc==SQLITE_OK );
  return pIter->bEof;
  assert( ((Fts5Iter*)pIter)->pIndex->rc==SQLITE_OK );
  return ((Fts5Iter*)pIter)->bEof;
}

/*
** Move to the next matching rowid. 
*/
int sqlite3Fts5IterNext(Fts5IndexIter *pIter){
int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  assert( pIter->pIndex->rc==SQLITE_OK );
  fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Move to the next matching term/rowid. Used by the fts5vocab module.
*/
int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5Index *p = pIter->pIndex;

  assert( pIter->pIndex->rc==SQLITE_OK );

  fts5MultiIterNext(p, pIter, 0, 0);
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5035
5036
5037
5038
5039
5040
5041
5042


5043
5044
5045
5046
5047
5048
5049
5050
5051


5052
5053
5054
5055
5056
5057

5058
5059

5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164



5165
5166
5167
5168
5169
5170
5171
5241
5242
5243
5244
5245
5246
5247

5248
5249
5250
5251
5252
5253
5254
5255
5256


5257
5258
5259
5260
5261
5262
5263

5264
5265

5266
5267
5268
5269
5270
































































































5271
5272
5273


5274
5275
5276
5277
5278
5279
5280
5281
5282
5283







-
+
+







-
-
+
+





-
+

-
+




-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-



-
-
+
+
+







}

/*
** Move to the next matching rowid that occurs at or after iMatch. The
** definition of "at or after" depends on whether this iterator iterates
** in ascending or descending rowid order.
*/
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Return the current rowid.
*/
i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
  return fts5MultiIterRowid(pIter);
i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIndexIter){
  return fts5MultiIterRowid((Fts5Iter*)pIndexIter);
}

/*
** Return the current term.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
  int n;
  const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
  const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
  *pn = n-1;
  return &z[1];
}


/*
** Rebuild buffer pBuf so that it holds only the parts of position list
** (pPos/nPos) that belong to the columns listed in pColset.
**
** pBuf is emptied first. Then, for each column in pColset->aiCol[] in
** array order, fts5IndexExtractCol() is asked for that column's slice of
** the position list and any non-empty slice is appended to pBuf.
**
** Returns SQLITE_OK, or the error code left in rc by the buffer-append
** routine if one occurs.
*/
static int fts5IndexExtractColset (
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int iCol = 0;                   /* Index into pColset->aiCol[] */

  fts5BufferZero(pBuf);
  while( iCol<pColset->nCol ){
    /* Each extraction starts again from the beginning of the input */
    const u8 *pColData = pPos;
    int nColData = fts5IndexExtractCol(&pColData, nPos, pColset->aiCol[iCol]);
    if( nColData!=0 ){
      fts5BufferAppendBlob(&rc, pBuf, nColData, pColData);
    }
    iCol++;
  }
  return rc;
}


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
**
** *piRowid is always set to the rowid of the current entry. Note that *pp
** is only written for detail=full tables; for detail=none only *pn is
** set (from the segment iterator's nPos), and for the remaining detail
** mode the list is built in pIter->poslist and only *pn is reported.
**
** The return value is that of fts5IndexReturn(). An error reported by
** fts5IndexExtractColset() while concatenating columns is stored in the
** index error code first, so that it is returned to the caller instead of
** being silently dropped.
*/
int sqlite3Fts5IterPoslist(
  Fts5IndexIter *pIter, 
  Fts5Colset *pColset,            /* Column filter (or NULL) */
  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  int eDetail = pIter->pIndex->pConfig->eDetail;

  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  if( eDetail==FTS5_DETAIL_NONE ){
    *pn = pSeg->nPos;
  }else
  if( eDetail==FTS5_DETAIL_FULL 
   && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf 
  ){
    /* The whole list is on the current leaf page */
    u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    if( pColset==0 || pIter->bFiltered ){
      /* No filtering required: point directly into the page */
      *pn = pSeg->nPos;
      *pp = pPos;
    }else if( pColset->nCol==1 ){
      /* Single column: a sub-range of the page, still no copy */
      *pp = pPos;
      *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
    }else{
      /* Several columns: concatenate their slices into poslist */
      int rc;
      fts5BufferZero(&pIter->poslist);
      rc = fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
      if( rc!=SQLITE_OK ){
        /* Index rc is SQLITE_OK here (see assert above), so this cannot
        ** overwrite an earlier error. */
        pIter->pIndex->rc = rc;
      }
      *pp = pIter->poslist.p;
      *pn = pIter->poslist.n;
    }
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    if( eDetail==FTS5_DETAIL_FULL ){
      *pp = pIter->poslist.p;
    }
    *pn = pIter->poslist.n;
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Report the contents of the iterator's poslist buffer: *pp is set to
** the buffer and *pn to its size in bytes. Only valid for detail=col
** tables (see the assert). Always returns SQLITE_OK.
*/
int sqlite3Fts5IterCollist(
  Fts5IndexIter *pIter, 
  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn                         /* OUT: Size of position-list in bytes */
){
  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  *pn = pIter->poslist.n;
  *pp = pIter->poslist.p;
  return SQLITE_OK;
}

/*
** Like sqlite3Fts5IterPoslist(), but the position list of the current
** entry is copied, without any column filter, into caller-supplied
** buffer pBuf. pBuf is emptied before the copy. The return value is that
** of fts5IndexReturn().
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *pIndex = pIter->pIndex;

  assert( pIndex->rc==SQLITE_OK );
  fts5BufferZero(pBuf);
  /* aFirst[1].iFirst is the index of the segment iterator that currently
  ** supplies the iterator's entry. */
  fts5SegiterPoslist(pIndex, &pIter->aSeg[ pIter->aFirst[1].iFirst ], 0, pBuf);
  return fts5IndexReturn(pIndex);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
  if( pIndexIter ){
    Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
    Fts5Index *pIndex = pIter->pIndex;
    fts5MultiIterFree(pIter->pIndex, pIter);
    fts5CloseReader(pIndex);
  }
}

/*
5324
5325
5326
5327
5328
5329
5330
5331

5332
5333

5334
5335
5336


5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350








5351
5352
5353
5354
5355

5356
5357
5358

5359
5360
5361
5362
5363
5364
5365
5366
5436
5437
5438
5439
5440
5441
5442

5443


5444
5445


5446
5447
5448
5449
5450
5451










5452
5453
5454
5455
5456
5457
5458
5459

5460
5461
5462

5463
5464
5465

5466

5467
5468
5469
5470
5471
5472
5473







-
+
-
-
+

-
-
+
+




-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-



-
+


-
+
-







  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  int eDetail = p->pConfig->eDetail;
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  Fts5IndexIter *pIter = 0;
  Fts5Buffer buf = {0, 0, 0};
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
    i64 rowid = sqlite3Fts5IterRowid(pIter);

    if( eDetail==FTS5_DETAIL_NONE ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(pIdxIter, &buf);
      if( rc==SQLITE_OK ){
        Fts5PoslistReader sReader;
        for(sqlite3Fts5PoslistReaderInit(buf.p, buf.n, &sReader);
            sReader.bEof==0;
            sqlite3Fts5PoslistReaderNext(&sReader)
        ){
          int iCol = FTS5_POS2COLUMN(sReader.iPos);
          int iOff = FTS5_POS2OFFSET(sReader.iPos);
          cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
        }
      }
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5IterNext(pIdxIter);
      rc = sqlite3Fts5IterNext(pIter);
    }
  }
  sqlite3Fts5IterClose(pIdxIter);
  sqlite3Fts5IterClose(pIter);
  fts5BufferFree(&buf);

  *pCksum = cksum;
  return rc;
}


/*
5657
5658
5659
5660
5661
5662
5663
5664

5665
5666
5667
5668
5669
5670
5671
5764
5765
5766
5767
5768
5769
5770

5771
5772
5773
5774
5775
5776
5777
5778







-
+







** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  int eDetail = p->pConfig->eDetail;
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
  Fts5IndexIter *pIter;           /* Used to iterate through entire index */
  Fts5Iter *pIter;                /* Used to iterate through entire index */
  Fts5Structure *pStruct;         /* Index structure */

#ifdef SQLITE_DEBUG
  /* Used by extra internal tests only run if NDEBUG is not defined */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
#endif

Changes to ext/fts5/fts5_main.c.

534
535
536
537
538
539
540
541

542
543
544
545
546
547
548
534
535
536
537
538
539
540

541
542
543
544
545
546
547
548







-
+







  aColMap[1] = pConfig->nCol;
  aColMap[2] = pConfig->nCol+1;

  /* Set idxFlags flags for all WHERE clause terms that will be used. */
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
    int j;
    for(j=0; j<(int)ArraySize(aConstraint); j++){
    for(j=0; j<ArraySize(aConstraint); j++){
      struct Constraint *pC = &aConstraint[j];
      if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){
        if( p->usable ){
          pC->iConsIndex = i;
          idxFlags |= pC->fts5op;
        }else if( j==0 ){
          /* As there exists an unusable MATCH constraint this is an 
581
582
583
584
585
586
587
588

589
590
591
592
593
594
595
581
582
583
584
585
586
587

588
589
590
591
592
593
594
595







-
+







    pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
  }else{
    pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
  }

  /* Assign argvIndex values to each constraint in use. */
  iNext = 1;
  for(i=0; i<(int)ArraySize(aConstraint); i++){
  for(i=0; i<ArraySize(aConstraint); i++){
    struct Constraint *pC = &aConstraint[i];
    if( pC->iConsIndex>=0 ){
      pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++;
      pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit;
    }
  }

Changes to ext/fts5/fts5_storage.c.

334
335
336
337
338
339
340
341

342
343
344
345
346
347
348
334
335
336
337
338
339
340

341
342
343
344
345
346
347
348







-
+







*/
int sqlite3Fts5StorageClose(Fts5Storage *p){
  int rc = SQLITE_OK;
  if( p ){
    int i;

    /* Finalize all SQL statements */
    for(i=0; i<(int)ArraySize(p->aStmt); i++){
    for(i=0; i<ArraySize(p->aStmt); i++){
      sqlite3_finalize(p->aStmt[i]);
    }

    sqlite3_free(p);
  }
  return rc;
}

Changes to ext/fts5/fts5_tokenize.c.

1216
1217
1218
1219
1220
1221
1222
1223

1224
1225
1226
1227
1228
1229
1230
1216
1217
1218
1219
1220
1221
1222

1223
1224
1225
1226
1227
1228
1229
1230







-
+







    { "ascii",     {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
    { "porter",    {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
  };
  
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){
  for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
    rc = pApi->xCreateTokenizer(pApi,
        aBuiltin[i].zName,
        (void*)pApi,
        &aBuiltin[i].x,
        0
    );
  }

Changes to ext/fts5/fts5_vocab.c.

180
181
182
183
184
185
186
187

188
189
190
191
192
193
194
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194







-
+







    const char *zType = bDb ? argv[5] : argv[4];
    int nDb = (int)strlen(zDb)+1; 
    int nTab = (int)strlen(zTab)+1;
    int eType = 0;
    
    rc = fts5VocabTableType(zType, pzErr, &eType);
    if( rc==SQLITE_OK ){
      assert( eType>=0 && eType<sizeof(azSchema)/sizeof(azSchema[0]) );
      assert( eType>=0 && eType<ArraySize(azSchema) );
      rc = sqlite3_declare_vtab(db, azSchema[eType]);
    }

    nByte = sizeof(Fts5VocabTable) + nDb + nTab;
    pRet = sqlite3Fts5MallocZero(&rc, nByte);
    if( pRet ){
      pRet->pGlobal = (Fts5Global*)pAux;
403
404
405
406
407
408
409
410
411
412
413
414


415
416

417

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

















436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456








457
458
459
460
461
462
463
464
465
403
404
405
406
407
408
409

410
411
412
413
414
415
416
417
418

419


















420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436

437
438
439
440
441
442
443
444
445











446
447
448
449
450
451
452
453


454
455
456
457
458
459
460







-




+
+


+
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-









-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-







      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
      memset(pCsr->aCnt, 0, nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        pPos = pCsr->pIter->pData;
        nPos = pCsr->pIter->nData;
        switch( pCsr->pConfig->eDetail ){
          case FTS5_DETAIL_FULL:
            pPos = pCsr->pIter->pData;
            rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy);
            nPos = pCsr->pIter->nData;
            if( rc==SQLITE_OK ){
              if( pTab->eType==FTS5_VOCAB_ROW ){
                while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                  pCsr->aCnt[0]++;
                }
                pCsr->aDoc[0]++;
              }else{
                int iCol = -1;
                while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                  int ii = FTS5_POS2COLUMN(iPos);
                  pCsr->aCnt[ii]++;
                  if( iCol!=ii ){
                    if( ii>=nCol ){
                      rc = FTS5_CORRUPT;
                      break;
                    }
                    pCsr->aDoc[ii]++;
                    iCol = ii;
            if( pTab->eType==FTS5_VOCAB_ROW ){
              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                pCsr->aCnt[0]++;
              }
              pCsr->aDoc[0]++;
            }else{
              int iCol = -1;
              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
                int ii = FTS5_POS2COLUMN(iPos);
                pCsr->aCnt[ii]++;
                if( iCol!=ii ){
                  if( ii>=nCol ){
                    rc = FTS5_CORRUPT;
                    break;
                  }
                  pCsr->aDoc[ii]++;
                  iCol = ii;
                  }
                }
              }
            }
            break;

          case FTS5_DETAIL_COLUMNS:
            if( pTab->eType==FTS5_VOCAB_ROW ){
              pCsr->aDoc[0]++;
            }else{
              Fts5Buffer buf = {0, 0, 0};
              rc = sqlite3Fts5IterPoslistBuffer(pCsr->pIter, &buf);
              if( rc==SQLITE_OK ){
                while( 0==sqlite3Fts5PoslistNext64(buf.p, buf.n, &iOff,&iPos) ){
                  assert_nc( iPos>=0 && iPos<nCol );
                  if( iPos>=nCol ){
                    rc = FTS5_CORRUPT;
                    break;
                  }
                  pCsr->aDoc[iPos]++;
                }
              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
                assert_nc( iPos>=0 && iPos<nCol );
                if( iPos>=nCol ){
                  rc = FTS5_CORRUPT;
                  break;
                }
                pCsr->aDoc[iPos]++;
              }
              }
              sqlite3Fts5BufferFree(&buf);
            }
            break;

          default: 
            assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE );
            pCsr->aDoc[0]++;
            break;

Changes to ext/fts5/test/fts5_common.tcl.

44
45
46
47
48
49
50
51


52
53
54
55
56
57
58
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59







-
+
+








  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    $cmd xPhraseForeach $i c o {
      lappend res $i.$c.$o
    }
  }

  set res
  #set res
  sort_poslist $res
}

proc fts5_test_collist {cmd} {
  set res [list]

  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    $cmd xPhraseColumnForeach $i c { lappend res $i.$c }

Changes to ext/fts5/test/fts5ac.test.

154
155
156
157
158
159
160
161
162

163
164
165
166
167
168
169
154
155
156
157
158
159
160

161
162
163
164
165
166
167
168
169







-

+







  do_execsql_test 1.$tn2.integrity {
    INSERT INTO xx(xx) VALUES('integrity-check');
  }

  #-------------------------------------------------------------------------
  #
  foreach {tn expr} {
    1.2 "a   OR b"
    1.1 "a   AND b"
    1.2 "a   OR b"
    1.3 "o"
    1.4 "b q"
    1.5 "e a e"
    1.6 "m d g q q b k b w f q q p p"
    1.7 "l o o l v v k"
    1.8 "a"
    1.9 "b"
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
245
246
247
248
249
250
251

252
253
254
255
256
257
258







-







    }

    set res [fts5_query_data $expr xx]
    do_execsql_test 1.$tn2.$tn.[llength $res].asc {
      SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) 
      FROM xx WHERE xx match $expr
    } $res


    set res [fts5_query_data $expr xx DESC]
    do_execsql_test 1.$tn2.$tn.[llength $res].desc {
      SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) 
      FROM xx WHERE xx match $expr ORDER BY 1 DESC
    } $res
  }

Added ext/fts5/test/fts5simple3.test.













































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# 2015 September 05
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5simple3

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

fts5_aux_test_functions db

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, detail=col);
  INSERT INTO t1 VALUES('a', 'b', 'c');
  INSERT INTO t1 VALUES('x', 'x', 'x');
}

do_execsql_test 1.1 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('a:a');
} {1 0.0}

do_execsql_test 1.2 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('b:x');
} {2 0.1}

do_execsql_test 1.3 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('b:a');
} {}


finish_test

Changes to ext/fts5/test/fts5synonym2.test.

23
24
25
26
27
28
29















30
31
32
33
34
35
36
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








foreach tok {query document} {
foreach_detail_mode $testprefix {

fts5_tclnum_register db
fts5_aux_test_functions db

# Test auxiliary function that collects both the column-list and the
# position-list of the current row through the FTS5 extension API handle
# $cmd.
#
# For each phrase i (0 <= i < [$cmd xPhraseCount]):
#   * every column c reported by xPhraseColumnForeach contributes "i.c"
#     to the column list, and
#   * on the first column callback only, every (c, o) pair reported by
#     xPhraseForeach contributes "i.c.o" to the position list.
#
# Returns a two-element list: {sorted-position-list column-list}.
#
proc fts5_test_bothlist {cmd} {

  # Start with empty accumulators. Without this, a row for which no phrase
  # reports any column would leave CL and PL undefined and the final
  # command would fail with a "no such variable" error.
  set CL [list]
  set PL [list]

  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    set bFirst 1
    $cmd xPhraseColumnForeach $i c { 
      lappend CL $i.$c 
      # The full position list for phrase $i is gathered once only, from
      # within the first column callback.
      if {$bFirst} { $cmd xPhraseForeach $i c o { lappend PL $i.$c.$o } }
      set bFirst 0
    }
  }

  list [sort_poslist $PL] $CL
}
sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist

# Auxiliary function body (registered below as "fts5_rowid"): fetch the text
# of column -1 through the xColumnText method of API handle $cmd and return
# the result of evaluating that text with [expr].
# NOTE(review): column index -1 presumably denotes the rowid - confirm.
proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] }
sqlite3_fts5_create_function db fts5_rowid fts5_rowid

do_execsql_test 1.$tok.0.1 "
  CREATE VIRTUAL TABLE ss USING fts5(a, b, 
       tokenize='tclnum $tok', detail=%DETAIL%);
  INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()');
85
86
87
88
89
90
91


92
93
94
95
96
97
98
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115







+
+







  INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii');
  INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4');
  INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii');
  INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one');
}

foreach {tn expr} {
  2.1 "one OR two OR three OR four"

  1.1 "one"   1.2 "two"   1.3 "three"   1.4 "four"
  1.5 "v"     1.6 "vi"    1.7 "vii"     1.8 "viii"
  1.9 "9"    1.10 "0"    1.11 "1"      1.12 "2"

  2.1 "one OR two OR three OR four"
  2.2 "(one AND two) OR (three AND four)"
  2.3 "(one AND two) OR (three AND four) NOT five"
109
110
111
112
113
114
115
116

117
118
119
120




121
122














123
124
125
126
127
128
129
126
127
128
129
130
131
132

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164







-
+




+
+
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+







  if {[fts5_expr_ok $expr ss]==0} {
    do_test 1.$tok.$tn.OMITTED { list } [list]
    continue
  }

  set res [fts5_query_data $expr ss ASC ::tclnum_syn]
  do_execsql_test 1.$tok.$tn.[llength $res].asc.1 {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
    SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr)
  } $res

  do_execsql_test 1.$tok.$tn.[llength $res].asc.2 {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
  } $res

  do_execsql_test 1.$tok.$tn.[llength $res].asc.2 {
    SELECT rowid, fts5_test_poslist2(ss), fts5_test_collist(ss) FROM ss($expr)
    ORDER BY rank ASC
  } $res

  set res2 [list]
  foreach {a b c} $res { lappend res2 $a $c $b }
  do_execsql_test 1.$tok.$tn.[llength $res].asc.3 {
    SELECT rowid, fts5_test_collist(ss), fts5_test_poslist2(ss) FROM ss($expr)
  } $res2

  set res3 [list]
  foreach {a b c} $res { lappend res3 $a [list $b $c] }
  do_execsql_test 1.$tok.$tn.[llength $res].asc.3 {
    SELECT rowid, fts5_test_bothlist(ss) FROM ss($expr)
  } $res3


}

}
}

finish_test

Changes to ext/fts5/tool/fts5speed.tcl.

1
2
3
4
5
6
7
8
9
10
11
12
13


14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22













+
+









set Q {
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"}
  {25  "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"}
  {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}

  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}

  {2   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
}

proc usage {} {
  global Q
  puts stderr "Usage: $::argv0 DATABASE QUERY"
  puts stderr ""
  for {set i 1} {$i <= [llength $Q]} {incr i} {

Changes to ext/fts5/tool/fts5txt2db.tcl.

1
2



3
4
5
6
7
8
9
10























































































































11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51


52
53

54
55
56
57



58
59
60
61
62
63
64

65
66
67
68
69

70
71
72

73
74
75

76
77

78
79
80
81
82
83
84
85
86
87
88
89
90
91








92
93
94
95
96
97


98
99
100
101
102
103

104
105

106
107
108
109
110
111
112


113
114
115
116
117
118
119
120
1
2
3
4
5








6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138


139

























140
141


142




143
144
145







146
147
148
149
150

151
152
153

154
155
156

157
158

159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185


186
187
188
189
190
191
192

193
194

195
196
197
198
199
200


201
202
203
204
205
206
207
208
209
210


+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
+
-
-
-
-
+
+
+
-
-
-
-
-
-
-
+




-
+


-
+


-
+

-
+














+
+
+
+
+
+
+
+




-
-
+
+





-
+

-
+





-
-
+
+










#-------------------------------------------------------------------------
# Command line options processor.
#
proc usage {} {
  puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..."
  puts stderr ""
  puts stderr "Options are"
  puts stderr "  -fts5"
  puts stderr "  -fts4"
  puts stderr "  -colsize <list of column sizes>"
  puts stderr {
# Print a usage message to stderr and terminate the script with exit
# code -1.
#
#   O    Command line specification: a list whose elements are either
#        {name} (positional argument), {name help} (boolean switch) or
#        {name default help} (switch that takes a value).
#   E    Free-form explanatory text printed after the list of switches.
#   msg  Optional error message. If not an empty string it is printed
#        first, prefixed with "Error: ".
#
proc command_line_error {O E {msg ""}} {
  if {$msg ne ""} {
    puts stderr "Error: $msg"
    puts stderr ""
  }

  # Walk the specification once, collecting the upper-cased names of the
  # positional arguments and one formatted help line per switch.
  set lPositional [list]
  set lSwitchHelp [list]
  foreach spec $O {
    switch -- [llength $spec] {
      1 {
        lappend lPositional [string toupper $spec]
      }
      2 {
        set zName [lindex $spec 0]
        set zHelp [lindex $spec 1]
        lappend lSwitchHelp [format "    -%-15s %s" $zName $zHelp]
      }
      3 {
        set zName    [lindex $spec 0]
        set zDefault [lindex $spec 1]
        set zHelp    [lindex $spec 2]
        lappend lSwitchHelp \
            [format "    -%-15s %s (default \"%s\")" "$zName VAL" $zHelp $zDefault]
      }
    }
  }

  puts stderr "Usage: $::argv0 ?SWITCHES? $lPositional"
  puts stderr ""
  puts stderr "Switches are:"
  foreach zLine $lSwitchHelp {
    puts stderr $zLine
  }
  puts stderr ""
  puts stderr $E
  exit -1
}

# Parse the command line arguments in list $lArgs according to the
# specification $O, storing the results in the caller's array named $avar.
#
#   avar   Name of an array variable in the caller's scope to populate.
#   lArgs  List of command line arguments (e.g. $argv).
#   O      Specification list. Each element is one of:
#            {name}               positional argument
#            {name...}            trailing positional arguments (0 or more)
#            {name help}          boolean switch -name (default 0)
#            {name default help}  switch -name VAL
#   E      Explanatory text passed through to [command_line_error].
#
# Switches may be abbreviated to any unique prefix. A switch that exactly
# names an option always selects that option, even if it is also a prefix
# of a longer option name. The argument "--" ends switch processing and is
# itself discarded, so that positional arguments beginning with "-" may
# follow it.
#
# Any usage error is reported via [command_line_error]. An [error] is
# raised if $O itself is malformed.
#
proc process_command_line {avar lArgs O E} {

  upvar $avar A
  set zTrailing ""       ;# Name of the "..." argument in $O, or "" if none
  set lPosargs [list]

  # Populate A() with default values. Also, for each switch in the command
  # line spec, set an entry in the idx() array as follows:
  #
  #  {tblname t1 "table name to use"}  
  #      -> [set idx(-tblname) {tblname t1 "table name to use"}]
  #
  # For each position parameter, append its name to $lPosargs. If the ...
  # specifier is present, set $zTrailing to the name of the prefix.
  #
  foreach o $O {
    set nm [lindex $o 0]
    set nArg [llength $o]
    switch -- $nArg {
      1 {
        if {[string range $nm end-2 end]=="..."} {
          set zTrailing [string range $nm 0 end-3]
        } else {
          lappend lPosargs $nm
        }
      }
      2 {
        set A($nm) 0
        set idx(-$nm) $o
      }
      3 {
        set A($nm) [lindex $o 1]
        set idx(-$nm) $o
      }
      default {
        error "Error in command line specification"
      }
    }
  }

  # Set explicitly specified option values
  #
  set nArg [llength $lArgs]
  for {set i 0} {$i < $nArg} {incr i} {
    set opt [lindex $lArgs $i]
    if {[string index $opt 0] ne "-"} break
    if {$opt eq "--"} {
      # End-of-switches marker. Step over it so that it is not mistaken
      # for the first positional argument.
      incr i
      break
    }

    # An exact match takes priority. Otherwise, look for options for which
    # $opt is a prefix.
    if {[info exists idx($opt)]} {
      set c [list $opt]
    } else {
      set c [array names idx "${opt}*"]
    }
    if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"}
    if {[llength $c]>1}  { command_line_error $O $E "Ambiguous option: $opt"}

    set spec $idx([lindex $c 0])
    if {[llength $spec]==3} {
      if {$i == $nArg-1} {
        command_line_error $O $E "Option requires argument: $c" 
      }
      incr i
      set A([lindex $spec 0]) [lindex $lArgs $i]
    } else {
      set A([lindex $spec 0]) 1
    }
  }

  # Deal with position arguments. At this point $i is the index of the
  # first argument in $lArgs that is not part of a switch.
  #
  set nPosarg [llength $lPosargs]
  set nRem [expr {$nArg - $i}]
  if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
    command_line_error $O $E
  }
  for {set j 0} {$j < $nPosarg} {incr j} {
    set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
  }
  if {$zTrailing!=""} {
    set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
  }
}
# End of command line options processor.
#-------------------------------------------------------------------------


process_command_line A $argv {
  {fts5                 "use fts5"}
  {fts4                 "use fts4"}
  {colsize   "10 10 10" "list of column sizes"}
  {tblname   "t1"       "table name to create"}
  {detail    "full"     "Fts5 detail mode to use"}
  {repeat    1          "Load each file this many times"}
  database
  file...
} {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.

Each "FILE" argument should be a text file. The contents of these text files is
split on whitespace characters to form a list of tokens. The first N1 tokens
are used for the first column of the first row, where N1 is the first element
of the -colsize list. The next N2 are used for the second column of the first
row, and so on. Rows are added to the table until the entire list of tokens
is exhausted.
}
  exit -1
}


set O(aColSize)       [list 10 10 10]
set O(tblname)        t1
set O(fts)            fts5


set options_with_values {-colsize}

for {set i 0} {$i < [llength $argv]} {incr i} {
  set opt [lindex $argv $i]
  if {[string range $opt 0 0]!="-"} break

  if {[lsearch $options_with_values $opt]>=0} {
    incr i
    if {$i==[llength $argv]} usage
    set val [lindex $argv $i]
  }

  switch -- $opt {
    -colsize {
      set O(aColSize) $val
    }

    -fts4 {
      set O(fts) fts4
if {$A(fts4)} {
  set A(fts) fts4
    }

} else {
    -fts5 {
      set O(fts) fts5
    }
  }
  set A(fts) fts5
}

}

if {$i > [llength $argv]-2} usage
set O(db) [lindex $argv $i]
set O(files) [lrange $argv [expr $i+1] end]

sqlite3 db $O(db)
sqlite3 db $A(database)

# Create the FTS table in the db. Return a list of the table columns.
#
proc create_table {} {
  global O
  global A
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  set nCol [llength $O(aColSize)]
  set nCol [llength $A(colsize)]
  set cols [lrange $cols 0 [expr $nCol-1]]

  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) ("
  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  append sql ");"
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" }

  db eval $sql
  return $cols
}

# Read the entire contents of the file named $file and return them as a
# list produced by [split] with its default (whitespace) separators.
#
proc readfile {file} {
  set channel [open $file]
  set contents [read $channel]
  close $channel
  return [split $contents]
}

# Return a list consisting of the elements of list $L repeated $n times.
# If $n is zero or negative, an empty list is returned.
#
proc repeat {L n} {
  set out [list]
  set k 0
  while {$k < $n} {
    set out [concat $out $L]
    incr k
  }
  return $out
}


# Load all the data into a big list of tokens.
#
set tokens [list]
foreach f $O(files) {
  set tokens [concat $tokens [readfile $f]]
foreach f $A(file) {
  set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]]
}

set N [llength $tokens]
set i 0
set cols [create_table]
set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]"
set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])"
foreach c [lrange $cols 1 end] {
  append sql ", \$A($c)"
  append sql ", \$R($c)"
}
append sql ")"

db eval BEGIN
  while {$i < $N} {
    foreach c $cols s $O(aColSize) {
      set A($c) [lrange $tokens $i [expr $i+$s-1]]
    foreach c $cols s $A(colsize) {
      set R($c) [lrange $tokens $i [expr $i+$s-1]]
      incr i $s
    }
    db eval $sql
  }
db eval COMMIT