/ Check-in [aa34bf666c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: aa34bf666c384cf32a8d8166ab6d9afbca26a256
User & Date: dan 2015-05-13 17:15:32
Context
2015-05-13
18:12
Merge latest trunk changes with this branch. check-in: b5f0e8c5b4 user: dan tags: fts5
17:15
Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. check-in: aa34bf666c user: dan tags: fts5
2015-05-09
18:28
Allow the fts5vocab table to optionally provide data on a per-column basis. check-in: 3922276135 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.c.

    13     13   ** This is an SQLite module implementing full-text search.
    14     14   */
    15     15   
    16     16   #if defined(SQLITE_ENABLE_FTS5)
    17     17   
    18     18   #include "fts5Int.h"
    19     19   
           20  +/*
           21  +** This variable is set to true when running corruption tests. Otherwise
           22  +** false. If it is false, extra assert() conditions in the fts5 code are
           23  +** activated - conditions that are only true if it is guaranteed that the
           24  +** fts5 database is not corrupt.
           25  +*/
           26  +int sqlite3_fts5_may_be_corrupt = 0;
           27  +
    20     28   
    21     29   typedef struct Fts5Table Fts5Table;
    22     30   typedef struct Fts5Cursor Fts5Cursor;
    23     31   typedef struct Fts5Global Fts5Global;
    24     32   typedef struct Fts5Auxiliary Fts5Auxiliary;
    25     33   typedef struct Fts5Auxdata Fts5Auxdata;
    26     34   

Changes to ext/fts5/fts5Int.h.

    40     40   #endif
    41     41   
    42     42   /*
    43     43   ** The assert_nc() macro is similar to the assert() macro, except that it
    44     44   ** is used for assert() conditions that are true only if it can be 
    45     45   ** guranteed that the database is not corrupt.
    46     46   */
    47         -#ifdef SQLITE_TEST
           47  +#ifdef SQLITE_DEBUG
    48     48   extern int sqlite3_fts5_may_be_corrupt;
    49     49   # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
    50     50   #else
    51     51   # define assert_nc(x) assert(x)
    52     52   #endif
    53     53   
    54     54   typedef struct Fts5Global Fts5Global;
................................................................................
   111    111     char *zRankArgs;                /* Arguments to rank function */
   112    112   
   113    113     /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
   114    114     char **pzErrmsg;
   115    115   };
   116    116   
   117    117   /* Current expected value of %_config table 'version' field */
   118         -#define FTS5_CURRENT_VERSION 1
          118  +#define FTS5_CURRENT_VERSION 2
   119    119   
   120    120   #define FTS5_CONTENT_NORMAL   0
   121    121   #define FTS5_CONTENT_NONE     1
   122    122   #define FTS5_CONTENT_EXTERNAL 2
   123    123   
   124    124   
   125    125   

Changes to ext/fts5/fts5_index.c.

   198    198   **
   199    199   **     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
   200    200   **       is an associated index-by-rowid record.
   201    201   **     * the number of zero-term leaves as a varint.
   202    202   **
   203    203   ** 5. Segment doclist indexes:
   204    204   **
   205         -**   A list of varints. If the first termless page contains at least one
   206         -**   docid, the list begins with that docid as a varint followed by the
   207         -**   value 1 (0x01). Or, if the first termless page contains no docids,
   208         -**   a varint containing the last docid stored on the term page followed
   209         -**   by a 0 (0x00) value.
          205  +**   Doclist indexes are themselves b-trees, however they usually consist of
          206  +**   a single leaf record only. The format of each doclist index leaf page 
          207  +**   is:
   210    208   **
   211         -**   For each subsequent page in the doclist, either a 0x00 byte if the
   212         -**   page contains no terms, or a delta-encoded docid (always +ve) 
   213         -**   representing the first docid on the page otherwise.
          209  +**     * Flags byte. Bits are:
          210  +**         0x01: Clear if leaf is also the root page, otherwise set.
          211  +**
          212  +**     * Page number of fts index leaf page. As a varint.
          213  +**
          214  +**     * First docid on page indicated by previous field. As a varint.
          215  +**
          216  +**     * A list of varints, one for each subsequent termless page. A 
          217  +**       positive delta if the termless page contains at least one docid, 
          218  +**       or an 0x00 byte otherwise.
          219  +**
          220  +**   Internal doclist index nodes are:
          221  +**
          222  +**     * Flags byte. Bits are:
          223  +**         0x01: Clear for root page, otherwise set.
          224  +**
          225  +**     * Page number of first child page. As a varint.
          226  +**
          227  +**     * Copy of first docid on page indicated by previous field. As a varint.
          228  +**
          229  +**     * A list of delta-encoded varints - the first docid on each subsequent
          230  +**       child page. 
          231  +**
   214    232   */
   215    233   
   216    234   /*
   217    235   ** Rowids for the averages and structure records in the %_data table.
   218    236   */
   219    237   #define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
   220    238   #define FTS5_STRUCTURE_ROWID   10    /* The structure record */
................................................................................
   236    254   **
   237    255   ** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
   238    256   ** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
   239    257   ** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
   240    258   ** SQLITE_FULL and fails the current operation if they ever prove too small.
   241    259   */
   242    260   #define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
          261  +#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
   243    262   #define FTS5_DATA_HEIGHT_B  5     /* Max b-tree height of 32 */
   244    263   #define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */
   245    264   
   246         -#define FTS5_SEGMENT_ROWID(segid, height, pgno) (                         \
   247         - ((i64)(segid)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
          265  +#define fts5_dri(segid, dlidx, height, pgno) (                                 \
          266  + ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
          267  + ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
   248    268    ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
   249    269    ((i64)(pgno))                                                                 \
   250    270   )
   251    271   
          272  +#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
          273  +#define FTS5_DLIDX_ROWID(segid, height, pgno)   fts5_dri(segid, 1, height, pgno)
          274  +
          275  +#if 0
   252    276   /*
   253    277   ** The height of segment b-trees is actually limited to one less than 
   254    278   ** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
   255    279   ** with such a height is used by doclist indexes.
   256    280   */
   257    281   #define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)
          282  +#endif
   258    283   
   259    284   /*
   260    285   ** Maximum segments permitted in a single index 
   261    286   */
   262    287   #define FTS5_MAX_SEGMENT 2000
   263    288   
          289  +#if 0
   264    290   /*
   265    291   ** The rowid for the doclist index associated with leaf page pgno of segment
   266    292   ** segid in index idx.
   267    293   */
   268         -#define FTS5_DOCLIST_IDX_ROWID(segid, pgno) \
          294  +#define FTS5_DOCLIST_IDX_ROWID(segid, height, pgno) \
   269    295           FTS5_SEGMENT_ROWID(segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
          296  +#endif
   270    297   
   271    298   #ifdef SQLITE_DEBUG
   272    299   int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
   273    300   #endif
   274    301   
   275    302   
   276    303   /*
................................................................................
   281    308   #define FTS5_DATA_ZERO_PADDING 8
   282    309   
   283    310   typedef struct Fts5BtreeIter Fts5BtreeIter;
   284    311   typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
   285    312   typedef struct Fts5ChunkIter Fts5ChunkIter;
   286    313   typedef struct Fts5Data Fts5Data;
   287    314   typedef struct Fts5DlidxIter Fts5DlidxIter;
          315  +typedef struct Fts5DlidxLvl Fts5DlidxLvl;
          316  +typedef struct Fts5DlidxWriter Fts5DlidxWriter;
   288    317   typedef struct Fts5MultiSegIter Fts5MultiSegIter;
   289    318   typedef struct Fts5NodeIter Fts5NodeIter;
   290    319   typedef struct Fts5PageWriter Fts5PageWriter;
   291    320   typedef struct Fts5PosIter Fts5PosIter;
   292    321   typedef struct Fts5SegIter Fts5SegIter;
   293    322   typedef struct Fts5DoclistIter Fts5DoclistIter;
   294    323   typedef struct Fts5SegWriter Fts5SegWriter;
................................................................................
   380    409   ** An object of type Fts5SegWriter is used to write to segments.
   381    410   */
   382    411   struct Fts5PageWriter {
   383    412     int pgno;                       /* Page number for this page */
   384    413     Fts5Buffer buf;                 /* Buffer containing page data */
   385    414     Fts5Buffer term;                /* Buffer containing previous term on page */
   386    415   };
          416  +struct Fts5DlidxWriter {
          417  +  int pgno;                       /* Page number for this page */
          418  +  int bPrevValid;                 /* True if iPrev is valid */
          419  +  i64 iPrev;                      /* Previous docid value written to page */
          420  +  Fts5Buffer buf;                 /* Buffer containing page data */
          421  +};
   387    422   struct Fts5SegWriter {
   388    423     int iSegid;                     /* Segid to write to */
   389    424     int nWriter;                    /* Number of entries in aWriter */
   390    425     Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
   391    426     i64 iPrevRowid;                 /* Previous docid written to current leaf */
   392    427     u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
   393    428     u8 bFirstRowidInPage;           /* True if next rowid is first in page */
   394    429     u8 bFirstTermInPage;            /* True if next term will be first in leaf */
   395    430     int nLeafWritten;               /* Number of leaf pages written */
   396    431     int nEmpty;                     /* Number of contiguous term-less nodes */
   397         -  Fts5Buffer cdlidx;               /* Doclist index */
   398         -  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
   399         -  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
          432  +
          433  +  int nDlidx;                     /* Allocated size of aDlidx[] array */
          434  +  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
   400    435   };
   401    436   
   402    437   /*
   403    438   ** Object for iterating through the merged results of one or more segments,
   404    439   ** visiting each term/docid pair in the merged data.
   405    440   **
   406    441   ** nSeg is always a power of two greater than or equal to the number of
................................................................................
   557    592   **
   558    593   ** bEof:
   559    594   **   Set to true once iterator has reached EOF.
   560    595   **
   561    596   ** iOff:
   562    597   **   Set to the current offset within record pData.
   563    598   */
   564         -struct Fts5DlidxIter {
   565         -  Fts5Data *pData;              /* Data for doclist index, if any */
   566         -  int iOff;                     /* Current offset into pDlidx */
          599  +struct Fts5DlidxLvl {
          600  +  Fts5Data *pData;              /* Data for current page of this level */
          601  +  int iOff;                     /* Current offset into pData */
   567    602     int bEof;                     /* At EOF already */
   568         -  int iFirstOff;                /* Used by reverse iterators only */
          603  +  int iFirstOff;                /* Used by reverse iterators */
   569    604   
   570    605     /* Output variables */
   571    606     int iLeafPgno;                /* Page number of current leaf page */
   572    607     i64 iRowid;                   /* First rowid on leaf iLeafPgno */
   573    608   };
          609  +struct Fts5DlidxIter {
          610  +  int nLvl;
          611  +  int iSegid;
          612  +  Fts5DlidxLvl aLvl[1];
          613  +};
          614  +
   574    615   
   575    616   
   576    617   /*
   577    618   ** An Fts5BtreeIter object is used to iterate through all entries in the
   578    619   ** b-tree hierarchy belonging to a single fts5 segment. In this case the
   579    620   ** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
   580    621   ** b-tree hierarchy consists of the following:
................................................................................
  1404   1445   ** Free any memory allocated by the iterator object.
  1405   1446   */
  1406   1447   static void fts5NodeIterFree(Fts5NodeIter *pIter){
  1407   1448     fts5BufferFree(&pIter->term);
  1408   1449   }
  1409   1450   
  1410   1451   /*
  1411         -** The iterator passed as the first argument has the following fields set
  1412         -** as follows. This function sets up the rest of the iterator so that it
  1413         -** points to the first rowid in the doclist-index.
  1414         -**
  1415         -**   pData: pointer to doclist-index record, 
  1416         -**   iLeafPgno: page number that this doclist-index is associated with.
  1417         -**
  1418         -** When this function is called pIter->iLeafPgno is the page number the
  1419         -** doclist is associated with (the one featuring the term).
  1420         -*/
  1421         -static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  1422         -  Fts5Data *pData = pIter->pData;
  1423         -  int i;
  1424         -  int bPresent;
  1425         -
  1426         -  assert( pIter->pData );
  1427         -  assert( pIter->iLeafPgno>0 );
  1428         -
  1429         -  /* Read the first rowid value. And the "present" flag that follows it. */
  1430         -  pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid);
  1431         -  bPresent = pData->p[pIter->iOff++];
  1432         -  if( bPresent ){
  1433         -    i = 0;
  1434         -  }else{
  1435         -    /* Count the number of leading 0x00 bytes. */
  1436         -    for(i=1; pIter->iOff<pData->n; i++){ 
  1437         -      if( pData->p[pIter->iOff] ) break;
  1438         -      pIter->iOff++;
  1439         -    }
  1440         -
  1441         -    /* Unless we are already at the end of the doclist-index, load the first
  1442         -    ** rowid value.  */
  1443         -    if( pIter->iOff<pData->n ){
         1452  +** Advance the iterator passed as the only argument. If the end of the 
         1453  +** doclist-index page is reached, return non-zero.
         1454  +*/
         1455  +static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
         1456  +  Fts5Data *pData = pLvl->pData;
         1457  +
         1458  +  if( pLvl->iOff==0 ){
         1459  +    assert( pLvl->bEof==0 );
         1460  +    pLvl->iOff = 1;
         1461  +    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
         1462  +    pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
         1463  +    pLvl->iFirstOff = pLvl->iOff;
         1464  +  }else{
         1465  +    int iOff;
         1466  +    for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
         1467  +      if( pData->p[iOff] ) break; 
         1468  +    }
         1469  +
         1470  +    if( iOff<pData->n ){
  1444   1471         i64 iVal;
  1445         -      pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal);
  1446         -      pIter->iRowid += iVal;
         1472  +      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
         1473  +      iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
         1474  +      pLvl->iRowid += iVal;
         1475  +      pLvl->iOff = iOff;
  1447   1476       }else{
  1448         -      pIter->bEof = 1;
         1477  +      pLvl->bEof = 1;
  1449   1478       }
  1450   1479     }
  1451         -  pIter->iLeafPgno += (i+1);
  1452   1480   
  1453         -  pIter->iFirstOff = pIter->iOff;
  1454         -  return pIter->bEof;
         1481  +  return pLvl->bEof;
  1455   1482   }
  1456   1483   
  1457   1484   /*
  1458   1485   ** Advance the iterator passed as the only argument.
  1459   1486   */
  1460         -static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
  1461         -  Fts5Data *pData = pIter->pData;
  1462         -  int iOff;
  1463         -
  1464         -  for(iOff=pIter->iOff; iOff<pData->n; iOff++){
  1465         -    if( pData->p[iOff] ) break; 
  1466         -  }
  1467         -
  1468         -  if( iOff<pData->n ){
  1469         -    i64 iVal;
  1470         -    pIter->iLeafPgno += (iOff - pIter->iOff) + 1;
  1471         -    iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
  1472         -    pIter->iRowid += iVal;
  1473         -    pIter->iOff = iOff;
  1474         -  }else{
  1475         -    pIter->bEof = 1;
  1476         -  }
  1477         -
  1478         -  return pIter->bEof;
  1479         -}
         1487  +static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
         1488  +  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
         1489  +
         1490  +  assert( iLvl<pIter->nLvl );
         1491  +  if( fts5DlidxLvlNext(pLvl) ){
         1492  +    if( (iLvl+1) < pIter->nLvl ){
         1493  +      fts5DlidxIterNextR(p, pIter, iLvl+1);
         1494  +      if( pLvl[1].bEof==0 ){
         1495  +        fts5DataRelease(pLvl->pData);
         1496  +        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
         1497  +        pLvl->pData = fts5DataRead(p, 
         1498  +            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
         1499  +        );
         1500  +        if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
         1501  +      }
         1502  +    }
         1503  +  }
         1504  +
         1505  +  return pIter->aLvl[0].bEof;
         1506  +}
         1507  +static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
         1508  +  return fts5DlidxIterNextR(p, pIter, 0);
         1509  +}
         1510  +
         1511  +/*
         1512  +** The iterator passed as the first argument has the following fields set
         1513  +** as follows. This function sets up the rest of the iterator so that it
         1514  +** points to the first rowid in the doclist-index.
         1515  +**
         1516  +**   pData:
         1517  +**     pointer to doclist-index record, 
         1518  +**
         1519  +** When this function is called pIter->iLeafPgno is the page number the
         1520  +** doclist is associated with (the one featuring the term).
         1521  +*/
         1522  +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
         1523  +  int i;
         1524  +  for(i=0; i<pIter->nLvl; i++){
         1525  +    fts5DlidxLvlNext(&pIter->aLvl[i]);
         1526  +  }
         1527  +  return pIter->aLvl[0].bEof;
         1528  +}
         1529  +
  1480   1530   
  1481   1531   static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  1482         -  return pIter->bEof;
  1483         -}
  1484         -
  1485         -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){
  1486         -  if( fts5DlidxIterFirst(pIter)==0 ){
  1487         -    while( 0==fts5DlidxIterNext(pIter) );
  1488         -    pIter->bEof = 0;
  1489         -  }
  1490         -}
  1491         -
  1492         -static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
  1493         -  int iOff = pIter->iOff;
  1494         -
  1495         -  assert( pIter->bEof==0 );
  1496         -  if( iOff<=pIter->iFirstOff ){
  1497         -    pIter->bEof = 1;
  1498         -  }else{
  1499         -    u8 *a = pIter->pData->p;
         1532  +  return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
         1533  +}
         1534  +
         1535  +static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
         1536  +  int i;
         1537  +
         1538  +  /* Advance each level to the last entry on the last page */
         1539  +  for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
         1540  +    Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
         1541  +    while( fts5DlidxLvlNext(pLvl)==0 );
         1542  +    pLvl->bEof = 0;
         1543  +
         1544  +    if( i>0 ){
         1545  +      Fts5DlidxLvl *pChild = &pLvl[-1];
         1546  +      fts5DataRelease(pChild->pData);
         1547  +      memset(pChild, 0, sizeof(Fts5DlidxLvl));
         1548  +      pChild->pData = fts5DataRead(p, 
         1549  +          FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
         1550  +      );
         1551  +    }
         1552  +  }
         1553  +}
         1554  +
         1555  +/*
         1556  +** Move the iterator passed as the only argument to the previous entry.
         1557  +*/
         1558  +static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
         1559  +  int iOff = pLvl->iOff;
         1560  +
         1561  +  assert( pLvl->bEof==0 );
         1562  +  if( iOff<=pLvl->iFirstOff ){
         1563  +    pLvl->bEof = 1;
         1564  +  }else{
         1565  +    u8 *a = pLvl->pData->p;
  1500   1566       i64 iVal;
  1501   1567       int iLimit;
         1568  +    int ii;
         1569  +    int nZero = 0;
  1502   1570   
  1503   1571       /* Currently iOff points to the first byte of a varint. This block 
  1504   1572       ** decrements iOff until it points to the first byte of the previous 
  1505   1573       ** varint. Taking care not to read any memory locations that occur
  1506   1574       ** before the buffer in memory.  */
  1507   1575       iLimit = (iOff>9 ? iOff-9 : 0);
  1508   1576       for(iOff--; iOff>iLimit; iOff--){
  1509   1577         if( (a[iOff-1] & 0x80)==0 ) break;
  1510   1578       }
  1511   1579   
  1512   1580       getVarint(&a[iOff], (u64*)&iVal);
  1513         -    pIter->iRowid -= iVal;
  1514         -    pIter->iLeafPgno--;
  1515         -
  1516         -    /* Skip backwards passed any 0x00 bytes. */
  1517         -    while( iOff>pIter->iFirstOff 
  1518         -        && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 
  1519         -    ){
  1520         -      iOff--;
  1521         -      pIter->iLeafPgno--;
  1522         -    }
  1523         -    pIter->iOff = iOff;
  1524         -  }
  1525         -
  1526         -  return pIter->bEof;
         1581  +    pLvl->iRowid -= iVal;
         1582  +    pLvl->iLeafPgno--;
         1583  +
         1584  +    /* Skip backwards past any 0x00 varints. */
         1585  +    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
         1586  +      nZero++;
         1587  +    }
         1588  +    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
         1589  +      /* The byte immediately before the last 0x00 byte has the 0x80 bit
         1590  +      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
         1591  +      ** bytes before a[ii]. */
         1592  +      int bZero = 0;              /* True if last 0x00 counts */
         1593  +      if( (ii-8)>=pLvl->iFirstOff ){
         1594  +        int j;
         1595  +        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
         1596  +        bZero = (j>8);
         1597  +      }
         1598  +      if( bZero==0 ) nZero--;
         1599  +    }
         1600  +    pLvl->iLeafPgno -= nZero;
         1601  +    pLvl->iOff = iOff - nZero;
         1602  +  }
         1603  +
         1604  +  return pLvl->bEof;
         1605  +}
         1606  +
         1607  +static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
         1608  +  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
         1609  +
         1610  +  assert( iLvl<pIter->nLvl );
         1611  +  if( fts5DlidxLvlPrev(pLvl) ){
         1612  +    if( (iLvl+1) < pIter->nLvl ){
         1613  +      fts5DlidxIterPrevR(p, pIter, iLvl+1);
         1614  +      if( pLvl[1].bEof==0 ){
         1615  +        fts5DataRelease(pLvl->pData);
         1616  +        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
         1617  +        pLvl->pData = fts5DataRead(p, 
         1618  +            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
         1619  +        );
         1620  +        if( pLvl->pData ){
         1621  +          while( fts5DlidxLvlNext(pLvl)==0 );
         1622  +          pLvl->bEof = 0;
         1623  +        }
         1624  +      }
         1625  +    }
         1626  +  }
         1627  +
         1628  +  return pIter->aLvl[0].bEof;
         1629  +}
         1630  +static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
         1631  +  return fts5DlidxIterPrevR(p, pIter, 0);
         1632  +}
         1633  +
         1634  +/*
         1635  +** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
         1636  +*/
         1637  +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
         1638  +  if( pIter ){
         1639  +    int i;
         1640  +    for(i=0; i<pIter->nLvl; i++){
         1641  +      fts5DataRelease(pIter->aLvl[i].pData);
         1642  +    }
         1643  +    sqlite3_free(pIter);
         1644  +  }
  1527   1645   }
  1528   1646   
  1529   1647   static Fts5DlidxIter *fts5DlidxIterInit(
  1530   1648     Fts5Index *p,                   /* Fts5 Backend to iterate within */
  1531   1649     int bRev,                       /* True for ORDER BY ASC */
  1532   1650     int iSegid,                     /* Segment id */
  1533   1651     int iLeafPg                     /* Leaf page number to load dlidx for */
  1534   1652   ){
  1535         -  Fts5DlidxIter *pIter;
         1653  +  Fts5DlidxIter *pIter = 0;
         1654  +  int i;
         1655  +  int bDone = 0;
  1536   1656   
  1537         -  pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
  1538         -  if( pIter==0 ) return 0;
         1657  +  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
         1658  +    int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
         1659  +    Fts5DlidxIter *pNew;
  1539   1660   
  1540         -  pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg));
  1541         -  if( pIter->pData==0 ){
  1542         -    sqlite3_free(pIter);
  1543         -    pIter = 0;
  1544         -  }else{
  1545         -    pIter->iLeafPgno = iLeafPg;
         1661  +    pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
         1662  +    if( pNew==0 ){
         1663  +      p->rc = SQLITE_NOMEM;
         1664  +    }else{
         1665  +      i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
         1666  +      Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
         1667  +      pIter = pNew;
         1668  +      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
         1669  +      pLvl->pData = fts5DataRead(p, iRowid);
         1670  +      if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
         1671  +        bDone = 1;
         1672  +      }
         1673  +      pIter->nLvl = i+1;
         1674  +    }
         1675  +  }
         1676  +
         1677  +  if( p->rc==SQLITE_OK ){
         1678  +    pIter->iSegid = iSegid;
  1546   1679       if( bRev==0 ){
  1547   1680         fts5DlidxIterFirst(pIter);
  1548   1681       }else{
  1549         -      fts5DlidxIterLast(pIter);
         1682  +      fts5DlidxIterLast(p, pIter);
  1550   1683       }
  1551   1684     }
         1685  +
         1686  +  if( p->rc!=SQLITE_OK ){
         1687  +    fts5DlidxIterFree(pIter);
         1688  +    pIter = 0;
         1689  +  }
  1552   1690   
  1553   1691     return pIter;
  1554   1692   }
  1555   1693   
  1556         -/*
  1557         -** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
  1558         -*/
  1559         -static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  1560         -  if( pIter ){
  1561         -    fts5DataRelease(pIter->pData);
  1562         -    sqlite3_free(pIter);
  1563         -  }
         1694  +static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
         1695  +  return pIter->aLvl[0].iRowid;
         1696  +}
         1697  +static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
         1698  +  return pIter->aLvl[0].iLeafPgno;
  1564   1699   }
  1565   1700   
  1566   1701   static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){
  1567   1702     *piRowid = (int)fts5GetU16(&pLeaf->p[0]);
  1568   1703     *piTerm = (int)fts5GetU16(&pLeaf->p[2]);
  1569   1704   }
  1570   1705   
................................................................................
  1936   2071     int pgnoLast = 0;
  1937   2072   
  1938   2073     if( pDlidx ){
  1939   2074       /* If the doclist-iterator is already at EOF, then the current doclist
  1940   2075       ** contains no entries except those on the current page. */
  1941   2076       if( fts5DlidxIterEof(p, pDlidx)==0 ){
  1942   2077         int iSegid = pIter->pSeg->iSegid;
  1943         -      pgnoLast = pDlidx->iLeafPgno;
         2078  +      pgnoLast = fts5DlidxIterPgno(pDlidx);
  1944   2079         pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
  1945   2080       }else{
  1946   2081         pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
  1947   2082       }
  1948   2083     }else{
  1949   2084       int iOff;                               /* Byte offset within pLeaf */
  1950   2085       Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
................................................................................
  2342   2477   
  2343   2478     pRes->iFirst = iRes;
  2344   2479     return 0;
  2345   2480   }
  2346   2481   
  2347   2482   /*
  2348   2483   ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
  2349         -** It is an error if leaf iLeafPgno contains no rowid.
         2484  +** It is an error if leaf iLeafPgno does not exist or contains no rowids.
  2350   2485   */
  2351   2486   static void fts5SegIterGotoPage(
  2352   2487     Fts5Index *p,                   /* FTS5 backend object */
  2353   2488     Fts5SegIter *pIter,             /* Iterator to advance */
  2354   2489     int iLeafPgno
  2355   2490   ){
  2356   2491     assert( iLeafPgno>pIter->iLeafPgno );
  2357         -  pIter->iLeafPgno = iLeafPgno-1;
  2358         -  fts5SegIterNextPage(p, pIter);
  2359         -  assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
         2492  +  if( iLeafPgno>pIter->pSeg->pgnoLast ){
         2493  +    p->rc = FTS5_CORRUPT;
         2494  +  }else{
         2495  +    pIter->iLeafPgno = iLeafPgno-1;
         2496  +    fts5SegIterNextPage(p, pIter);
         2497  +    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
  2360   2498   
  2361         -  if( p->rc==SQLITE_OK ){
  2362         -    int iOff;
  2363         -    u8 *a = pIter->pLeaf->p;
  2364         -    int n = pIter->pLeaf->n;
         2499  +    if( p->rc==SQLITE_OK ){
         2500  +      int iOff;
         2501  +      u8 *a = pIter->pLeaf->p;
         2502  +      int n = pIter->pLeaf->n;
  2365   2503   
  2366         -    iOff = fts5GetU16(&a[0]);
  2367         -    if( iOff<4 || iOff>=n ){
  2368         -      p->rc = FTS5_CORRUPT;
  2369         -    }else{
  2370         -      iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid);
  2371         -      pIter->iLeafOffset = iOff;
  2372         -      fts5SegIterLoadNPos(p, pIter);
         2504  +      iOff = fts5GetU16(&a[0]);
         2505  +      if( iOff<4 || iOff>=n ){
         2506  +        p->rc = FTS5_CORRUPT;
         2507  +      }else{
         2508  +        iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid);
         2509  +        pIter->iLeafOffset = iOff;
         2510  +        fts5SegIterLoadNPos(p, pIter);
         2511  +      }
  2373   2512       }
  2374   2513     }
  2375   2514   }
  2376   2515   
  2377   2516   /*
  2378   2517   ** Advance the iterator passed as the second argument until it is at or 
  2379   2518   ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
................................................................................
  2390   2529     int bMove = 1;
  2391   2530   
  2392   2531     assert( pIter->flags & FTS5_SEGITER_ONETERM );
  2393   2532     assert( pIter->pDlidx );
  2394   2533     assert( pIter->pLeaf );
  2395   2534   
  2396   2535     if( bRev==0 ){
  2397         -    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
  2398         -      iLeafPgno = pDlidx->iLeafPgno;
  2399         -      fts5DlidxIterNext(pDlidx);
         2536  +    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
         2537  +      iLeafPgno = fts5DlidxIterPgno(pDlidx);
         2538  +      fts5DlidxIterNext(p, pDlidx);
  2400   2539       }
  2401         -    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
         2540  +    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
  2402   2541       if( iLeafPgno>pIter->iLeafPgno ){
  2403   2542         fts5SegIterGotoPage(p, pIter, iLeafPgno);
  2404   2543         bMove = 0;
  2405   2544       }
  2406   2545     }else{
  2407   2546       assert( iMatch<pIter->iRowid );
  2408         -    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
  2409         -      fts5DlidxIterPrev(pDlidx);
         2547  +    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
         2548  +      fts5DlidxIterPrev(p, pDlidx);
  2410   2549       }
  2411         -    iLeafPgno = pDlidx->iLeafPgno;
         2550  +    iLeafPgno = fts5DlidxIterPgno(pDlidx);
  2412   2551   
  2413   2552       assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
  2414   2553   
  2415   2554       if( iLeafPgno<pIter->iLeafPgno ){
  2416   2555         pIter->iLeafPgno = iLeafPgno+1;
  2417   2556         fts5SegIterReverseNewPage(p, pIter);
  2418   2557         bMove = 0;
................................................................................
  2799   2938     int i;
  2800   2939     assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
  2801   2940     for(i=0; i<nOld; i++){
  2802   2941       if( pOld[i]!=pNew[i] ) break;
  2803   2942     }
  2804   2943     return i;
  2805   2944   }
         2945  +
         2946  +static void fts5WriteDlidxClear(
         2947  +  Fts5Index *p, 
         2948  +  Fts5SegWriter *pWriter,
         2949  +  int bFlush                      /* If true, write dlidx to disk */
         2950  +){
         2951  +  int i;
         2952  +  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
         2953  +  for(i=0; i<pWriter->nDlidx; i++){
         2954  +    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
         2955  +    if( pDlidx->buf.n==0 ) break;
         2956  +    if( bFlush ){
         2957  +      assert( pDlidx->pgno!=0 );
         2958  +      fts5DataWrite(p, 
         2959  +          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
         2960  +          pDlidx->buf.p, pDlidx->buf.n
         2961  +      );
         2962  +    }
         2963  +    sqlite3Fts5BufferZero(&pDlidx->buf);
         2964  +    pDlidx->bPrevValid = 0;
         2965  +  }
         2966  +}
         2967  +
         2968  +/*
         2969  +** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
         2970  +** Any new array elements are zeroed before returning.
         2971  +*/
         2972  +static int fts5WriteDlidxGrow(
         2973  +  Fts5Index *p,
         2974  +  Fts5SegWriter *pWriter,
         2975  +  int nLvl
         2976  +){
         2977  +  if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
         2978  +    Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
         2979  +        pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
         2980  +    );
         2981  +    if( aDlidx==0 ){
         2982  +      p->rc = SQLITE_NOMEM;
         2983  +    }else{
         2984  +      int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
         2985  +      memset(&aDlidx[pWriter->nDlidx], 0, nByte);
         2986  +      pWriter->aDlidx = aDlidx;
         2987  +      pWriter->nDlidx = nLvl;
         2988  +    }
         2989  +  }
         2990  +  return p->rc;
         2991  +}
  2806   2992   
  2807   2993   /*
  2808   2994   ** If an "nEmpty" record must be written to the b-tree before the next
  2809   2995   ** term, write it now. 
  2810   2996   */
  2811   2997   static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  2812   2998     if( pWriter->nEmpty ){
  2813   2999       int bFlag = 0;
  2814   3000       Fts5PageWriter *pPg;
  2815   3001       pPg = &pWriter->aWriter[1];
  2816         -    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
  2817         -      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
  2818         -          pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
  2819         -      );
  2820         -      assert( pWriter->cdlidx.n>0 );
  2821         -      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
         3002  +
         3003  +    /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
         3004  +    ** to the database, also write the doclist-index to disk.  */
         3005  +    if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
  2822   3006         bFlag = 1;
  2823   3007       }
         3008  +    fts5WriteDlidxClear(p, pWriter, bFlag);
  2824   3009       fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
  2825   3010       fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
  2826   3011       pWriter->nEmpty = 0;
         3012  +  }else{
         3013  +    fts5WriteDlidxClear(p, pWriter, 0);
  2827   3014     }
  2828   3015   
  2829         -  /* Whether or not it was written to disk, zero the doclist index at this
  2830         -  ** point */
  2831         -  sqlite3Fts5BufferZero(&pWriter->cdlidx);
  2832         -  pWriter->bDlidxPrevValid = 0;
         3016  +  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 );
         3017  +  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 );
  2833   3018   }
  2834   3019   
  2835   3020   static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  2836   3021     if( p->rc==SQLITE_OK ){
  2837   3022       Fts5PageWriter *aNew;
  2838   3023       Fts5PageWriter *pNew;
  2839   3024       int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
................................................................................
  2896   3081         fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
  2897   3082         fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  2898   3083         break;
  2899   3084       }
  2900   3085     }
  2901   3086   }
  2902   3087   
         3088  +/*
         3089  +** This function is called when flushing a leaf page that contains no
         3090  +** terms at all to disk.
         3091  +*/
  2903   3092   static void fts5WriteBtreeNoTerm(
  2904   3093     Fts5Index *p,                   /* FTS5 backend object */
  2905   3094     Fts5SegWriter *pWriter          /* Writer object */
  2906   3095   ){
  2907         -  if( pWriter->bFirstRowidInPage ){
  2908         -    /* No rowids on this page. Append an 0x00 byte to the current 
  2909         -    ** doclist-index */
  2910         -    if( pWriter->bDlidxPrevValid==0 ){
  2911         -      i64 iRowid = pWriter->iPrevRowid;
  2912         -      sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
  2913         -      pWriter->bDlidxPrevValid = 1;
  2914         -      pWriter->iDlidxPrev = iRowid;
  2915         -    }
  2916         -    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0);
         3096  +  /* If there were no rowids on the leaf page either and the doclist-index
         3097  +  ** has already been started, append an 0x00 byte to it.  */
         3098  +  if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
         3099  +    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
         3100  +    assert( pDlidx->bPrevValid );
         3101  +    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  2917   3102     }
         3103  +
         3104  +  /* Increment the "number of sequential leaves without a term" counter. */
  2918   3105     pWriter->nEmpty++;
  2919   3106   }
         3107  +
         3108  +static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
         3109  +  i64 iRowid;
         3110  +  int iOff;
         3111  +
         3112  +  iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid);
         3113  +  getVarint(&pBuf->p[iOff], (u64*)&iRowid);
         3114  +  return iRowid;
         3115  +}
  2920   3116   
  2921   3117   /*
  2922         -** Rowid iRowid has just been appended to the current leaf page. As it is
  2923         -** the first on its page, append an entry to the current doclist-index.
         3118  +** Rowid iRowid has just been appended to the current leaf page. It is the
         3119  +** first on the page. This function appends an appropriate entry to the current
         3120  +** doclist-index.
  2924   3121   */
  2925   3122   static void fts5WriteDlidxAppend(
  2926   3123     Fts5Index *p, 
  2927   3124     Fts5SegWriter *pWriter, 
  2928   3125     i64 iRowid
  2929   3126   ){
  2930         -  i64 iVal;
  2931         -  if( pWriter->bDlidxPrevValid ){
  2932         -    iVal = iRowid - pWriter->iDlidxPrev;
  2933         -  }else{
  2934         -    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
  2935         -    iVal = 1;
  2936         -  }
  2937         -  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal);
  2938         -  pWriter->bDlidxPrevValid = 1;
  2939         -  pWriter->iDlidxPrev = iRowid;
         3127  +  int i;
         3128  +  int bDone = 0;
         3129  +
         3130  +  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
         3131  +    i64 iVal;
         3132  +    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
         3133  +
         3134  +    if( pDlidx->buf.n>=p->pConfig->pgsz ){
         3135  +      /* The current doclist-index page is full. Write it to disk and push
         3136  +      ** a copy of iRowid (which will become the first rowid on the next
         3137  +      ** doclist-index leaf page) up into the next level of the b-tree 
         3138  +      ** hierarchy. If the node being flushed is currently the root node,
         3139  +      ** also push its first rowid upwards. */
         3140  +      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
         3141  +      fts5DataWrite(p, 
         3142  +          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
         3143  +          pDlidx->buf.p, pDlidx->buf.n
         3144  +      );
         3145  +      fts5WriteDlidxGrow(p, pWriter, i+2);
         3146  +      pDlidx = &pWriter->aDlidx[i];
         3147  +      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
         3148  +        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
         3149  +
         3150  +        /* This was the root node. Push its first rowid up to the new root. */
         3151  +        pDlidx[1].pgno = pDlidx->pgno;
         3152  +        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
         3153  +        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
         3154  +        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
         3155  +        pDlidx[1].bPrevValid = 1;
         3156  +        pDlidx[1].iPrev = iFirst;
         3157  +      }
         3158  +
         3159  +      sqlite3Fts5BufferZero(&pDlidx->buf);
         3160  +      pDlidx->bPrevValid = 0;
         3161  +      pDlidx->pgno++;
         3162  +    }else{
         3163  +      bDone = 1;
         3164  +    }
         3165  +
         3166  +    if( pDlidx->bPrevValid ){
         3167  +      iVal = iRowid - pDlidx->iPrev;
         3168  +    }else{
         3169  +      i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno);
         3170  +      assert( pDlidx->buf.n==0 );
         3171  +      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
         3172  +      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
         3173  +      iVal = iRowid;
         3174  +    }
         3175  +
         3176  +    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
         3177  +    pDlidx->bPrevValid = 1;
         3178  +    pDlidx->iPrev = iRowid;
         3179  +  }
  2940   3180   }
  2941   3181   
  2942   3182   static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  2943   3183     static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  2944   3184     Fts5PageWriter *pPage = &pWriter->aWriter[0];
  2945   3185     i64 iRowid;
  2946   3186   
................................................................................
  3029   3269   
  3030   3270     /* Update the Fts5PageWriter.term field. */
  3031   3271     fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  3032   3272     pWriter->bFirstTermInPage = 0;
  3033   3273   
  3034   3274     pWriter->bFirstRowidInPage = 0;
  3035   3275     pWriter->bFirstRowidInDoclist = 1;
         3276  +
         3277  +  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
         3278  +  pWriter->aDlidx[0].pgno = pPage->pgno;
  3036   3279   
  3037   3280     /* If the current leaf page is full, flush it to disk. */
  3038   3281     if( pPage->buf.n>=p->pConfig->pgsz ){
  3039   3282       fts5WriteFlushLeaf(p, pWriter);
  3040   3283     }
  3041   3284   }
  3042   3285   
................................................................................
  3167   3410     }
  3168   3411     for(i=0; i<pWriter->nWriter; i++){
  3169   3412       Fts5PageWriter *pPg = &pWriter->aWriter[i];
  3170   3413       fts5BufferFree(&pPg->term);
  3171   3414       fts5BufferFree(&pPg->buf);
  3172   3415     }
  3173   3416     sqlite3_free(pWriter->aWriter);
  3174         -  sqlite3Fts5BufferFree(&pWriter->cdlidx);
         3417  +
         3418  +  for(i=0; i<pWriter->nDlidx; i++){
         3419  +    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
         3420  +  }
         3421  +  sqlite3_free(pWriter->aDlidx);
  3175   3422   }
  3176   3423   
  3177   3424   static void fts5WriteInit(
  3178   3425     Fts5Index *p, 
  3179   3426     Fts5SegWriter *pWriter, 
  3180   3427     int iSegid
  3181   3428   ){
  3182   3429     memset(pWriter, 0, sizeof(Fts5SegWriter));
  3183   3430     pWriter->iSegid = iSegid;
  3184   3431   
  3185         -  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter));
  3186         -  if( pWriter->aWriter==0 ) return;
         3432  +  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
         3433  +  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));
         3434  +  if( pWriter->aDlidx==0 ) return;
  3187   3435     pWriter->nWriter = 1;
         3436  +  pWriter->nDlidx = 1;
  3188   3437     pWriter->aWriter[0].pgno = 1;
  3189   3438     pWriter->bFirstTermInPage = 1;
  3190   3439   }
  3191   3440   
  3192   3441   static void fts5WriteInitForAppend(
  3193   3442     Fts5Index *p,                   /* FTS5 backend object */
  3194   3443     Fts5SegWriter *pWriter,         /* Writer to initialize */
  3195   3444     Fts5StructureSegment *pSeg      /* Segment object to append to */
  3196   3445   ){
  3197   3446     int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
  3198   3447     memset(pWriter, 0, sizeof(Fts5SegWriter));
  3199   3448     pWriter->iSegid = pSeg->iSegid;
  3200   3449     pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
         3450  +  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));
  3201   3451   
  3202   3452     if( p->rc==SQLITE_OK ){
  3203   3453       int pgno = 1;
  3204   3454       int i;
         3455  +    pWriter->nDlidx = 1;
  3205   3456       pWriter->nWriter = pSeg->nHeight;
  3206   3457       pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
  3207   3458       for(i=pSeg->nHeight-1; i>0; i--){
  3208   3459         i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
  3209   3460         Fts5PageWriter *pPg = &pWriter->aWriter[i];
  3210   3461         pPg->pgno = pgno;
  3211   3462         fts5DataBuffer(p, &pPg->buf, iRowid);
................................................................................
  3579   3830   
  3580   3831       /* Pre-allocate the buffer used to assemble leaf pages to the target
  3581   3832       ** page size.  */
  3582   3833       assert( pgsz>0 );
  3583   3834       pBuf = &writer.aWriter[0].buf;
  3584   3835       fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
  3585   3836   
  3586         -    /* Begin scanning through hash table entries. */
         3837  +    /* Begin scanning through hash table entries. This loop runs once for each
         3838  +    ** term/doclist currently stored within the hash table. */
  3587   3839       if( p->rc==SQLITE_OK ){
  3588   3840         memset(pBuf->p, 0, 4);
  3589   3841         pBuf->n = 4;
  3590   3842         p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
  3591   3843       }
  3592         -
  3593   3844       while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
  3594         -      const char *zTerm;
  3595         -      int nTerm;
  3596         -      const u8 *pDoclist;
  3597         -      int nDoclist;
         3845  +      const char *zTerm;          /* Buffer containing term */
         3846  +      int nTerm;                  /* Size of zTerm in bytes */
         3847  +      const u8 *pDoclist;         /* Pointer to doclist for this term */
         3848  +      int nDoclist;               /* Size of doclist in bytes */
  3598   3849         int nSuffix;                /* Size of term suffix */
  3599   3850   
  3600   3851         sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
  3601   3852         nTerm = strlen(zTerm);
  3602   3853   
  3603   3854         /* Decide if the term will fit on the current leaf. If it will not, 
  3604   3855         ** flush the leaf to disk here.  */
................................................................................
  3607   3858           pBuf = &writer.aWriter[0].buf;
  3608   3859           if( (nTerm + 32) > pBuf->nSpace ){
  3609   3860             fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
  3610   3861             if( p->rc ) break;
  3611   3862           }
  3612   3863         }
  3613   3864   
  3614         -      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
         3865  +      /* Write the term to the leaf. And if it is the first on the leaf, and
         3866  +      ** the leaf is not page number 1, push it up into the b-tree hierarchy 
         3867  +      ** as well.  */
  3615   3868         if( writer.bFirstTermInPage==0 ){
  3616   3869           int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
  3617   3870           pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre);
  3618   3871           nSuffix = nTerm - nPre;
  3619   3872         }else{
  3620   3873           fts5PutU16(&pBuf->p[2], pBuf->n);
  3621   3874           writer.bFirstTermInPage = 0;
................................................................................
  3625   3878             pBuf = &writer.aWriter[0].buf;
  3626   3879             assert( nPre<nTerm );
  3627   3880           }
  3628   3881           nSuffix = nTerm;
  3629   3882         }
  3630   3883         pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix);
  3631   3884         fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);
         3885  +
         3886  +      /* We just wrote a term into page writer.aWriter[0].pgno. If a 
         3887  +      ** doclist-index is to be generated for this doclist, it will be
         3888  +      ** associated with this page. */
         3889  +      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
         3890  +      writer.aDlidx[0].pgno = writer.aWriter[0].pgno;
  3632   3891   
  3633   3892         if( pgsz>=(pBuf->n + nDoclist + 1) ){
  3634   3893           /* The entire doclist will fit on the current leaf. */
  3635   3894           fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
  3636   3895         }else{
  3637   3896           i64 iRowid = 0;
  3638   3897           i64 iDelta = 0;
................................................................................
  3821   4080     int bSz,
  3822   4081     Fts5Buffer *pBuf
  3823   4082   ){
  3824   4083     if( p->rc==SQLITE_OK ){
  3825   4084       Fts5ChunkIter iter;
  3826   4085       Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
  3827   4086       assert( fts5MultiIterEof(p, pMulti)==0 );
  3828         -    static int nCall = 0;
  3829         -    nCall++;
  3830   4087   
  3831   4088       fts5ChunkIterInit(p, pSeg, &iter);
  3832   4089   
  3833   4090       if( fts5ChunkIterEof(p, &iter)==0 ){
  3834   4091         if( bSz ){
  3835   4092           /* WRITEPOSLISTSIZE */
  3836   4093           fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
................................................................................
  4412   4669   }
  4413   4670   
  4414   4671   /*
  4415   4672   ** Return the current term.
  4416   4673   */
  4417   4674   const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  4418   4675     int n;
  4419         -  const char *z = fts5MultiIterTerm(pIter->pMulti, &n);
         4676  +  const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n);
  4420   4677     *pn = n-1;
  4421   4678     return &z[1];
  4422   4679   }
  4423   4680   
  4424   4681   
  4425   4682   /*
  4426   4683   ** Return a pointer to a buffer containing a copy of the position list for
................................................................................
  4650   4907   #ifdef SQLITE_DEBUG
  4651   4908   static void fts5DlidxIterTestReverse(
  4652   4909     Fts5Index *p, 
  4653   4910     int iSegid,                     /* Segment id to load from */
  4654   4911     int iLeaf                       /* Load doclist-index for this leaf */
  4655   4912   ){
  4656   4913     Fts5DlidxIter *pDlidx = 0;
  4657         -  i64 cksum1 = 13;
  4658         -  i64 cksum2 = 13;
         4914  +  u64 cksum1 = 13;
         4915  +  u64 cksum2 = 13;
  4659   4916   
  4660   4917     for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
  4661   4918         fts5DlidxIterEof(p, pDlidx)==0;
  4662         -      fts5DlidxIterNext(pDlidx)
         4919  +      fts5DlidxIterNext(p, pDlidx)
  4663   4920     ){
  4664         -    assert( pDlidx->iLeafPgno>iLeaf );
  4665         -    cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
  4666         -    cksum1 = (cksum1 ^ pDlidx->iRowid);
         4921  +    i64 iRowid = fts5DlidxIterRowid(pDlidx);
         4922  +    int pgno = fts5DlidxIterPgno(pDlidx);
         4923  +    assert( pgno>iLeaf );
         4924  +    cksum1 += iRowid + ((i64)pgno<<32);
  4667   4925     }
  4668   4926     fts5DlidxIterFree(pDlidx);
  4669   4927     pDlidx = 0;
  4670   4928   
  4671   4929     for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
  4672   4930         fts5DlidxIterEof(p, pDlidx)==0;
  4673         -      fts5DlidxIterPrev(pDlidx)
         4931  +      fts5DlidxIterPrev(p, pDlidx)
  4674   4932     ){
  4675         -    assert( pDlidx->iLeafPgno>iLeaf );
  4676         -    cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
  4677         -    cksum2 = (cksum2 ^ pDlidx->iRowid);
         4933  +    i64 iRowid = fts5DlidxIterRowid(pDlidx);
         4934  +    int pgno = fts5DlidxIterPgno(pDlidx);
         4935  +
         4936  +    assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
         4937  +    cksum2 += iRowid + ((i64)pgno<<32);
  4678   4938     }
  4679   4939     fts5DlidxIterFree(pDlidx);
  4680   4940     pDlidx = 0;
  4681   4941   
  4682         -  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; 
         4942  +  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
  4683   4943   }
  4684   4944   #else
  4685   4945   # define fts5DlidxIterTestReverse(x,y,z)
  4686   4946   #endif
  4687   4947   
  4688   4948   static void fts5IndexIntegrityCheckSegment(
  4689   4949     Fts5Index *p,                   /* FTS5 backend object */
................................................................................
  4744   5004         int iPrevLeaf = iter.iLeaf;
  4745   5005         int iSegid = pSeg->iSegid;
  4746   5006         int iPg;
  4747   5007         i64 iKey;
  4748   5008   
  4749   5009         for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf);
  4750   5010             fts5DlidxIterEof(p, pDlidx)==0;
  4751         -          fts5DlidxIterNext(pDlidx)
         5011  +          fts5DlidxIterNext(p, pDlidx)
  4752   5012         ){
  4753   5013   
  4754   5014           /* Check any rowid-less pages that occur before the current leaf. */
  4755         -        for(iPg=iPrevLeaf+1; iPg<pDlidx->iLeafPgno; iPg++){
         5015  +        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
  4756   5016             iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
  4757   5017             pLeaf = fts5DataRead(p, iKey);
  4758   5018             if( pLeaf ){
  4759   5019               if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
  4760   5020               fts5DataRelease(pLeaf);
  4761   5021             }
  4762   5022           }
  4763         -        iPrevLeaf = pDlidx->iLeafPgno;
         5023  +        iPrevLeaf = fts5DlidxIterPgno(pDlidx);
  4764   5024   
  4765   5025           /* Check that the leaf page indicated by the iterator really does
  4766   5026           ** contain the rowid suggested by the same. */
  4767         -        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno);
         5027  +        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
  4768   5028           pLeaf = fts5DataRead(p, iKey);
  4769   5029           if( pLeaf ){
  4770   5030             i64 iRowid;
  4771   5031             int iRowidOff = fts5GetU16(&pLeaf->p[0]);
  4772   5032             getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
  4773         -          if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT;
         5033  +          if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
  4774   5034             fts5DataRelease(pLeaf);
  4775   5035           }
  4776         -
  4777   5036         }
  4778   5037   
  4779   5038         for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
  4780   5039           iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
  4781   5040           pLeaf = fts5DataRead(p, iKey);
  4782   5041           if( pLeaf ){
  4783   5042             if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
................................................................................
  4990   5249   /*
  4991   5250   ** Decode a segment-data rowid from the %_data table. This function is
  4992   5251   ** the opposite of macro FTS5_SEGMENT_ROWID().
  4993   5252   */
  4994   5253   static void fts5DecodeRowid(
  4995   5254     i64 iRowid,                     /* Rowid from %_data table */
  4996   5255     int *piSegid,                   /* OUT: Segment id */
         5256  +  int *pbDlidx,                   /* OUT: Dlidx flag */
  4997   5257     int *piHeight,                  /* OUT: Height */
  4998   5258     int *piPgno                     /* OUT: Page number */
  4999   5259   ){
  5000   5260     *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
  5001   5261     iRowid >>= FTS5_DATA_PAGE_B;
  5002   5262   
  5003   5263     *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
  5004   5264     iRowid >>= FTS5_DATA_HEIGHT_B;
         5265  +
         5266  +  *pbDlidx = (int)(iRowid & 0x0001);
         5267  +  iRowid >>= FTS5_DATA_DLI_B;
  5005   5268   
  5006   5269     *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
  5007   5270   }
  5008   5271   
  5009   5272   static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  5010         -  int iSegid, iHeight, iPgno;     /* Rowid compenents */
  5011         -  fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno);
         5273  +  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid compenents */
         5274  +  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
  5012   5275   
  5013   5276     if( iSegid==0 ){
  5014   5277       if( iKey==FTS5_AVERAGES_ROWID ){
  5015   5278         sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
  5016   5279       }else{
  5017         -      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
  5018         -          "{structure idx=%d}", (int)(iKey-10)
  5019         -      );
         5280  +      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)");
  5020   5281       }
  5021   5282     }
  5022         -  else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
  5023         -    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)",
  5024         -        iSegid, iPgno
  5025         -    );
  5026         -  }else{
  5027         -    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)",
  5028         -        iSegid, iHeight, iPgno
         5283  +  else{
         5284  +    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)",
         5285  +        bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
  5029   5286       );
  5030   5287     }
  5031   5288   }
  5032   5289   
  5033   5290   static void fts5DebugStructure(
  5034   5291     int *pRc,                       /* IN/OUT: error code */
  5035   5292     Fts5Buffer *pBuf,
................................................................................
  5131   5388   */
  5132   5389   static void fts5DecodeFunction(
  5133   5390     sqlite3_context *pCtx,          /* Function call context */
  5134   5391     int nArg,                       /* Number of args (always 2) */
  5135   5392     sqlite3_value **apVal           /* Function arguments */
  5136   5393   ){
  5137   5394     i64 iRowid;                     /* Rowid for record being decoded */
  5138         -  int iSegid,iHeight,iPgno;       /* Rowid components */
         5395  +  int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
  5139   5396     const u8 *aBlob; int n;         /* Record to decode */
  5140   5397     u8 *a = 0;
  5141   5398     Fts5Buffer s;                   /* Build up text to return here */
  5142   5399     int rc = SQLITE_OK;             /* Return code */
  5143   5400     int nSpace = 0;
  5144   5401   
  5145   5402     assert( nArg==2 );
................................................................................
  5148   5405     n = sqlite3_value_bytes(apVal[1]);
  5149   5406     aBlob = sqlite3_value_blob(apVal[1]);
  5150   5407   
  5151   5408     nSpace = n + FTS5_DATA_ZERO_PADDING;
  5152   5409     a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  5153   5410     if( a==0 ) goto decode_out;
  5154   5411     memcpy(a, aBlob, n);
  5155         -  fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno);
         5412  +  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
  5156   5413   
  5157   5414     fts5DebugRowid(&rc, &s, iRowid);
  5158         -  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
         5415  +  if( bDlidx ){
  5159   5416       Fts5Data dlidx;
  5160         -    Fts5DlidxIter iter;
         5417  +    Fts5DlidxLvl lvl;
  5161   5418   
  5162   5419       dlidx.p = a;
  5163   5420       dlidx.n = n;
  5164   5421       dlidx.nRef = 2;
  5165   5422   
  5166         -    memset(&iter, 0, sizeof(Fts5DlidxIter));
  5167         -    iter.pData = &dlidx;
  5168         -    iter.iLeafPgno = iPgno;
         5423  +    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
         5424  +    lvl.pData = &dlidx;
         5425  +    lvl.iLeafPgno = iPgno;
  5169   5426   
  5170         -    for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){
         5427  +    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
  5171   5428         sqlite3Fts5BufferAppendPrintf(&rc, &s, 
  5172         -          " %d(%lld)", iter.iLeafPgno, iter.iRowid
         5429  +          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
  5173   5430         );
  5174   5431       }
  5175   5432     }else if( iSegid==0 ){
  5176   5433       if( iRowid==FTS5_AVERAGES_ROWID ){
  5177   5434         /* todo */
  5178   5435       }else{
  5179   5436         fts5DecodeStructure(&rc, &s, a, n);

Changes to ext/fts5/fts5_tcl.c.

    18     18   
    19     19   #ifdef SQLITE_ENABLE_FTS5
    20     20   
    21     21   #include "fts5.h"
    22     22   #include <string.h>
    23     23   #include <assert.h>
    24     24   
    25         -/*
    26         -** This variable is set to true when running corruption tests. Otherwise
    27         -** false. If it is false, extra assert() conditions in the fts5 code are
    28         -** activated - conditions that are only true if it is guaranteed that the
    29         -** fts5 database is not corrupt.
    30         -*/
    31         -int sqlite3_fts5_may_be_corrupt = 0;
           25  +extern int sqlite3_fts5_may_be_corrupt;
    32     26   
    33     27   /*************************************************************************
    34     28   ** This is a copy of the first part of the SqliteDb structure in 
    35     29   ** tclsqlite.c.  We need it here so that the get_sqlite_pointer routine
    36     30   ** can extract the sqlite3* pointer from an existing Tcl SQLite
    37     31   ** connection.
    38     32   */

Changes to ext/fts5/test/fts5aa.test.

    45     45     CREATE VIRTUAL TABLE t1 USING fts5(x,y);
    46     46   }
    47     47   do_execsql_test 2.1 {
    48     48     INSERT INTO t1 VALUES('a b c', 'd e f');
    49     49   }
    50     50   do_test 2.2 {
    51     51     execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
    52         -} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
           52  +} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
    53     53   do_execsql_test 2.3 {
    54     54     INSERT INTO t1(t1) VALUES('integrity-check');
    55     55   }
    56     56   
    57     57   #-------------------------------------------------------------------------
    58     58   #
    59     59   reset_db
................................................................................
   177    177         set y [doc]
   178    178         set z [doc]
   179    179         set rowid [expr int(rand() * 100)]
   180    180         execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
   181    181       }
   182    182       execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
   183    183     } {}
   184         -#  if {$i==1} break
   185    184   }
   186    185   #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
   187    186   #exit
   188    187   
   189    188   #-------------------------------------------------------------------------
   190    189   #
   191    190   reset_db
................................................................................
   238    237         set rowid [expr int(rand() * 100)]
   239    238         execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
   240    239       }
   241    240       execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
   242    241     } {}
   243    242     if {[set_test_counter errors]} break
   244    243   }
          244  +
   245    245   
   246    246   #-------------------------------------------------------------------------
   247    247   #
   248    248   reset_db
   249    249   do_execsql_test 10.0 {
   250    250     CREATE VIRTUAL TABLE t1 USING fts5(x,y);
   251    251   }

Changes to ext/fts5/test/fts5al.test.

    22     22     finish_test
    23     23     return
    24     24   }
    25     25   
    26     26   do_execsql_test 1.1 {
    27     27     CREATE VIRTUAL TABLE ft1 USING fts5(x);
    28     28     SELECT * FROM ft1_config;
    29         -} {version 1}
           29  +} {version 2}
    30     30   
    31     31   do_execsql_test 1.2 {
    32     32     INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
    33     33     SELECT * FROM ft1_config;
    34         -} {pgsz 32 version 1}
           34  +} {pgsz 32 version 2}
    35     35   
    36     36   do_execsql_test 1.3 {
    37     37     INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
    38     38     SELECT * FROM ft1_config;
    39         -} {pgsz 64 version 1}
           39  +} {pgsz 64 version 2}
    40     40   
    41     41   #--------------------------------------------------------------------------
    42     42   # Test the logic for parsing the rank() function definition.
    43     43   #
    44     44   foreach {tn defn} {
    45     45     1 "fname()"
    46     46     2 "fname(1)"

Changes to ext/fts5/test/fts5corrupt2.test.

    12     12   # This file tests that FTS5 handles corrupt databases (i.e. internal
    13     13   # inconsistencies in the backing tables) correctly. In this case 
    14     14   # "correctly" means without crashing.
    15     15   #
    16     16   
    17     17   source [file join [file dirname [info script]] fts5_common.tcl]
    18     18   set testprefix fts5corrupt2
           19  +sqlite3_fts5_may_be_corrupt 1
    19     20   
    20     21   # Create a simple FTS5 table containing 100 documents. Each document 
    21     22   # contains 10 terms, each of which start with the character "x".
    22     23   #
    23     24   expr srand(0)
    24     25   db func rnddoc fts5_rnddoc
    25     26   do_execsql_test 1.0 {
    26     27     CREATE VIRTUAL TABLE t1 USING fts5(x);
    27     28     INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    28     29     WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
    29     30     INSERT INTO t1 SELECT rnddoc(10) FROM ii;
    30     31   }
    31     32   set mask [expr 31 << 31]
           33  +
    32     34   
    33     35   # Test 1:
    34     36   #
    35     37   #   For each page in the t1_data table, open a transaction and DELETE
    36     38   #   the t1_data entry. Then run:
    37     39   #
    38     40   #     * an integrity-check, and
................................................................................
   190    192       execsql ROLLBACK
   191    193     }
   192    194   
   193    195     do_test 4.$tn.x { expr $nCorrupt>0 } 1
   194    196   }
   195    197   
   196    198   
          199  +sqlite3_fts5_may_be_corrupt 0
   197    200   finish_test
   198    201   

Changes to ext/fts5/test/fts5dlidx.test.

    57     57         if {($i % $spc2)==0} { 
    58     58           lappend ydoc $rowid
    59     59           append doc " y" 
    60     60         }
    61     61       }
    62     62       execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
    63     63     }
           64  +  breakpoint
    64     65     execsql COMMIT
    65     66   
    66     67     do_test $tn.1 {
    67     68       execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
    68     69     } {}
    69     70     
    70     71     do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
................................................................................
    78     79     do_fb_test $tn.5.2 { 
    79     80       SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
    80     81   }
    81     82   
    82     83   
    83     84   do_dlidx_test1 1.1     10 100 10000 0 1000
    84     85   do_dlidx_test1 1.2     10 10  10000 0 128
    85         -do_dlidx_test1 1.3     10 10  100   0 36028797018963970
    86         -do_dlidx_test1 1.3     10 10  50    0 150000000000000000
           86  +do_dlidx_test1 1.3     10 10  66   0 36028797018963970
           87  +do_dlidx_test1 1.4     10 10  50    0 150000000000000000
    87     88   
    88     89   
    89     90   
    90     91   finish_test
    91     92   

Changes to ext/fts5/test/fts5integrity.test.

    26     26   do_execsql_test 2.0 {
    27     27     CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
    28     28     INSERT INTO yy VALUES('term');
    29     29   }
    30     30   do_execsql_test 2.1 {
    31     31     INSERT INTO yy(yy) VALUES('integrity-check');
    32     32   }
           33  +
           34  +#--------------------------------------------------------------------
           35  +#
           36  +do_execsql_test 3.0 {
           37  +  CREATE VIRTUAL TABLE zz USING fts5(z);
           38  +  INSERT INTO zz(zz, rank) VALUES('pgsz', 32);
           39  +  INSERT INTO zz VALUES('b b b b b b b b b b b b b b');
           40  +  INSERT INTO zz SELECT z FROM zz;
           41  +  INSERT INTO zz SELECT z FROM zz;
           42  +  INSERT INTO zz SELECT z FROM zz;
           43  +  INSERT INTO zz SELECT z FROM zz;
           44  +  INSERT INTO zz SELECT z FROM zz;
           45  +  INSERT INTO zz SELECT z FROM zz;
           46  +  INSERT INTO zz(zz) VALUES('optimize');
           47  +}
           48  +
           49  +do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); }
           50  +
           51  +
           52  +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
           53  +#exit
           54  +
    33     55   
    34     56   finish_test
    35     57   

Changes to ext/fts5/test/fts5rowid.test.

    21     21   
    22     22   do_catchsql_test 1.2 {
    23     23     SELECT fts5_rowid('segment')
    24     24   } {1 {should be: fts5_rowid('segment', segid, height, pgno))}}
    25     25   
    26     26   do_execsql_test 1.3 {
    27     27     SELECT fts5_rowid('segment', 1, 1, 1)
    28         -} {70866960385}
           28  +} {139586437121}
    29     29   
    30     30   do_catchsql_test 1.4 {
    31     31     SELECT fts5_rowid('nosucharg');
    32     32   } {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 
    33     33   
    34     34   
    35     35   #-------------------------------------------------------------------------

Changes to ext/fts5/test/fts5version.test.

    20     20   do_execsql_test 1.1 {
    21     21     CREATE VIRTUAL TABLE t1 USING fts5(one);
    22     22     INSERT INTO t1 VALUES('a b c d');
    23     23   } {}
    24     24   
    25     25   do_execsql_test 1.2 {
    26     26     SELECT * FROM t1_config WHERE k='version'
    27         -} {version 1}
           27  +} {version 2}
    28     28   
    29     29   do_execsql_test 1.3 {
    30     30     SELECT rowid FROM t1 WHERE t1 MATCH 'a';
    31     31   } {1}
    32     32   
    33     33   do_execsql_test 1.4 {
    34         -  UPDATE t1_config set v=2 WHERE k='version';
           34  +  UPDATE t1_config set v=3 WHERE k='version';
    35     35   } 
    36     36   
    37     37   do_test 1.5 {
    38     38     db close
    39     39     sqlite3 db test.db
    40     40     catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
    41         -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}}
           41  +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}
    42     42   
    43     43   breakpoint
    44     44   do_test 1.6 {
    45     45     db close
    46     46     sqlite3 db test.db
    47     47     catchsql { INSERT INTO t1 VALUES('x y z') }
    48         -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}}
           48  +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}
    49     49   
    50     50   do_test 1.7 {
    51     51     execsql { DELETE FROM t1_config WHERE k='version' }
    52     52     db close
    53     53     sqlite3 db test.db
    54     54     catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
    55         -} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}}
           55  +} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}}
    56     56   
    57     57   
    58     58   finish_test
    59     59   

Changes to ext/fts5/tool/loadfts5.tcl.

   105    105   db func loadfile loadfile
   106    106   
   107    107   db transaction {
   108    108     set pref ""
   109    109     if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
   110    110     catch {
   111    111       db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
          112  +    # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
   112    113     }
   113    114     if {$O(automerge)>=0} {
   114    115       if {$O(vtab) == "fts5"} {
   115    116         db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
   116    117       } else {
   117    118         db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
   118    119       }

Changes to test/permutations.test.

   237    237     fts3varint.test
   238    238     fts4growth.test fts4growth2.test
   239    239   }
   240    240   
   241    241   test_suite "fts5" -prefix "" -description {
   242    242     All FTS5 tests.
   243    243   } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]
          244  +
          245  +test_suite "fts5-light" -prefix "" -description {
          246  +  All FTS5 tests.
          247  +} -files [
          248  +  test_set \
          249  +      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
          250  +      -exclude *corrupt* *fault* *big* *fts5aj*
          251  +]
   244    252   
   245    253   test_suite "nofaultsim" -prefix "" -description {
   246    254     "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
   247    255     This test suite is the same as the "quick" tests, except that some files
   248    256     that test malloc and IO errors are omitted.
   249    257   } -files [
   250    258     test_set $allquicktests -exclude *malloc* *ioerr* *fault*

Changes to tool/mksqlite3c.tcl.

   373    373      fts5_expr.c
   374    374      fts5_hash.c
   375    375      fts5_index.c
   376    376      fts5parse.c
   377    377      fts5_storage.c
   378    378      fts5_tokenize.c
   379    379      fts5_unicode2.c
          380  +   fts5_vocab.c
   380    381   
   381    382      rtree.c
   382    383      icu.c
   383    384      fts3_icu.c
   384    385   } {
   385    386     copy_file tsrc/$file
   386    387   }
   387    388   
   388    389   close $out