Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
aa34bf666c384cf32a8d8166ab6d9afb |
User & Date: | dan 2015-05-13 17:15:32 |
Context
2015-05-13
| ||
18:12 | Merge latest trunk changes with this branch. check-in: b5f0e8c5b4 user: dan tags: fts5 | |
17:15 | Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. check-in: aa34bf666c user: dan tags: fts5 | |
2015-05-09
| ||
18:28 | Allow the fts5vocab table to optionally provide data on a per-column basis. check-in: 3922276135 user: dan tags: fts5 | |
Changes
Changes to ext/fts5/fts5.c.
13 13 ** This is an SQLite module implementing full-text search. 14 14 */ 15 15 16 16 #if defined(SQLITE_ENABLE_FTS5) 17 17 18 18 #include "fts5Int.h" 19 19 20 +/* 21 +** This variable is set to true when running corruption tests. Otherwise 22 +** false. If it is false, extra assert() conditions in the fts5 code are 23 +** activated - conditions that are only true if it is guaranteed that the 24 +** fts5 database is not corrupt. 25 +*/ 26 +int sqlite3_fts5_may_be_corrupt = 0; 27 + 20 28 21 29 typedef struct Fts5Table Fts5Table; 22 30 typedef struct Fts5Cursor Fts5Cursor; 23 31 typedef struct Fts5Global Fts5Global; 24 32 typedef struct Fts5Auxiliary Fts5Auxiliary; 25 33 typedef struct Fts5Auxdata Fts5Auxdata; 26 34
Changes to ext/fts5/fts5Int.h.
40 40 #endif 41 41 42 42 /* 43 43 ** The assert_nc() macro is similar to the assert() macro, except that it 44 44 ** is used for assert() conditions that are true only if it can be 45 45 ** guranteed that the database is not corrupt. 46 46 */ 47 -#ifdef SQLITE_TEST 47 +#ifdef SQLITE_DEBUG 48 48 extern int sqlite3_fts5_may_be_corrupt; 49 49 # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) 50 50 #else 51 51 # define assert_nc(x) assert(x) 52 52 #endif 53 53 54 54 typedef struct Fts5Global Fts5Global; ................................................................................ 111 111 char *zRankArgs; /* Arguments to rank function */ 112 112 113 113 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ 114 114 char **pzErrmsg; 115 115 }; 116 116 117 117 /* Current expected value of %_config table 'version' field */ 118 -#define FTS5_CURRENT_VERSION 1 118 +#define FTS5_CURRENT_VERSION 2 119 119 120 120 #define FTS5_CONTENT_NORMAL 0 121 121 #define FTS5_CONTENT_NONE 1 122 122 #define FTS5_CONTENT_EXTERNAL 2 123 123 124 124 125 125
Changes to ext/fts5/fts5_index.c.
198 198 ** 199 199 ** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 200 200 ** is an associated index-by-rowid record. 201 201 ** * the number of zero-term leaves as a varint. 202 202 ** 203 203 ** 5. Segment doclist indexes: 204 204 ** 205 -** A list of varints. If the first termless page contains at least one 206 -** docid, the list begins with that docid as a varint followed by the 207 -** value 1 (0x01). Or, if the first termless page contains no docids, 208 -** a varint containing the last docid stored on the term page followed 209 -** by a 0 (0x00) value. 205 +** Doclist indexes are themselves b-trees, however they usually consist of 206 +** a single leaf record only. The format of each doclist index leaf page 207 +** is: 210 208 ** 211 -** For each subsequent page in the doclist, either a 0x00 byte if the 212 -** page contains no terms, or a delta-encoded docid (always +ve) 213 -** representing the first docid on the page otherwise. 209 +** * Flags byte. Bits are: 210 +** 0x01: Clear if leaf is also the root page, otherwise set. 211 +** 212 +** * Page number of fts index leaf page. As a varint. 213 +** 214 +** * First docid on page indicated by previous field. As a varint. 215 +** 216 +** * A list of varints, one for each subsequent termless page. A 217 +** positive delta if the termless page contains at least one docid, 218 +** or an 0x00 byte otherwise. 219 +** 220 +** Internal doclist index nodes are: 221 +** 222 +** * Flags byte. Bits are: 223 +** 0x01: Clear for root page, otherwise set. 224 +** 225 +** * Page number of first child page. As a varint. 226 +** 227 +** * Copy of first docid on page indicated by previous field. As a varint. 228 +** 229 +** * A list of delta-encoded varints - the first docid on each subsequent 230 +** child page. 231 +** 214 232 */ 215 233 216 234 /* 217 235 ** Rowids for the averages and structure records in the %_data table. 218 236 */ 219 237 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ 220 238 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ ................................................................................ 236 254 ** 237 255 ** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro 238 256 ** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used 239 257 ** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns 240 258 ** SQLITE_FULL and fails the current operation if they ever prove too small. 241 259 */ 242 260 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ 261 +#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ 243 262 #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ 244 263 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ 245 264 246 -#define FTS5_SEGMENT_ROWID(segid, height, pgno) ( \ 247 - ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ 265 +#define fts5_dri(segid, dlidx, height, pgno) ( \ 266 + ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ 267 + ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ 248 268 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ 249 269 ((i64)(pgno)) \ 250 270 ) 251 271 272 +#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) 273 +#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) 274 + 275 +#if 0 252 276 /* 253 277 ** The height of segment b-trees is actually limited to one less than 254 278 ** (1<<HEIGHT_BITS). This is because the rowid address space for nodes 255 279 ** with such a height is used by doclist indexes. 256 280 */ 257 281 #define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1) 282 +#endif 258 283 259 284 /* 260 285 ** Maximum segments permitted in a single index 261 286 */ 262 287 #define FTS5_MAX_SEGMENT 2000 263 288 289 +#if 0 264 290 /* 265 291 ** The rowid for the doclist index associated with leaf page pgno of segment 266 292 ** segid in index idx. 267 293 */ 268 -#define FTS5_DOCLIST_IDX_ROWID(segid, pgno) \ 294 +#define FTS5_DOCLIST_IDX_ROWID(segid, height, pgno) \ 269 295 FTS5_SEGMENT_ROWID(segid, FTS5_SEGMENT_MAX_HEIGHT, pgno) 296 +#endif 270 297 271 298 #ifdef SQLITE_DEBUG 272 299 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } 273 300 #endif 274 301 275 302 276 303 /* ................................................................................ 281 308 #define FTS5_DATA_ZERO_PADDING 8 282 309 283 310 typedef struct Fts5BtreeIter Fts5BtreeIter; 284 311 typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; 285 312 typedef struct Fts5ChunkIter Fts5ChunkIter; 286 313 typedef struct Fts5Data Fts5Data; 287 314 typedef struct Fts5DlidxIter Fts5DlidxIter; 315 +typedef struct Fts5DlidxLvl Fts5DlidxLvl; 316 +typedef struct Fts5DlidxWriter Fts5DlidxWriter; 288 317 typedef struct Fts5MultiSegIter Fts5MultiSegIter; 289 318 typedef struct Fts5NodeIter Fts5NodeIter; 290 319 typedef struct Fts5PageWriter Fts5PageWriter; 291 320 typedef struct Fts5PosIter Fts5PosIter; 292 321 typedef struct Fts5SegIter Fts5SegIter; 293 322 typedef struct Fts5DoclistIter Fts5DoclistIter; 294 323 typedef struct Fts5SegWriter Fts5SegWriter; ................................................................................ 380 409 ** An object of type Fts5SegWriter is used to write to segments. 381 410 */ 382 411 struct Fts5PageWriter { 383 412 int pgno; /* Page number for this page */ 384 413 Fts5Buffer buf; /* Buffer containing page data */ 385 414 Fts5Buffer term; /* Buffer containing previous term on page */ 386 415 }; 416 +struct Fts5DlidxWriter { 417 + int pgno; /* Page number for this page */ 418 + int bPrevValid; /* True if iPrev is valid */ 419 + i64 iPrev; /* Previous docid value written to page */ 420 + Fts5Buffer buf; /* Buffer containing page data */ 421 +}; 387 422 struct Fts5SegWriter { 388 423 int iSegid; /* Segid to write to */ 389 424 int nWriter; /* Number of entries in aWriter */ 390 425 Fts5PageWriter *aWriter; /* Array of PageWriter objects */ 391 426 i64 iPrevRowid; /* Previous docid written to current leaf */ 392 427 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ 393 428 u8 bFirstRowidInPage; /* True if next rowid is first in page */ 394 429 u8 bFirstTermInPage; /* True if next term will be first in leaf */ 395 430 int nLeafWritten; /* Number of leaf pages written */ 396 431 int nEmpty; /* Number of contiguous term-less nodes */ 397 - Fts5Buffer cdlidx; /* Doclist index */ 398 - i64 iDlidxPrev; /* Previous rowid appended to dlidx */ 399 - int bDlidxPrevValid; /* True if iDlidxPrev is valid */ 432 + 433 + int nDlidx; /* Allocated size of aDlidx[] array */ 434 + Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ 400 435 }; 401 436 402 437 /* 403 438 ** Object for iterating through the merged results of one or more segments, 404 439 ** visiting each term/docid pair in the merged data. 405 440 ** 406 441 ** nSeg is always a power of two greater than or equal to the number of ................................................................................ 557 592 ** 558 593 ** bEof: 559 594 ** Set to true once iterator has reached EOF. 560 595 ** 561 596 ** iOff: 562 597 ** Set to the current offset within record pData. 563 598 */ 564 -struct Fts5DlidxIter { 565 - Fts5Data *pData; /* Data for doclist index, if any */ 566 - int iOff; /* Current offset into pDlidx */ 599 +struct Fts5DlidxLvl { 600 + Fts5Data *pData; /* Data for current page of this level */ 601 + int iOff; /* Current offset into pData */ 567 602 int bEof; /* At EOF already */ 568 - int iFirstOff; /* Used by reverse iterators only */ 603 + int iFirstOff; /* Used by reverse iterators */ 569 604 570 605 /* Output variables */ 571 606 int iLeafPgno; /* Page number of current leaf page */ 572 607 i64 iRowid; /* First rowid on leaf iLeafPgno */ 573 608 }; 609 +struct Fts5DlidxIter { 610 + int nLvl; 611 + int iSegid; 612 + Fts5DlidxLvl aLvl[1]; 613 +}; 614 + 574 615 575 616 576 617 /* 577 618 ** An Fts5BtreeIter object is used to iterate through all entries in the 578 619 ** b-tree hierarchy belonging to a single fts5 segment. In this case the 579 620 ** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the 580 621 ** b-tree hierarchy consists of the following: ................................................................................ 1404 1445 ** Free any memory allocated by the iterator object. 1405 1446 */ 1406 1447 static void fts5NodeIterFree(Fts5NodeIter *pIter){ 1407 1448 fts5BufferFree(&pIter->term); 1408 1449 } 1409 1450 1410 1451 /* 1411 -** The iterator passed as the first argument has the following fields set 1412 -** as follows. This function sets up the rest of the iterator so that it 1413 -** points to the first rowid in the doclist-index. 1414 -** 1415 -** pData: pointer to doclist-index record, 1416 -** iLeafPgno: page number that this doclist-index is associated with. 1417 -** 1418 -** When this function is called pIter->iLeafPgno is the page number the 1419 -** doclist is associated with (the one featuring the term). 1420 -*/ 1421 -static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ 1422 - Fts5Data *pData = pIter->pData; 1423 - int i; 1424 - int bPresent; 1425 - 1426 - assert( pIter->pData ); 1427 - assert( pIter->iLeafPgno>0 ); 1428 - 1429 - /* Read the first rowid value. And the "present" flag that follows it. */ 1430 - pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); 1431 - bPresent = pData->p[pIter->iOff++]; 1432 - if( bPresent ){ 1433 - i = 0; 1434 - }else{ 1435 - /* Count the number of leading 0x00 bytes. */ 1436 - for(i=1; pIter->iOff<pData->n; i++){ 1437 - if( pData->p[pIter->iOff] ) break; 1438 - pIter->iOff++; 1439 - } 1440 - 1441 - /* Unless we are already at the end of the doclist-index, load the first 1442 - ** rowid value. */ 1443 - if( pIter->iOff<pData->n ){ 1452 +** Advance the iterator passed as the only argument. If the end of the 1453 +** doclist-index page is reached, return non-zero. 1454 +*/ 1455 +static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ 1456 + Fts5Data *pData = pLvl->pData; 1457 + 1458 + if( pLvl->iOff==0 ){ 1459 + assert( pLvl->bEof==0 ); 1460 + pLvl->iOff = 1; 1461 + pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); 1462 + pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); 1463 + pLvl->iFirstOff = pLvl->iOff; 1464 + }else{ 1465 + int iOff; 1466 + for(iOff=pLvl->iOff; iOff<pData->n; iOff++){ 1467 + if( pData->p[iOff] ) break; 1468 + } 1469 + 1470 + if( iOff<pData->n ){ 1444 1471 i64 iVal; 1445 - pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); 1446 - pIter->iRowid += iVal; 1472 + pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; 1473 + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); 1474 + pLvl->iRowid += iVal; 1475 + pLvl->iOff = iOff; 1447 1476 }else{ 1448 - pIter->bEof = 1; 1477 + pLvl->bEof = 1; 1449 1478 } 1450 1479 } 1451 - pIter->iLeafPgno += (i+1); 1452 1480 1453 - pIter->iFirstOff = pIter->iOff; 1454 - return pIter->bEof; 1481 + return pLvl->bEof; 1455 1482 } 1456 1483 1457 1484 /* 1458 1485 ** Advance the iterator passed as the only argument. 1459 1486 */ 1460 -static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ 1461 - Fts5Data *pData = pIter->pData; 1462 - int iOff; 1463 - 1464 - for(iOff=pIter->iOff; iOff<pData->n; iOff++){ 1465 - if( pData->p[iOff] ) break; 1466 - } 1467 - 1468 - if( iOff<pData->n ){ 1469 - i64 iVal; 1470 - pIter->iLeafPgno += (iOff - pIter->iOff) + 1; 1471 - iOff += getVarint(&pData->p[iOff], (u64*)&iVal); 1472 - pIter->iRowid += iVal; 1473 - pIter->iOff = iOff; 1474 - }else{ 1475 - pIter->bEof = 1; 1476 - } 1477 - 1478 - return pIter->bEof; 1479 -} 1487 +static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 1488 + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 1489 + 1490 + assert( iLvl<pIter->nLvl ); 1491 + if( fts5DlidxLvlNext(pLvl) ){ 1492 + if( (iLvl+1) < pIter->nLvl ){ 1493 + fts5DlidxIterNextR(p, pIter, iLvl+1); 1494 + if( pLvl[1].bEof==0 ){ 1495 + fts5DataRelease(pLvl->pData); 1496 + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1497 + pLvl->pData = fts5DataRead(p, 1498 + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 1499 + ); 1500 + if( pLvl->pData ) fts5DlidxLvlNext(pLvl); 1501 + } 1502 + } 1503 + } 1504 + 1505 + return pIter->aLvl[0].bEof; 1506 +} 1507 +static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ 1508 + return fts5DlidxIterNextR(p, pIter, 0); 1509 +} 1510 + 1511 +/* 1512 +** The iterator passed as the first argument has the following fields set 1513 +** as follows. This function sets up the rest of the iterator so that it 1514 +** points to the first rowid in the doclist-index. 1515 +** 1516 +** pData: 1517 +** pointer to doclist-index record, 1518 +** 1519 +** When this function is called pIter->iLeafPgno is the page number the 1520 +** doclist is associated with (the one featuring the term). 1521 +*/ 1522 +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ 1523 + int i; 1524 + for(i=0; i<pIter->nLvl; i++){ 1525 + fts5DlidxLvlNext(&pIter->aLvl[i]); 1526 + } 1527 + return pIter->aLvl[0].bEof; 1528 +} 1529 + 1480 1530 1481 1531 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ 1482 - return pIter->bEof; 1483 -} 1484 - 1485 -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ 1486 - if( fts5DlidxIterFirst(pIter)==0 ){ 1487 - while( 0==fts5DlidxIterNext(pIter) ); 1488 - pIter->bEof = 0; 1489 - } 1490 -} 1491 - 1492 -static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ 1493 - int iOff = pIter->iOff; 1494 - 1495 - assert( pIter->bEof==0 ); 1496 - if( iOff<=pIter->iFirstOff ){ 1497 - pIter->bEof = 1; 1498 - }else{ 1499 - u8 *a = pIter->pData->p; 1532 + return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; 1533 +} 1534 + 1535 +static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ 1536 + int i; 1537 + 1538 + /* Advance each level to the last entry on the last page */ 1539 + for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ 1540 + Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; 1541 + while( fts5DlidxLvlNext(pLvl)==0 ); 1542 + pLvl->bEof = 0; 1543 + 1544 + if( i>0 ){ 1545 + Fts5DlidxLvl *pChild = &pLvl[-1]; 1546 + fts5DataRelease(pChild->pData); 1547 + memset(pChild, 0, sizeof(Fts5DlidxLvl)); 1548 + pChild->pData = fts5DataRead(p, 1549 + FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) 1550 + ); 1551 + } 1552 + } 1553 +} 1554 + 1555 +/* 1556 +** Move the iterator passed as the only argument to the previous entry. 1557 +*/ 1558 +static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ 1559 + int iOff = pLvl->iOff; 1560 + 1561 + assert( pLvl->bEof==0 ); 1562 + if( iOff<=pLvl->iFirstOff ){ 1563 + pLvl->bEof = 1; 1564 + }else{ 1565 + u8 *a = pLvl->pData->p; 1500 1566 i64 iVal; 1501 1567 int iLimit; 1568 + int ii; 1569 + int nZero = 0; 1502 1570 1503 1571 /* Currently iOff points to the first byte of a varint. This block 1504 1572 ** decrements iOff until it points to the first byte of the previous 1505 1573 ** varint. Taking care not to read any memory locations that occur 1506 1574 ** before the buffer in memory. */ 1507 1575 iLimit = (iOff>9 ? iOff-9 : 0); 1508 1576 for(iOff--; iOff>iLimit; iOff--){ 1509 1577 if( (a[iOff-1] & 0x80)==0 ) break; 1510 1578 } 1511 1579 1512 1580 getVarint(&a[iOff], (u64*)&iVal); 1513 - pIter->iRowid -= iVal; 1514 - pIter->iLeafPgno--; 1515 - 1516 - /* Skip backwards passed any 0x00 bytes. */ 1517 - while( iOff>pIter->iFirstOff 1518 - && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 1519 - ){ 1520 - iOff--; 1521 - pIter->iLeafPgno--; 1522 - } 1523 - pIter->iOff = iOff; 1524 - } 1525 - 1526 - return pIter->bEof; 1581 + pLvl->iRowid -= iVal; 1582 + pLvl->iLeafPgno--; 1583 + 1584 + /* Skip backwards past any 0x00 varints. */ 1585 + for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ 1586 + nZero++; 1587 + } 1588 + if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ 1589 + /* The byte immediately before the last 0x00 byte has the 0x80 bit 1590 + ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 1591 + ** bytes before a[ii]. */ 1592 + int bZero = 0; /* True if last 0x00 counts */ 1593 + if( (ii-8)>=pLvl->iFirstOff ){ 1594 + int j; 1595 + for(j=1; j<=8 && (a[ii-j] & 0x80); j++); 1596 + bZero = (j>8); 1597 + } 1598 + if( bZero==0 ) nZero--; 1599 + } 1600 + pLvl->iLeafPgno -= nZero; 1601 + pLvl->iOff = iOff - nZero; 1602 + } 1603 + 1604 + return pLvl->bEof; 1605 +} 1606 + 1607 +static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 1608 + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 1609 + 1610 + assert( iLvl<pIter->nLvl ); 1611 + if( fts5DlidxLvlPrev(pLvl) ){ 1612 + if( (iLvl+1) < pIter->nLvl ){ 1613 + fts5DlidxIterPrevR(p, pIter, iLvl+1); 1614 + if( pLvl[1].bEof==0 ){ 1615 + fts5DataRelease(pLvl->pData); 1616 + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1617 + pLvl->pData = fts5DataRead(p, 1618 + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 1619 + ); 1620 + if( pLvl->pData ){ 1621 + while( fts5DlidxLvlNext(pLvl)==0 ); 1622 + pLvl->bEof = 0; 1623 + } 1624 + } 1625 + } 1626 + } 1627 + 1628 + return pIter->aLvl[0].bEof; 1629 +} 1630 +static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ 1631 + return fts5DlidxIterPrevR(p, pIter, 0); 1632 +} 1633 + 1634 +/* 1635 +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 1636 +*/ 1637 +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ 1638 + if( pIter ){ 1639 + int i; 1640 + for(i=0; i<pIter->nLvl; i++){ 1641 + fts5DataRelease(pIter->aLvl[i].pData); 1642 + } 1643 + sqlite3_free(pIter); 1644 + } 1527 1645 } 1528 1646 1529 1647 static Fts5DlidxIter *fts5DlidxIterInit( 1530 1648 Fts5Index *p, /* Fts5 Backend to iterate within */ 1531 1649 int bRev, /* True for ORDER BY ASC */ 1532 1650 int iSegid, /* Segment id */ 1533 1651 int iLeafPg /* Leaf page number to load dlidx for */ 1534 1652 ){ 1535 - Fts5DlidxIter *pIter; 1653 + Fts5DlidxIter *pIter = 0; 1654 + int i; 1655 + int bDone = 0; 1536 1656 1537 - pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); 1538 - if( pIter==0 ) return 0; 1657 + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ 1658 + int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); 1659 + Fts5DlidxIter *pNew; 1539 1660 1540 - pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg)); 1541 - if( pIter->pData==0 ){ 1542 - sqlite3_free(pIter); 1543 - pIter = 0; 1544 - }else{ 1545 - pIter->iLeafPgno = iLeafPg; 1661 + pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); 1662 + if( pNew==0 ){ 1663 + p->rc = SQLITE_NOMEM; 1664 + }else{ 1665 + i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); 1666 + Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; 1667 + pIter = pNew; 1668 + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1669 + pLvl->pData = fts5DataRead(p, iRowid); 1670 + if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ 1671 + bDone = 1; 1672 + } 1673 + pIter->nLvl = i+1; 1674 + } 1675 + } 1676 + 1677 + if( p->rc==SQLITE_OK ){ 1678 + pIter->iSegid = iSegid; 1546 1679 if( bRev==0 ){ 1547 1680 fts5DlidxIterFirst(pIter); 1548 1681 }else{ 1549 - fts5DlidxIterLast(pIter); 1682 + fts5DlidxIterLast(p, pIter); 1550 1683 } 1551 1684 } 1685 + 1686 + if( p->rc!=SQLITE_OK ){ 1687 + fts5DlidxIterFree(pIter); 1688 + pIter = 0; 1689 + } 1552 1690 1553 1691 return pIter; 1554 1692 } 1555 1693 1556 -/* 1557 -** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 1558 -*/ 1559 -static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ 1560 - if( pIter ){ 1561 - fts5DataRelease(pIter->pData); 1562 - sqlite3_free(pIter); 1563 - } 1694 +static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ 1695 + return pIter->aLvl[0].iRowid; 1696 +} 1697 +static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ 1698 + return pIter->aLvl[0].iLeafPgno; 1564 1699 } 1565 1700 1566 1701 static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ 1567 1702 *piRowid = (int)fts5GetU16(&pLeaf->p[0]); 1568 1703 *piTerm = (int)fts5GetU16(&pLeaf->p[2]); 1569 1704 } 1570 1705 ................................................................................ 1936 2071 int pgnoLast = 0; 1937 2072 1938 2073 if( pDlidx ){ 1939 2074 /* If the doclist-iterator is already at EOF, then the current doclist 1940 2075 ** contains no entries except those on the current page. */ 1941 2076 if( fts5DlidxIterEof(p, pDlidx)==0 ){ 1942 2077 int iSegid = pIter->pSeg->iSegid; 1943 - pgnoLast = pDlidx->iLeafPgno; 2078 + pgnoLast = fts5DlidxIterPgno(pDlidx); 1944 2079 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); 1945 2080 }else{ 1946 2081 pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); 1947 2082 } 1948 2083 }else{ 1949 2084 int iOff; /* Byte offset within pLeaf */ 1950 2085 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ ................................................................................ 2342 2477 2343 2478 pRes->iFirst = iRes; 2344 2479 return 0; 2345 2480 } 2346 2481 2347 2482 /* 2348 2483 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. 2349 -** It is an error if leaf iLeafPgno contains no rowid. 2484 +** It is an error if leaf iLeafPgno does not exist or contains no rowids. 2350 2485 */ 2351 2486 static void fts5SegIterGotoPage( 2352 2487 Fts5Index *p, /* FTS5 backend object */ 2353 2488 Fts5SegIter *pIter, /* Iterator to advance */ 2354 2489 int iLeafPgno 2355 2490 ){ 2356 2491 assert( iLeafPgno>pIter->iLeafPgno ); 2357 - pIter->iLeafPgno = iLeafPgno-1; 2358 - fts5SegIterNextPage(p, pIter); 2359 - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); 2492 + if( iLeafPgno>pIter->pSeg->pgnoLast ){ 2493 + p->rc = FTS5_CORRUPT; 2494 + }else{ 2495 + pIter->iLeafPgno = iLeafPgno-1; 2496 + fts5SegIterNextPage(p, pIter); 2497 + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); 2360 2498 2361 - if( p->rc==SQLITE_OK ){ 2362 - int iOff; 2363 - u8 *a = pIter->pLeaf->p; 2364 - int n = pIter->pLeaf->n; 2499 + if( p->rc==SQLITE_OK ){ 2500 + int iOff; 2501 + u8 *a = pIter->pLeaf->p; 2502 + int n = pIter->pLeaf->n; 2365 2503 2366 - iOff = fts5GetU16(&a[0]); 2367 - if( iOff<4 || iOff>=n ){ 2368 - p->rc = FTS5_CORRUPT; 2369 - }else{ 2370 - iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); 2371 - pIter->iLeafOffset = iOff; 2372 - fts5SegIterLoadNPos(p, pIter); 2504 + iOff = fts5GetU16(&a[0]); 2505 + if( iOff<4 || iOff>=n ){ 2506 + p->rc = FTS5_CORRUPT; 2507 + }else{ 2508 + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); 2509 + pIter->iLeafOffset = iOff; 2510 + fts5SegIterLoadNPos(p, pIter); 2511 + } 2373 2512 } 2374 2513 } 2375 2514 } 2376 2515 2377 2516 /* 2378 2517 ** Advance the iterator passed as the second argument until it is at or 2379 2518 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is ................................................................................ 2390 2529 int bMove = 1; 2391 2530 2392 2531 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 2393 2532 assert( pIter->pDlidx ); 2394 2533 assert( pIter->pLeaf ); 2395 2534 2396 2535 if( bRev==0 ){ 2397 - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ 2398 - iLeafPgno = pDlidx->iLeafPgno; 2399 - fts5DlidxIterNext(pDlidx); 2536 + while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ 2537 + iLeafPgno = fts5DlidxIterPgno(pDlidx); 2538 + fts5DlidxIterNext(p, pDlidx); 2400 2539 } 2401 - assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); 2540 + assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); 2402 2541 if( iLeafPgno>pIter->iLeafPgno ){ 2403 2542 fts5SegIterGotoPage(p, pIter, iLeafPgno); 2404 2543 bMove = 0; 2405 2544 } 2406 2545 }else{ 2407 2546 assert( iMatch<pIter->iRowid ); 2408 - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){ 2409 - fts5DlidxIterPrev(pDlidx); 2547 + while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ 2548 + fts5DlidxIterPrev(p, pDlidx); 2410 2549 } 2411 - iLeafPgno = pDlidx->iLeafPgno; 2550 + iLeafPgno = fts5DlidxIterPgno(pDlidx); 2412 2551 2413 2552 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); 2414 2553 2415 2554 if( iLeafPgno<pIter->iLeafPgno ){ 2416 2555 pIter->iLeafPgno = iLeafPgno+1; 2417 2556 fts5SegIterReverseNewPage(p, pIter); 2418 2557 bMove = 0; ................................................................................ 2799 2938 int i; 2800 2939 assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 ); 2801 2940 for(i=0; i<nOld; i++){ 2802 2941 if( pOld[i]!=pNew[i] ) break; 2803 2942 } 2804 2943 return i; 2805 2944 } 2945 + 2946 +static void fts5WriteDlidxClear( 2947 + Fts5Index *p, 2948 + Fts5SegWriter *pWriter, 2949 + int bFlush /* If true, write dlidx to disk */ 2950 +){ 2951 + int i; 2952 + assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); 2953 + for(i=0; i<pWriter->nDlidx; i++){ 2954 + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; 2955 + if( pDlidx->buf.n==0 ) break; 2956 + if( bFlush ){ 2957 + assert( pDlidx->pgno!=0 ); 2958 + fts5DataWrite(p, 2959 + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), 2960 + pDlidx->buf.p, pDlidx->buf.n 2961 + ); 2962 + } 2963 + sqlite3Fts5BufferZero(&pDlidx->buf); 2964 + pDlidx->bPrevValid = 0; 2965 + } 2966 +} 2967 + 2968 +/* 2969 +** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. 2970 +** Any new array elements are zeroed before returning. 2971 +*/ 2972 +static int fts5WriteDlidxGrow( 2973 + Fts5Index *p, 2974 + Fts5SegWriter *pWriter, 2975 + int nLvl 2976 +){ 2977 + if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ 2978 + Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( 2979 + pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl 2980 + ); 2981 + if( aDlidx==0 ){ 2982 + p->rc = SQLITE_NOMEM; 2983 + }else{ 2984 + int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); 2985 + memset(&aDlidx[pWriter->nDlidx], 0, nByte); 2986 + pWriter->aDlidx = aDlidx; 2987 + pWriter->nDlidx = nLvl; 2988 + } 2989 + } 2990 + return p->rc; 2991 +} 2806 2992 2807 2993 /* 2808 2994 ** If an "nEmpty" record must be written to the b-tree before the next 2809 2995 ** term, write it now. 2810 2996 */ 2811 2997 static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ 2812 2998 if( pWriter->nEmpty ){ 2813 2999 int bFlag = 0; 2814 3000 Fts5PageWriter *pPg; 2815 3001 pPg = &pWriter->aWriter[1]; 2816 - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ 2817 - i64 iKey = FTS5_DOCLIST_IDX_ROWID( 2818 - pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty 2819 - ); 2820 - assert( pWriter->cdlidx.n>0 ); 2821 - fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); 3002 + 3003 + /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written 3004 + ** to the database, also write the doclist-index to disk. */ 3005 + if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ 2822 3006 bFlag = 1; 2823 3007 } 3008 + fts5WriteDlidxClear(p, pWriter, bFlag); 2824 3009 fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); 2825 3010 fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); 2826 3011 pWriter->nEmpty = 0; 3012 + }else{ 3013 + fts5WriteDlidxClear(p, pWriter, 0); 2827 3014 } 2828 3015 2829 - /* Whether or not it was written to disk, zero the doclist index at this 2830 - ** point */ 2831 - sqlite3Fts5BufferZero(&pWriter->cdlidx); 2832 - pWriter->bDlidxPrevValid = 0; 3016 + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); 3017 + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); 2833 3018 } 2834 3019 2835 3020 static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ 2836 3021 if( p->rc==SQLITE_OK ){ 2837 3022 Fts5PageWriter *aNew; 2838 3023 Fts5PageWriter *pNew; 2839 3024 int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); ................................................................................ 2896 3081 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); 2897 3082 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); 2898 3083 break; 2899 3084 } 2900 3085 } 2901 3086 } 2902 3087 3088 +/* 3089 +** This function is called when flushing a leaf page that contains no 3090 +** terms at all to disk. 3091 +*/ 2903 3092 static void fts5WriteBtreeNoTerm( 2904 3093 Fts5Index *p, /* FTS5 backend object */ 2905 3094 Fts5SegWriter *pWriter /* Writer object */ 2906 3095 ){ 2907 - if( pWriter->bFirstRowidInPage ){ 2908 - /* No rowids on this page. Append an 0x00 byte to the current 2909 - ** doclist-index */ 2910 - if( pWriter->bDlidxPrevValid==0 ){ 2911 - i64 iRowid = pWriter->iPrevRowid; 2912 - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); 2913 - pWriter->bDlidxPrevValid = 1; 2914 - pWriter->iDlidxPrev = iRowid; 2915 - } 2916 - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); 3096 + /* If there were no rowids on the leaf page either and the doclist-index 3097 + ** has already been started, append an 0x00 byte to it. */ 3098 + if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ 3099 + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; 3100 + assert( pDlidx->bPrevValid ); 3101 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); 2917 3102 } 3103 + 3104 + /* Increment the "number of sequential leaves without a term" counter. */ 2918 3105 pWriter->nEmpty++; 2919 3106 } 3107 + 3108 +static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ 3109 + i64 iRowid; 3110 + int iOff; 3111 + 3112 + iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid); 3113 + getVarint(&pBuf->p[iOff], (u64*)&iRowid); 3114 + return iRowid; 3115 +} 2920 3116 2921 3117 /* 2922 -** Rowid iRowid has just been appended to the current leaf page. As it is 2923 -** the first on its page, append an entry to the current doclist-index. 3118 +** Rowid iRowid has just been appended to the current leaf page. It is the 3119 +** first on the page. This function appends an appropriate entry to the current 3120 +** doclist-index. 2924 3121 */ 2925 3122 static void fts5WriteDlidxAppend( 2926 3123 Fts5Index *p, 2927 3124 Fts5SegWriter *pWriter, 2928 3125 i64 iRowid 2929 3126 ){ 2930 - i64 iVal; 2931 - if( pWriter->bDlidxPrevValid ){ 2932 - iVal = iRowid - pWriter->iDlidxPrev; 2933 - }else{ 2934 - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); 2935 - iVal = 1; 2936 - } 2937 - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); 2938 - pWriter->bDlidxPrevValid = 1; 2939 - pWriter->iDlidxPrev = iRowid; 3127 + int i; 3128 + int bDone = 0; 3129 + 3130 + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ 3131 + i64 iVal; 3132 + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; 3133 + 3134 + if( pDlidx->buf.n>=p->pConfig->pgsz ){ 3135 + /* The current doclist-index page is full. Write it to disk and push 3136 + ** a copy of iRowid (which will become the first rowid on the next 3137 + ** doclist-index leaf page) up into the next level of the b-tree 3138 + ** hierarchy. If the node being flushed is currently the root node, 3139 + ** also push its first rowid upwards. */ 3140 + pDlidx->buf.p[0] = 0x01; /* Not the root node */ 3141 + fts5DataWrite(p, 3142 + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), 3143 + pDlidx->buf.p, pDlidx->buf.n 3144 + ); 3145 + fts5WriteDlidxGrow(p, pWriter, i+2); 3146 + pDlidx = &pWriter->aDlidx[i]; 3147 + if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ 3148 + i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); 3149 + 3150 + /* This was the root node. Push its first rowid up to the new root. */ 3151 + pDlidx[1].pgno = pDlidx->pgno; 3152 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); 3153 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); 3154 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); 3155 + pDlidx[1].bPrevValid = 1; 3156 + pDlidx[1].iPrev = iFirst; 3157 + } 3158 + 3159 + sqlite3Fts5BufferZero(&pDlidx->buf); 3160 + pDlidx->bPrevValid = 0; 3161 + pDlidx->pgno++; 3162 + }else{ 3163 + bDone = 1; 3164 + } 3165 + 3166 + if( pDlidx->bPrevValid ){ 3167 + iVal = iRowid - pDlidx->iPrev; 3168 + }else{ 3169 + i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); 3170 + assert( pDlidx->buf.n==0 ); 3171 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); 3172 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); 3173 + iVal = iRowid; 3174 + } 3175 + 3176 + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); 3177 + pDlidx->bPrevValid = 1; 3178 + pDlidx->iPrev = iRowid; 3179 + } 2940 3180 } 2941 3181 2942 3182 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ 2943 3183 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; 2944 3184 Fts5PageWriter *pPage = &pWriter->aWriter[0]; 2945 3185 i64 iRowid; 2946 3186 ................................................................................ 3029 3269 3030 3270 /* Update the Fts5PageWriter.term field. */ 3031 3271 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); 3032 3272 pWriter->bFirstTermInPage = 0; 3033 3273 3034 3274 pWriter->bFirstRowidInPage = 0; 3035 3275 pWriter->bFirstRowidInDoclist = 1; 3276 + 3277 + assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); 3278 + pWriter->aDlidx[0].pgno = pPage->pgno; 3036 3279 3037 3280 /* If the current leaf page is full, flush it to disk. */ 3038 3281 if( pPage->buf.n>=p->pConfig->pgsz ){ 3039 3282 fts5WriteFlushLeaf(p, pWriter); 3040 3283 } 3041 3284 } 3042 3285 ................................................................................ 3167 3410 } 3168 3411 for(i=0; i<pWriter->nWriter; i++){ 3169 3412 Fts5PageWriter *pPg = &pWriter->aWriter[i]; 3170 3413 fts5BufferFree(&pPg->term); 3171 3414 fts5BufferFree(&pPg->buf); 3172 3415 } 3173 3416 sqlite3_free(pWriter->aWriter); 3174 - sqlite3Fts5BufferFree(&pWriter->cdlidx); 3417 + 3418 + for(i=0; i<pWriter->nDlidx; i++){ 3419 + sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); 3420 + } 3421 + sqlite3_free(pWriter->aDlidx); 3175 3422 } 3176 3423 3177 3424 static void fts5WriteInit( 3178 3425 Fts5Index *p, 3179 3426 Fts5SegWriter *pWriter, 3180 3427 int iSegid 3181 3428 ){ 3182 3429 memset(pWriter, 0, sizeof(Fts5SegWriter)); 3183 3430 pWriter->iSegid = iSegid; 3184 3431 3185 - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); 3186 - if( pWriter->aWriter==0 ) return; 3432 + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); 3433 + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); 3434 + if( pWriter->aDlidx==0 ) return; 3187 3435 pWriter->nWriter = 1; 3436 + pWriter->nDlidx = 1; 3188 3437 pWriter->aWriter[0].pgno = 1; 3189 3438 pWriter->bFirstTermInPage = 1; 3190 3439 } 3191 3440 3192 3441 static void fts5WriteInitForAppend( 3193 3442 Fts5Index *p, /* FTS5 backend object */ 3194 3443 Fts5SegWriter *pWriter, /* Writer to initialize */ 3195 3444 Fts5StructureSegment *pSeg /* Segment object to append to */ 3196 3445 ){ 3197 3446 int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); 3198 3447 memset(pWriter, 0, sizeof(Fts5SegWriter)); 3199 3448 pWriter->iSegid = pSeg->iSegid; 3200 3449 pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); 3450 + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); 3201 3451 3202 3452 if( p->rc==SQLITE_OK ){ 3203 3453 int pgno = 1; 3204 3454 int i; 3455 + pWriter->nDlidx = 1; 3205 3456 pWriter->nWriter = pSeg->nHeight; 3206 3457 pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; 3207 3458 for(i=pSeg->nHeight-1; i>0; i--){ 3208 3459 i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno); 3209 3460 Fts5PageWriter *pPg = &pWriter->aWriter[i]; 3210 3461 pPg->pgno = pgno; 3211 3462 fts5DataBuffer(p, &pPg->buf, iRowid); ................................................................................ 3579 3830 3580 3831 /* Pre-allocate the buffer used to assemble leaf pages to the target 3581 3832 ** page size. */ 3582 3833 assert( pgsz>0 ); 3583 3834 pBuf = &writer.aWriter[0].buf; 3584 3835 fts5BufferGrow(&p->rc, pBuf, pgsz + 20); 3585 3836 3586 - /* Begin scanning through hash table entries. */ 3837 + /* Begin scanning through hash table entries. This loop runs once for each 3838 + ** term/doclist currently stored within the hash table. */ 3587 3839 if( p->rc==SQLITE_OK ){ 3588 3840 memset(pBuf->p, 0, 4); 3589 3841 pBuf->n = 4; 3590 3842 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); 3591 3843 } 3592 - 3593 3844 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ 3594 - const char *zTerm; 3595 - int nTerm; 3596 - const u8 *pDoclist; 3597 - int nDoclist; 3845 + const char *zTerm; /* Buffer containing term */ 3846 + int nTerm; /* Size of zTerm in bytes */ 3847 + const u8 *pDoclist; /* Pointer to doclist for this term */ 3848 + int nDoclist; /* Size of doclist in bytes */ 3598 3849 int nSuffix; /* Size of term suffix */ 3599 3850 3600 3851 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); 3601 3852 nTerm = strlen(zTerm); 3602 3853 3603 3854 /* Decide if the term will fit on the current leaf. If it will not, 3604 3855 ** flush the leaf to disk here. */ ................................................................................ 3607 3858 pBuf = &writer.aWriter[0].buf; 3608 3859 if( (nTerm + 32) > pBuf->nSpace ){ 3609 3860 fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); 3610 3861 if( p->rc ) break; 3611 3862 } 3612 3863 } 3613 3864 3614 - /* Write the term to the leaf. And push it up into the b-tree hierarchy */ 3865 + /* Write the term to the leaf. And if it is the first on the leaf, and 3866 + ** the leaf is not page number 1, push it up into the b-tree hierarchy 3867 + ** as well. */ 3615 3868 if( writer.bFirstTermInPage==0 ){ 3616 3869 int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); 3617 3870 pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); 3618 3871 nSuffix = nTerm - nPre; 3619 3872 }else{ 3620 3873 fts5PutU16(&pBuf->p[2], pBuf->n); 3621 3874 writer.bFirstTermInPage = 0; ................................................................................ 3625 3878 pBuf = &writer.aWriter[0].buf; 3626 3879 assert( nPre<nTerm ); 3627 3880 } 3628 3881 nSuffix = nTerm; 3629 3882 } 3630 3883 pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); 3631 3884 fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); 3885 + 3886 + /* We just wrote a term into page writer.aWriter[0].pgno. If a 3887 + ** doclist-index is to be generated for this doclist, it will be 3888 + ** associated with this page. */ 3889 + assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); 3890 + writer.aDlidx[0].pgno = writer.aWriter[0].pgno; 3632 3891 3633 3892 if( pgsz>=(pBuf->n + nDoclist + 1) ){ 3634 3893 /* The entire doclist will fit on the current leaf. */ 3635 3894 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); 3636 3895 }else{ 3637 3896 i64 iRowid = 0; 3638 3897 i64 iDelta = 0; ................................................................................ 3821 4080 int bSz, 3822 4081 Fts5Buffer *pBuf 3823 4082 ){ 3824 4083 if( p->rc==SQLITE_OK ){ 3825 4084 Fts5ChunkIter iter; 3826 4085 Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; 3827 4086 assert( fts5MultiIterEof(p, pMulti)==0 ); 3828 - static int nCall = 0; 3829 - nCall++; 3830 4087 3831 4088 fts5ChunkIterInit(p, pSeg, &iter); 3832 4089 3833 4090 if( fts5ChunkIterEof(p, &iter)==0 ){ 3834 4091 if( bSz ){ 3835 4092 /* WRITEPOSLISTSIZE */ 3836 4093 fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); ................................................................................ 4412 4669 } 4413 4670 4414 4671 /* 4415 4672 ** Return the current term. 4416 4673 */ 4417 4674 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ 4418 4675 int n; 4419 - const char *z = fts5MultiIterTerm(pIter->pMulti, &n); 4676 + const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); 4420 4677 *pn = n-1; 4421 4678 return &z[1]; 4422 4679 } 4423 4680 4424 4681 4425 4682 /* 4426 4683 ** Return a pointer to a buffer containing a copy of the position list for ................................................................................ 4650 4907 #ifdef SQLITE_DEBUG 4651 4908 static void fts5DlidxIterTestReverse( 4652 4909 Fts5Index *p, 4653 4910 int iSegid, /* Segment id to load from */ 4654 4911 int iLeaf /* Load doclist-index for this leaf */ 4655 4912 ){ 4656 4913 Fts5DlidxIter *pDlidx = 0; 4657 - i64 cksum1 = 13; 4658 - i64 cksum2 = 13; 4914 + u64 cksum1 = 13; 4915 + u64 cksum2 = 13; 4659 4916 4660 4917 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); 4661 4918 fts5DlidxIterEof(p, pDlidx)==0; 4662 - fts5DlidxIterNext(pDlidx) 4919 + fts5DlidxIterNext(p, pDlidx) 4663 4920 ){ 4664 - assert( pDlidx->iLeafPgno>iLeaf ); 4665 - cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); 4666 - cksum1 = (cksum1 ^ pDlidx->iRowid); 4921 + i64 iRowid = fts5DlidxIterRowid(pDlidx); 4922 + int pgno = fts5DlidxIterPgno(pDlidx); 4923 + assert( pgno>iLeaf ); 4924 + cksum1 += iRowid + ((i64)pgno<<32); 4667 4925 } 4668 4926 fts5DlidxIterFree(pDlidx); 4669 4927 pDlidx = 0; 4670 4928 4671 4929 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); 4672 4930 fts5DlidxIterEof(p, pDlidx)==0; 4673 - fts5DlidxIterPrev(pDlidx) 4931 + fts5DlidxIterPrev(p, pDlidx) 4674 4932 ){ 4675 - assert( pDlidx->iLeafPgno>iLeaf ); 4676 - cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); 4677 - cksum2 = (cksum2 ^ pDlidx->iRowid); 4933 + i64 iRowid = fts5DlidxIterRowid(pDlidx); 4934 + int pgno = fts5DlidxIterPgno(pDlidx); 4935 + 4936 + assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); 4937 + cksum2 += iRowid + ((i64)pgno<<32); 4678 4938 } 4679 4939 fts5DlidxIterFree(pDlidx); 4680 4940 pDlidx = 0; 4681 4941 4682 - if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; 4942 + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; 4683 4943 } 4684 4944 #else 4685 4945 # define fts5DlidxIterTestReverse(x,y,z) 4686 4946 #endif 4687 4947 4688 4948 static void fts5IndexIntegrityCheckSegment( 4689 4949 Fts5Index *p, /* FTS5 backend object */ ................................................................................ 4744 5004 int iPrevLeaf = iter.iLeaf; 4745 5005 int iSegid = pSeg->iSegid; 4746 5006 int iPg; 4747 5007 i64 iKey; 4748 5008 4749 5009 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); 4750 5010 fts5DlidxIterEof(p, pDlidx)==0; 4751 - fts5DlidxIterNext(pDlidx) 5011 + fts5DlidxIterNext(p, pDlidx) 4752 5012 ){ 4753 5013 4754 5014 /* Check any rowid-less pages that occur before the current leaf. */ 4755 - for(iPg=iPrevLeaf+1; iPg<pDlidx->iLeafPgno; iPg++){ 5015 + for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ 4756 5016 iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); 4757 5017 pLeaf = fts5DataRead(p, iKey); 4758 5018 if( pLeaf ){ 4759 5019 if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; 4760 5020 fts5DataRelease(pLeaf); 4761 5021 } 4762 5022 } 4763 - iPrevLeaf = pDlidx->iLeafPgno; 5023 + iPrevLeaf = fts5DlidxIterPgno(pDlidx); 4764 5024 4765 5025 /* Check that the leaf page indicated by the iterator really does 4766 5026 ** contain the rowid suggested by the same. */ 4767 - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno); 5027 + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); 4768 5028 pLeaf = fts5DataRead(p, iKey); 4769 5029 if( pLeaf ){ 4770 5030 i64 iRowid; 4771 5031 int iRowidOff = fts5GetU16(&pLeaf->p[0]); 4772 5032 getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); 4773 - if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; 5033 + if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; 4774 5034 fts5DataRelease(pLeaf); 4775 5035 } 4776 - 4777 5036 } 4778 5037 4779 5038 for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ 4780 5039 iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); 4781 5040 pLeaf = fts5DataRead(p, iKey); 4782 5041 if( pLeaf ){ 4783 5042 if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; ................................................................................ 4990 5249 /* 4991 5250 ** Decode a segment-data rowid from the %_data table. This function is 4992 5251 ** the opposite of macro FTS5_SEGMENT_ROWID(). 4993 5252 */ 4994 5253 static void fts5DecodeRowid( 4995 5254 i64 iRowid, /* Rowid from %_data table */ 4996 5255 int *piSegid, /* OUT: Segment id */ 5256 + int *pbDlidx, /* OUT: Dlidx flag */ 4997 5257 int *piHeight, /* OUT: Height */ 4998 5258 int *piPgno /* OUT: Page number */ 4999 5259 ){ 5000 5260 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); 5001 5261 iRowid >>= FTS5_DATA_PAGE_B; 5002 5262 5003 5263 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); 5004 5264 iRowid >>= FTS5_DATA_HEIGHT_B; 5265 + 5266 + *pbDlidx = (int)(iRowid & 0x0001); 5267 + iRowid >>= FTS5_DATA_DLI_B; 5005 5268 5006 5269 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); 5007 5270 } 5008 5271 5009 5272 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ 5010 - int iSegid, iHeight, iPgno; /* Rowid compenents */ 5011 - fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno); 5273 + int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ 5274 + fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); 5012 5275 5013 5276 if( iSegid==0 ){ 5014 5277 if( iKey==FTS5_AVERAGES_ROWID ){ 5015 5278 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); 5016 5279 }else{ 5017 - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 5018 - "{structure idx=%d}", (int)(iKey-10) 5019 - ); 5280 + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); 5020 5281 } 5021 5282 } 5022 - else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ 5023 - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)", 5024 - iSegid, iPgno 5025 - ); 5026 - }else{ 5027 - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)", 5028 - iSegid, iHeight, iPgno 5283 + else{ 5284 + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", 5285 + bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno 5029 5286 ); 5030 5287 } 5031 5288 } 5032 5289 5033 5290 static void fts5DebugStructure( 5034 5291 int *pRc, /* IN/OUT: error code */ 5035 5292 Fts5Buffer *pBuf, ................................................................................ 5131 5388 */ 5132 5389 static void fts5DecodeFunction( 5133 5390 sqlite3_context *pCtx, /* Function call context */ 5134 5391 int nArg, /* Number of args (always 2) */ 5135 5392 sqlite3_value **apVal /* Function arguments */ 5136 5393 ){ 5137 5394 i64 iRowid; /* Rowid for record being decoded */ 5138 - int iSegid,iHeight,iPgno; /* Rowid components */ 5395 + int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ 5139 5396 const u8 *aBlob; int n; /* Record to decode */ 5140 5397 u8 *a = 0; 5141 5398 Fts5Buffer s; /* Build up text to return here */ 5142 5399 int rc = SQLITE_OK; /* Return code */ 5143 5400 int nSpace = 0; 5144 5401 5145 5402 assert( nArg==2 ); ................................................................................ 5148 5405 n = sqlite3_value_bytes(apVal[1]); 5149 5406 aBlob = sqlite3_value_blob(apVal[1]); 5150 5407 5151 5408 nSpace = n + FTS5_DATA_ZERO_PADDING; 5152 5409 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); 5153 5410 if( a==0 ) goto decode_out; 5154 5411 memcpy(a, aBlob, n); 5155 - fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno); 5412 + fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); 5156 5413 5157 5414 fts5DebugRowid(&rc, &s, iRowid); 5158 - if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ 5415 + if( bDlidx ){ 5159 5416 Fts5Data dlidx; 5160 - Fts5DlidxIter iter; 5417 + Fts5DlidxLvl lvl; 5161 5418 5162 5419 dlidx.p = a; 5163 5420 dlidx.n = n; 5164 5421 dlidx.nRef = 2; 5165 5422 5166 - memset(&iter, 0, sizeof(Fts5DlidxIter)); 5167 - iter.pData = &dlidx; 5168 - iter.iLeafPgno = iPgno; 5423 + memset(&lvl, 0, sizeof(Fts5DlidxLvl)); 5424 + lvl.pData = &dlidx; 5425 + lvl.iLeafPgno = iPgno; 5169 5426 5170 - for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){ 5427 + for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ 5171 5428 sqlite3Fts5BufferAppendPrintf(&rc, &s, 5172 - " %d(%lld)", iter.iLeafPgno, iter.iRowid 5429 + " %d(%lld)", lvl.iLeafPgno, lvl.iRowid 5173 5430 ); 5174 5431 } 5175 5432 }else if( iSegid==0 ){ 5176 5433 if( iRowid==FTS5_AVERAGES_ROWID ){ 5177 5434 /* todo */ 5178 5435 }else{ 5179 5436 fts5DecodeStructure(&rc, &s, a, n);
Changes to ext/fts5/fts5_tcl.c.
18 18 19 19 #ifdef SQLITE_ENABLE_FTS5 20 20 21 21 #include "fts5.h" 22 22 #include <string.h> 23 23 #include <assert.h> 24 24 25 -/* 26 -** This variable is set to true when running corruption tests. Otherwise 27 -** false. If it is false, extra assert() conditions in the fts5 code are 28 -** activated - conditions that are only true if it is guaranteed that the 29 -** fts5 database is not corrupt. 30 -*/ 31 -int sqlite3_fts5_may_be_corrupt = 0; 25 +extern int sqlite3_fts5_may_be_corrupt; 32 26 33 27 /************************************************************************* 34 28 ** This is a copy of the first part of the SqliteDb structure in 35 29 ** tclsqlite.c. We need it here so that the get_sqlite_pointer routine 36 30 ** can extract the sqlite3* pointer from an existing Tcl SQLite 37 31 ** connection. 38 32 */
Changes to ext/fts5/test/fts5aa.test.
45 45 CREATE VIRTUAL TABLE t1 USING fts5(x,y); 46 46 } 47 47 do_execsql_test 2.1 { 48 48 INSERT INTO t1 VALUES('a b c', 'd e f'); 49 49 } 50 50 do_test 2.2 { 51 51 execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } 52 -} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} 52 +} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} 53 53 do_execsql_test 2.3 { 54 54 INSERT INTO t1(t1) VALUES('integrity-check'); 55 55 } 56 56 57 57 #------------------------------------------------------------------------- 58 58 # 59 59 reset_db ................................................................................ 177 177 set y [doc] 178 178 set z [doc] 179 179 set rowid [expr int(rand() * 100)] 180 180 execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } 181 181 } 182 182 execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } 183 183 } {} 184 -# if {$i==1} break 185 184 } 186 185 #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} 187 186 #exit 188 187 189 188 #------------------------------------------------------------------------- 190 189 # 191 190 reset_db ................................................................................ 238 237 set rowid [expr int(rand() * 100)] 239 238 execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } 240 239 } 241 240 execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } 242 241 } {} 243 242 if {[set_test_counter errors]} break 244 243 } 244 + 245 245 246 246 #------------------------------------------------------------------------- 247 247 # 248 248 reset_db 249 249 do_execsql_test 10.0 { 250 250 CREATE VIRTUAL TABLE t1 USING fts5(x,y); 251 251 }
Changes to ext/fts5/test/fts5al.test.
22 22 finish_test 23 23 return 24 24 } 25 25 26 26 do_execsql_test 1.1 { 27 27 CREATE VIRTUAL TABLE ft1 USING fts5(x); 28 28 SELECT * FROM ft1_config; 29 -} {version 1} 29 +} {version 2} 30 30 31 31 do_execsql_test 1.2 { 32 32 INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); 33 33 SELECT * FROM ft1_config; 34 -} {pgsz 32 version 1} 34 +} {pgsz 32 version 2} 35 35 36 36 do_execsql_test 1.3 { 37 37 INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); 38 38 SELECT * FROM ft1_config; 39 -} {pgsz 64 version 1} 39 +} {pgsz 64 version 2} 40 40 41 41 #-------------------------------------------------------------------------- 42 42 # Test the logic for parsing the rank() function definition. 43 43 # 44 44 foreach {tn defn} { 45 45 1 "fname()" 46 46 2 "fname(1)"
Changes to ext/fts5/test/fts5corrupt2.test.
12 12 # This file tests that FTS5 handles corrupt databases (i.e. internal 13 13 # inconsistencies in the backing tables) correctly. In this case 14 14 # "correctly" means without crashing. 15 15 # 16 16 17 17 source [file join [file dirname [info script]] fts5_common.tcl] 18 18 set testprefix fts5corrupt2 19 +sqlite3_fts5_may_be_corrupt 1 19 20 20 21 # Create a simple FTS5 table containing 100 documents. Each document 21 22 # contains 10 terms, each of which start with the character "x". 22 23 # 23 24 expr srand(0) 24 25 db func rnddoc fts5_rnddoc 25 26 do_execsql_test 1.0 { 26 27 CREATE VIRTUAL TABLE t1 USING fts5(x); 27 28 INSERT INTO t1(t1, rank) VALUES('pgsz', 32); 28 29 WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) 29 30 INSERT INTO t1 SELECT rnddoc(10) FROM ii; 30 31 } 31 32 set mask [expr 31 << 31] 33 + 32 34 33 35 # Test 1: 34 36 # 35 37 # For each page in the t1_data table, open a transaction and DELETE 36 38 # the t1_data entry. Then run: 37 39 # 38 40 # * an integrity-check, and ................................................................................ 190 192 execsql ROLLBACK 191 193 } 192 194 193 195 do_test 4.$tn.x { expr $nCorrupt>0 } 1 194 196 } 195 197 196 198 199 +sqlite3_fts5_may_be_corrupt 0 197 200 finish_test 198 201
Changes to ext/fts5/test/fts5dlidx.test.
57 57 if {($i % $spc2)==0} { 58 58 lappend ydoc $rowid 59 59 append doc " y" 60 60 } 61 61 } 62 62 execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } 63 63 } 64 + breakpoint 64 65 execsql COMMIT 65 66 66 67 do_test $tn.1 { 67 68 execsql { INSERT INTO t1(t1) VALUES('integrity-check') } 68 69 } {} 69 70 70 71 do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc ................................................................................ 78 79 do_fb_test $tn.5.2 { 79 80 SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc 80 81 } 81 82 82 83 83 84 do_dlidx_test1 1.1 10 100 10000 0 1000 84 85 do_dlidx_test1 1.2 10 10 10000 0 128 85 -do_dlidx_test1 1.3 10 10 100 0 36028797018963970 86 -do_dlidx_test1 1.3 10 10 50 0 150000000000000000 86 +do_dlidx_test1 1.3 10 10 66 0 36028797018963970 87 +do_dlidx_test1 1.4 10 10 50 0 150000000000000000 87 88 88 89 89 90 90 91 finish_test 91 92
Changes to ext/fts5/test/fts5integrity.test.
26 26 do_execsql_test 2.0 { 27 27 CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1); 28 28 INSERT INTO yy VALUES('term'); 29 29 } 30 30 do_execsql_test 2.1 { 31 31 INSERT INTO yy(yy) VALUES('integrity-check'); 32 32 } 33 + 34 +#-------------------------------------------------------------------- 35 +# 36 +do_execsql_test 3.0 { 37 + CREATE VIRTUAL TABLE zz USING fts5(z); 38 + INSERT INTO zz(zz, rank) VALUES('pgsz', 32); 39 + INSERT INTO zz VALUES('b b b b b b b b b b b b b b'); 40 + INSERT INTO zz SELECT z FROM zz; 41 + INSERT INTO zz SELECT z FROM zz; 42 + INSERT INTO zz SELECT z FROM zz; 43 + INSERT INTO zz SELECT z FROM zz; 44 + INSERT INTO zz SELECT z FROM zz; 45 + INSERT INTO zz SELECT z FROM zz; 46 + INSERT INTO zz(zz) VALUES('optimize'); 47 +} 48 + 49 +do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } 50 + 51 + 52 +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} 53 +#exit 54 + 33 55 34 56 finish_test 35 57
Changes to ext/fts5/test/fts5rowid.test.
21 21 22 22 do_catchsql_test 1.2 { 23 23 SELECT fts5_rowid('segment') 24 24 } {1 {should be: fts5_rowid('segment', segid, height, pgno))}} 25 25 26 26 do_execsql_test 1.3 { 27 27 SELECT fts5_rowid('segment', 1, 1, 1) 28 -} {70866960385} 28 +} {139586437121} 29 29 30 30 do_catchsql_test 1.4 { 31 31 SELECT fts5_rowid('nosucharg'); 32 32 } {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 33 33 34 34 35 35 #-------------------------------------------------------------------------
Changes to ext/fts5/test/fts5version.test.
20 20 do_execsql_test 1.1 { 21 21 CREATE VIRTUAL TABLE t1 USING fts5(one); 22 22 INSERT INTO t1 VALUES('a b c d'); 23 23 } {} 24 24 25 25 do_execsql_test 1.2 { 26 26 SELECT * FROM t1_config WHERE k='version' 27 -} {version 1} 27 +} {version 2} 28 28 29 29 do_execsql_test 1.3 { 30 30 SELECT rowid FROM t1 WHERE t1 MATCH 'a'; 31 31 } {1} 32 32 33 33 do_execsql_test 1.4 { 34 - UPDATE t1_config set v=2 WHERE k='version'; 34 + UPDATE t1_config set v=3 WHERE k='version'; 35 35 } 36 36 37 37 do_test 1.5 { 38 38 db close 39 39 sqlite3 db test.db 40 40 catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } 41 -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} 41 +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} 42 42 43 43 breakpoint 44 44 do_test 1.6 { 45 45 db close 46 46 sqlite3 db test.db 47 47 catchsql { INSERT INTO t1 VALUES('x y z') } 48 -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} 48 +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} 49 49 50 50 do_test 1.7 { 51 51 execsql { DELETE FROM t1_config WHERE k='version' } 52 52 db close 53 53 sqlite3 db test.db 54 54 catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } 55 -} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}} 55 +} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}} 56 56 57 57 58 58 finish_test 59 59
Changes to ext/fts5/tool/loadfts5.tcl.
105 105 db func loadfile loadfile 106 106 107 107 db transaction { 108 108 set pref "" 109 109 if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } 110 110 catch { 111 111 db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" 112 + # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" 112 113 } 113 114 if {$O(automerge)>=0} { 114 115 if {$O(vtab) == "fts5"} { 115 116 db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } 116 117 } else { 117 118 db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } 118 119 }
Changes to test/permutations.test.
237 237 fts3varint.test 238 238 fts4growth.test fts4growth2.test 239 239 } 240 240 241 241 test_suite "fts5" -prefix "" -description { 242 242 All FTS5 tests. 243 243 } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] 244 + 245 +test_suite "fts5-light" -prefix "" -description { 246 + All FTS5 tests. 247 +} -files [ 248 + test_set \ 249 + [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ 250 + -exclude *corrupt* *fault* *big* *fts5aj* 251 +] 244 252 245 253 test_suite "nofaultsim" -prefix "" -description { 246 254 "Very" quick test suite. Runs in less than 5 minutes on a workstation. 247 255 This test suite is the same as the "quick" tests, except that some files 248 256 that test malloc and IO errors are omitted. 249 257 } -files [ 250 258 test_set $allquicktests -exclude *malloc* *ioerr* *fault*
Changes to tool/mksqlite3c.tcl.
373 373 fts5_expr.c 374 374 fts5_hash.c 375 375 fts5_index.c 376 376 fts5parse.c 377 377 fts5_storage.c 378 378 fts5_tokenize.c 379 379 fts5_unicode2.c 380 + fts5_vocab.c 380 381 381 382 rtree.c 382 383 icu.c 383 384 fts3_icu.c 384 385 } { 385 386 copy_file tsrc/$file 386 387 } 387 388 388 389 close $out