Index: ext/fts3/README.tokenizers ================================================================== --- ext/fts3/README.tokenizers +++ ext/fts3/README.tokenizers @@ -50,12 +50,14 @@ encoded as a blob. Or, if no such tokenizer exists, an SQL exception (error) is raised. SECURITY: If the fts3 extension is used in an environment where potentially malicious users may execute arbitrary SQL (i.e. gears), they should be - prevented from invoking the fts3_tokenizer() function, possibly using the - authorisation callback. + prevented from invoking the fts3_tokenizer() function. The + fts3_tokenizer() function is disabled by default. It is only enabled + by SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER. Do not enable it in + security sensitive environments. See "Sample code" below for an example of calling the fts3_tokenizer() function from C code. 3. ICU Library Tokenizers Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -318,10 +318,18 @@ int sqlite3Fts3Always(int b) { assert( b ); return b; } int sqlite3Fts3Never(int b) { assert( !b ); return b; } # endif #endif +/* +** This variable is set to false when running tests for which the on disk +** structures should not be corrupt. Otherwise, true. If it is false, extra +** assert() conditions in the fts3 code are activated - conditions that are +** only true if it is guaranteed that the fts3 database is not corrupt. +*/ +int sqlite3_fts3_may_be_corrupt = 1; + /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. ** The number of bytes written is returned. */ @@ -336,22 +344,17 @@ assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); return (int) (q - (unsigned char *)p); } #define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ - v = (v & mask1) | ( (*ptr++) << shift ); \ + v = (v & mask1) | ( (*(const unsigned char*)(ptr++)) << shift ); \ if( (v & mask2)==0 ){ var = v; return ret; } #define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ v = (*ptr++); \ if( (v & mask2)==0 ){ var = v; return ret; } -/* -** Read a 64-bit variable-length integer from memory starting at p[0]. -** Return the number of bytes read, or 0 on error. -** The value is stored in *v. -*/ -int sqlite3Fts3GetVarint(const char *pBuf, sqlite_int64 *v){ +int sqlite3Fts3GetVarintU(const char *pBuf, sqlite_uint64 *v){ const unsigned char *p = (const unsigned char*)pBuf; const unsigned char *pStart = p; u32 a; u64 b; int shift; @@ -368,30 +371,66 @@ if( (c & 0x80)==0 ) break; } *v = b; return (int)(p - pStart); } + +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read, or 0 on error. +** The value is stored in *v. +*/ +int sqlite3Fts3GetVarint(const char *pBuf, sqlite_int64 *v){ + return sqlite3Fts3GetVarintU(pBuf, (sqlite3_uint64*)v); +} + +/* +** Read a 64-bit variable-length integer from memory starting at p[0] and +** not extending past pEnd[-1]. +** Return the number of bytes read, or 0 on error. +** The value is stored in *v. +*/ +int sqlite3Fts3GetVarintBounded( + const char *pBuf, + const char *pEnd, + sqlite_int64 *v +){ + const unsigned char *p = (const unsigned char*)pBuf; + const unsigned char *pStart = p; + const unsigned char *pX = (const unsigned char*)pEnd; + u64 b = 0; + int shift; + for(shift=0; shift<=63; shift+=7){ + u64 c = p=0 ); return 5; } @@ -558,17 +597,22 @@ int rc = SQLITE_OK; /* Return code */ const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ sqlite3 *db = p->db; /* Database handle */ /* Drop the shadow tables */ - if( p->zContentTbl==0 ){ - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); - } - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); + fts3DbExec(&rc, db, + "DROP TABLE IF EXISTS %Q.'%q_segments';" + "DROP TABLE IF EXISTS %Q.'%q_segdir';" + "DROP TABLE IF EXISTS %Q.'%q_docsize';" + "DROP TABLE IF EXISTS %Q.'%q_stat';" + "%s DROP TABLE IF EXISTS %Q.'%q_content';", + zDb, p->zName, + zDb, p->zName, + zDb, p->zName, + zDb, p->zName, + (p->zContentTbl ? "--" : ""), zDb,p->zName + ); /* If everything has worked, invoke fts3DisconnectMethod() to free the ** memory associated with the Fts3Table structure and return SQLITE_OK. ** Otherwise, return an SQLite error code. */ @@ -796,14 +840,14 @@ ** The pointer returned points to memory obtained from sqlite3_malloc(). It ** is the callers responsibility to call sqlite3_free() to release this ** memory. */ static char *fts3QuoteId(char const *zInput){ - int nRet; + sqlite3_int64 nRet; char *zRet; nRet = 2 + (int)strlen(zInput)*2 + 1; - zRet = sqlite3_malloc(nRet); + zRet = sqlite3_malloc64(nRet); if( zRet ){ int i; char *z = zRet; *(z++) = '"'; for(i=0; zInput[i]; i++){ @@ -980,11 +1024,11 @@ for(p=zParam; *p; p++){ if( *p==',' ) nIndex++; } } - aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); + aIndex = sqlite3_malloc64(sizeof(struct Fts3Index) * nIndex); *apIndex = aIndex; if( !aIndex ){ return SQLITE_NOMEM; } @@ -1059,25 +1103,25 @@ } sqlite3_free(zSql); if( rc==SQLITE_OK ){ const char **azCol; /* Output array */ - int nStr = 0; /* Size of all column names (incl. 0x00) */ + sqlite3_int64 nStr = 0; /* Size of all column names (incl. 0x00) */ int nCol; /* Number of table columns */ int i; /* Used to iterate through columns */ /* Loop through the returned columns. Set nStr to the number of bytes of ** space required to store a copy of each column name, including the ** nul-terminator byte. */ nCol = sqlite3_column_count(pStmt); for(i=0; inNodeSize = p->nPgsz-35; + +#if defined(SQLITE_DEBUG)||defined(SQLITE_TEST) + p->nMergeCount = FTS3_MERGE_COUNT; +#endif /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: @@ -1552,10 +1600,14 @@ int iLangidCons = -1; /* Index of langid=x constraint, if present */ int iDocidGe = -1; /* Index of docid>=x constraint, if present */ int iDocidLe = -1; /* Index of docid<=x constraint, if present */ int iIdx; + + if( p->bLock ){ + return SQLITE_ERROR; + } /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient ** strategy is possible. */ @@ -1751,11 +1803,15 @@ pCsr->pStmt = p->pSeekStmt; p->pSeekStmt = 0; }else{ zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); if( !zSql ) return SQLITE_NOMEM; - rc = sqlite3_prepare_v3(p->db, zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0); + p->bLock++; + rc = sqlite3_prepare_v3( + p->db, zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0 + ); + p->bLock--; sqlite3_free(zSql); } if( rc==SQLITE_OK ) pCsr->bSeekStmt = 1; } return rc; @@ -1769,15 +1825,19 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ int rc = SQLITE_OK; if( pCsr->isRequireSeek ){ rc = fts3CursorSeekStmt(pCsr); if( rc==SQLITE_OK ){ + Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab; + pTab->bLock++; sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); pCsr->isRequireSeek = 0; if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ + pTab->bLock--; return SQLITE_OK; }else{ + pTab->bLock--; rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ /* If no row was found and no error has occurred, then the %_content ** table is missing a row that is present in the full-text index. ** The data structures are corrupt. */ @@ -1819,11 +1879,11 @@ ){ int rc = SQLITE_OK; /* Return code */ const char *zCsr = zNode; /* Cursor to iterate through node */ const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ char *zBuffer = 0; /* Buffer to load terms into */ - int nAlloc = 0; /* Size of allocated buffer */ + i64 nAlloc = 0; /* Size of allocated buffer */ int isFirstTerm = 1; /* True when processing first term on page */ sqlite3_int64 iChild; /* Block id of child node to descend to */ /* Skip over the 'height' varint that occurs at the start of every ** interior node. Then load the blockid of the left-child of the b-tree @@ -1857,18 +1917,18 @@ } isFirstTerm = 0; zCsr += fts3GetVarint32(zCsr, &nSuffix); assert( nPrefix>=0 && nSuffix>=0 ); - if( &zCsr[nSuffix]>zEnd ){ + if( nPrefix>zCsr-zNode || nSuffix>zEnd-zCsr || nSuffix==0 ){ rc = FTS_CORRUPT_VTAB; goto finish_scan; } - if( nPrefix+nSuffix>nAlloc ){ + if( (i64)nPrefix+nSuffix>nAlloc ){ char *zNew; - nAlloc = (nPrefix+nSuffix) * 2; - zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); + nAlloc = ((i64)nPrefix+nSuffix) * 2; + zNew = (char *)sqlite3_realloc64(zBuffer, nAlloc); if( !zNew ){ rc = SQLITE_NOMEM; goto finish_scan; } zBuffer = zNew; @@ -1945,11 +2005,11 @@ assert( piLeaf || piLeaf2 ); fts3GetVarint32(zNode, &iHeight); rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); - assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); + assert_fts3_nc( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); if( rc==SQLITE_OK && iHeight>1 ){ char *zBlob = 0; /* Blob read from %_segments table */ int nBlob = 0; /* Size of zBlob in bytes */ @@ -1965,11 +2025,17 @@ if( rc==SQLITE_OK ){ rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); } if( rc==SQLITE_OK ){ - rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); + int iNewHeight = 0; + fts3GetVarint32(zBlob, &iNewHeight); + if( iNewHeight>=iHeight ){ + rc = FTS_CORRUPT_VTAB; + }else{ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); + } } sqlite3_free(zBlob); } return rc; @@ -2070,14 +2136,15 @@ } *ppPoslist = pEnd; } /* -** Value used to signify the end of an position-list. This is safe because -** it is not possible to have a document with 2^31 terms. +** Value used to signify the end of an position-list. This must be +** as large or larger than any value that might appear on the +** position-list, even a position list that has been corrupted. */ -#define POSITION_LIST_END 0x7fffffff +#define POSITION_LIST_END LARGEST_INT64 /* ** This function is used to help parse position-lists. When this function is ** called, *pp may point to the start of the next varint in the position-list ** being parsed, or it may point to 1 byte past the end of the position-list @@ -2132,11 +2199,11 @@ ** into *pp contains all positions of both *pp1 and *pp2 in sorted ** order and with any duplicates removed. All pointers are ** updated appropriately. The caller is responsible for insuring ** that there is enough space in *pp to hold the complete output. */ -static void fts3PoslistMerge( +static int fts3PoslistMerge( char **pp, /* Output buffer */ char **pp1, /* Left input list */ char **pp2 /* Right input list */ ){ char *p = *pp; @@ -2145,16 +2212,22 @@ while( *p1 || *p2 ){ int iCol1; /* The current column index in pp1 */ int iCol2; /* The current column index in pp2 */ - if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); - else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; + if( *p1==POS_COLUMN ){ + fts3GetVarint32(&p1[1], &iCol1); + if( iCol1==0 ) return FTS_CORRUPT_VTAB; + } + else if( *p1==POS_END ) iCol1 = 0x7fffffff; else iCol1 = 0; - if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); - else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; + if( *p2==POS_COLUMN ){ + fts3GetVarint32(&p2[1], &iCol2); + if( iCol2==0 ) return FTS_CORRUPT_VTAB; + } + else if( *p2==POS_END ) iCol2 = 0x7fffffff; else iCol2 = 0; if( iCol1==iCol2 ){ sqlite3_int64 i1 = 0; /* Last position from pp1 */ sqlite3_int64 i2 = 0; /* Last position from pp2 */ @@ -2197,10 +2270,11 @@ *p++ = POS_END; *pp = p; *pp1 = p1 + 1; *pp2 = p2 + 1; + return SQLITE_OK; } /* ** This function is used to merge two position lists into one. When it is ** called, *pp1 and *pp2 must both point to position lists. A position-list is @@ -2261,14 +2335,13 @@ if( iCol1 ){ *p++ = POS_COLUMN; p += sqlite3Fts3PutVarint(p, iCol1); } - assert( *p1!=POS_END && *p1!=POS_COLUMN ); - assert( *p2!=POS_END && *p2!=POS_COLUMN ); fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; + if( iPos1<0 || iPos2<0 ) break; while( 1 ){ if( iPos2==iPos1+nToken || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) ){ @@ -2413,16 +2486,16 @@ sqlite3_int64 *pVal /* IN/OUT: Integer value */ ){ if( *pp>=pEnd ){ *pp = 0; }else{ - sqlite3_int64 iVal; - *pp += sqlite3Fts3GetVarint(*pp, &iVal); + u64 iVal; + *pp += sqlite3Fts3GetVarintU(*pp, &iVal); if( bDescIdx ){ - *pVal -= iVal; + *pVal = (i64)((u64)*pVal - iVal); }else{ - *pVal += iVal; + *pVal = (i64)((u64)*pVal + iVal); } } } /* @@ -2445,18 +2518,20 @@ int bDescIdx, /* True for descending docids */ sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ int *pbFirst, /* IN/OUT: True after first int written */ sqlite3_int64 iVal /* Write this value to the list */ ){ - sqlite3_int64 iWrite; + sqlite3_uint64 iWrite; if( bDescIdx==0 || *pbFirst==0 ){ - iWrite = iVal - *piPrev; + assert_fts3_nc( *pbFirst==0 || iVal>=*piPrev ); + iWrite = (u64)iVal - (u64)*piPrev; }else{ - iWrite = *piPrev - iVal; + assert_fts3_nc( *piPrev>=iVal ); + iWrite = (u64)*piPrev - (u64)iVal; } assert( *pbFirst || *piPrev==0 ); - assert( *pbFirst==0 || iWrite>0 ); + assert_fts3_nc( *pbFirst==0 || iWrite>0 ); *pp += sqlite3Fts3PutVarint(*pp, iWrite); *piPrev = iVal; *pbFirst = 1; } @@ -2468,11 +2543,12 @@ ** Otherwise, (i2-i1). ** ** Using this makes it easier to write code that can merge doclists that are ** sorted in either ascending or descending order. */ -#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) +/* #define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i64)((u64)i1-i2)) */ +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1>i2?1:((i1==i2)?0:-1))) /* ** This function does an "OR" merge of two doclists (output contains all ** positions contained in either argument doclist). If the docids in the ** input doclists are sorted in ascending order, parameter bDescDoclist @@ -2490,10 +2566,11 @@ int bDescDoclist, /* True if arguments are desc */ char *a1, int n1, /* First doclist */ char *a2, int n2, /* Second doclist */ char **paOut, int *pnOut /* OUT: Malloc'd doclist */ ){ + int rc = SQLITE_OK; sqlite3_int64 i1 = 0; sqlite3_int64 i2 = 0; sqlite3_int64 iPrev = 0; char *pEnd1 = &a1[n1]; char *pEnd2 = &a2[n2]; @@ -2533,11 +2610,11 @@ ** docids to grow. ** ** A symetric argument may be made if the doclists are in descending ** order. */ - aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); + aOut = sqlite3_malloc64((i64)n1+n2+FTS3_VARINT_MAX-1+FTS3_BUFFER_PADDING); if( !aOut ) return SQLITE_NOMEM; p = aOut; fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); @@ -2544,11 +2621,12 @@ while( p1 || p2 ){ sqlite3_int64 iDiff = DOCID_CMP(i1, i2); if( p2 && p1 && iDiff==0 ){ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); - fts3PoslistMerge(&p, &p1, &p2); + rc = fts3PoslistMerge(&p, &p1, &p2); + if( rc ) break; fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); }else if( !p2 || (p1 && iDiff<0) ){ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); fts3PoslistCopy(&p, &p1); @@ -2556,16 +2634,24 @@ }else{ fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); fts3PoslistCopy(&p, &p2); fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } + + assert( (p-aOut)<=((p1?(p1-a1):n1)+(p2?(p2-a2):n2)+FTS3_VARINT_MAX-1) ); } + if( rc!=SQLITE_OK ){ + sqlite3_free(aOut); + p = aOut = 0; + }else{ + assert( (p-aOut)<=n1+n2+FTS3_VARINT_MAX-1 ); + memset(&aOut[(p-aOut)], 0, FTS3_BUFFER_PADDING); + } *paOut = aOut; *pnOut = (int)(p-aOut); - assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); - return SQLITE_OK; + return rc; } /* ** This function does a "phrase" merge of two doclists. In a phrase merge, ** the output contains a copy of each position from the right-hand input @@ -2596,11 +2682,11 @@ int bFirstOut = 0; char *aOut; assert( nDist>0 ); if( bDescDoclist ){ - aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX); + aOut = sqlite3_malloc64((sqlite3_int64)*pnRight + FTS3_VARINT_MAX); if( aOut==0 ) return SQLITE_NOMEM; }else{ aOut = aRight; } p = aOut; @@ -2780,10 +2866,11 @@ */ pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); pTS->anOutput[0] = nDoclist; if( pTS->aaOutput[0] ){ memcpy(pTS->aaOutput[0], aDoclist, nDoclist); + memset(&pTS->aaOutput[0][nDoclist], 0, FTS3_VARINT_MAX); }else{ return SQLITE_NOMEM; } }else{ char *aMerge = aDoclist; @@ -2831,12 +2918,12 @@ Fts3MultiSegReader *pCsr, Fts3SegReader *pNew ){ if( (pCsr->nSegment%16)==0 ){ Fts3SegReader **apNew; - int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); - apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); + sqlite3_int64 nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc64(pCsr->apSegment, nByte); if( !apNew ){ sqlite3Fts3SegReaderFree(pNew); return SQLITE_NOMEM; } pCsr->apSegment = apNew; @@ -2871,11 +2958,11 @@ ** for the pending-terms. If this is a scan, then this call must be being ** made by an fts4aux module, not an FTS table. In this case calling ** Fts3SegReaderPending might segfault, as the data structures used by ** fts4aux are not completely populated. So it's easiest to filter these ** calls out here. */ - if( iLevel<0 && p->aIndex ){ + if( iLevel<0 && p->aIndex && p->iPrevLangid==iLangid ){ Fts3SegReader *pSeg = 0; rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg); if( rc==SQLITE_OK && pSeg ){ rc = fts3SegReaderCursorAppend(pCsr, pSeg); } @@ -2896,11 +2983,11 @@ int nRoot = sqlite3_column_bytes(pStmt, 4); char const *zRoot = sqlite3_column_blob(pStmt, 4); /* If zTerm is not NULL, and this segment is not stored entirely on its ** root node, the range of leaves scanned can be reduced. Do this. */ - if( iStartBlock && zTerm ){ + if( iStartBlock && zTerm && zRoot ){ sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); if( rc!=SQLITE_OK ) goto finished; if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; } @@ -3134,36 +3221,27 @@ */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ int rc; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ + Fts3Table *pTab = (Fts3Table*)pCursor->pVtab; + pTab->bLock++; if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ pCsr->isEof = 1; rc = sqlite3_reset(pCsr->pStmt); }else{ pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); rc = SQLITE_OK; } + pTab->bLock--; }else{ rc = fts3EvalNext((Fts3Cursor *)pCursor); } assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } -/* -** The following are copied from sqliteInt.h. -** -** Constants for the largest and smallest possible 64-bit signed integers. -** These macros are designed to work correctly on both 32-bit and 64-bit -** compilers. -*/ -#ifndef SQLITE_AMALGAMATION -# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) -# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) -#endif - /* ** If the numeric type of argument pVal is "integer", then return it ** converted to a 64-bit signed integer. Otherwise, return a copy of ** the second parameter, iDefault. */ @@ -3212,10 +3290,14 @@ sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ int iIdx; UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); + + if( p->bLock ){ + return SQLITE_ERROR; + } eSearch = (idxNum & 0x0000FFFF); assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( p->pSegments==0 ); @@ -3284,11 +3366,15 @@ zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") ); } if( zSql ){ - rc = sqlite3_prepare_v3(p->db,zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0); + p->bLock++; + rc = sqlite3_prepare_v3( + p->db,zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0 + ); + p->bLock--; sqlite3_free(zSql); }else{ rc = SQLITE_NOMEM; } }else if( eSearch==FTS3_DOCID_SEARCH ){ @@ -3806,11 +3892,11 @@ */ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ int rc = SQLITE_OK; UNUSED_PARAMETER(iSavepoint); assert( ((Fts3Table *)pVtab)->inTransaction ); - assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); + assert( ((Fts3Table *)pVtab)->mxSavepoint <= iSavepoint ); TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ rc = fts3SyncMethod(pVtab); } return rc; @@ -3838,18 +3924,17 @@ */ static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts3Table *p = (Fts3Table*)pVtab; UNUSED_PARAMETER(iSavepoint); assert( p->inTransaction ); - assert( p->mxSavepoint >= iSavepoint ); TESTONLY( p->mxSavepoint = iSavepoint ); sqlite3Fts3PendingTermsClear(p); return SQLITE_OK; } static const sqlite3_module fts3Module = { - /* iVersion */ 2, + /* iVersion */ 3, /* xCreate */ fts3CreateMethod, /* xConnect */ fts3ConnectMethod, /* xBestIndex */ fts3BestIndexMethod, /* xDisconnect */ fts3DisconnectMethod, /* xDestroy */ fts3DestroyMethod, @@ -3961,11 +4046,11 @@ } } #ifdef SQLITE_TEST if( rc==SQLITE_OK ){ - rc = sqlite3Fts3ExprInitTestInterface(db); + rc = sqlite3Fts3ExprInitTestInterface(db, pHash); } #endif /* Create the virtual table wrapper around the hash-table and overload ** the four scalar functions. If this is successful, register the @@ -4148,10 +4233,11 @@ } return rc; } +#ifndef SQLITE_DISABLE_FTS4_DEFERRED /* ** This function is called on each phrase after the position lists for ** any deferred tokens have been loaded into memory. It updates the phrases ** current position list to include only those positions that are really ** instances of the phrase (after considering deferred tokens). If this @@ -4251,10 +4337,11 @@ } } return SQLITE_OK; } +#endif /* SQLITE_DISABLE_FTS4_DEFERRED */ /* ** Maximum number of tokens a phrase may have to be considered for the ** incremental doclists strategy. */ @@ -4284,11 +4371,11 @@ ** tokens or prefix tokens that cannot use a prefix-index. */ int bHaveIncr = 0; int bIncrOk = (bOptOk && pCsr->bDesc==pTab->bDescIdx && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 -#ifdef SQLITE_TEST +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) && pTab->bNoIncrDoclist==0 #endif ); for(i=0; bIncrOk==1 && inToken; i++){ Fts3PhraseToken *pToken = &p->aToken[i]; @@ -4426,19 +4513,20 @@ Fts3Table *pTab, Fts3Doclist *pDL, u8 *pbEof ){ char *pIter; /* Used to iterate through aAll */ - char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + char *pEnd; /* 1 byte past end of aAll */ if( pDL->pNextDocid ){ pIter = pDL->pNextDocid; + assert( pDL->aAll!=0 || pIter==0 ); }else{ pIter = pDL->aAll; } - if( pIter>=pEnd ){ + if( pIter==0 || pIter>=(pEnd = pDL->aAll + pDL->nAll) ){ /* We have already reached the end of this doclist. EOF. */ *pbEof = 1; }else{ sqlite3_int64 iDelta; pIter += sqlite3Fts3GetVarint(pIter, &iDelta); @@ -4595,13 +4683,14 @@ /* Check if the current entries really are a phrase match */ if( bEof==0 ){ int nList = 0; int nByte = a[p->nToken-1].nList; - char *aDoclist = sqlite3_malloc(nByte+1); + char *aDoclist = sqlite3_malloc(nByte+FTS3_BUFFER_PADDING); if( !aDoclist ) return SQLITE_NOMEM; memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + memset(&aDoclist[nByte], 0, FTS3_BUFFER_PADDING); for(i=0; i<(p->nToken-1); i++){ if( a[i].bIgnore==0 ){ char *pL = a[i].pList; char *pR = aDoclist; @@ -4805,16 +4894,17 @@ const char *a; rc = sqlite3Fts3SelectDoctotal(p, &pStmt); if( rc!=SQLITE_OK ) return rc; a = sqlite3_column_blob(pStmt, 0); - assert( a ); - - pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; - a += sqlite3Fts3GetVarint(a, &nDoc); - while( a1 && pTab->bFts4 ){ Fts3TokenAndCost *aTC; Fts3Expr **apOr; - aTC = (Fts3TokenAndCost *)sqlite3_malloc( + aTC = (Fts3TokenAndCost *)sqlite3_malloc64( sizeof(Fts3TokenAndCost) * nToken + sizeof(Fts3Expr *) * nOr * 2 ); apOr = (Fts3Expr **)&aTC[nToken]; @@ -5299,20 +5389,20 @@ if( *pRc==SQLITE_OK && pExpr->eType==FTSQUERY_NEAR && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) ){ Fts3Expr *p; - int nTmp = 0; /* Bytes of temp space */ + sqlite3_int64 nTmp = 0; /* Bytes of temp space */ char *aTmp; /* Temp space for PoslistNearMerge() */ /* Allocate temporary working space. */ for(p=pExpr; p->pLeft; p=p->pLeft){ assert( p->pRight->pPhrase->doclist.nList>0 ); nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc(nTmp*2); + aTmp = sqlite3_malloc64(nTmp*2); if( !aTmp ){ *pRc = SQLITE_NOMEM; res = 0; }else{ char *aPoslist = p->pPhrase->doclist.pList; @@ -5578,19 +5668,18 @@ ** by pExpr, calling this function for each row. This function increments ** the values in Fts3Expr.aMI[] according to the position-list currently ** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase ** expression nodes. */ -static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){ if( pExpr ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase && pPhrase->doclist.pList ){ int iCol = 0; char *p = pPhrase->doclist.pList; - assert( *p ); - while( 1 ){ + do{ u8 c = 0; int iCnt = 0; while( 0xFE & (*p | c) ){ if( (c&0x80)==0 ) iCnt++; c = *p++ & 0x80; @@ -5602,15 +5691,15 @@ pExpr->aMI[iCol*3 + 1] += iCnt; pExpr->aMI[iCol*3 + 2] += (iCnt>0); if( *p==0x00 ) break; p++; p += fts3GetVarint32(p, &iCol); - } + }while( iColpLeft); - fts3EvalUpdateCounts(pExpr->pRight); + fts3EvalUpdateCounts(pExpr->pLeft, nCol); + fts3EvalUpdateCounts(pExpr->pRight, nCol); } } /* ** Expression pExpr must be of type FTSQUERY_PHRASE. @@ -5650,11 +5739,11 @@ /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ for(p=pRoot; p; p=p->pLeft){ Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); assert( pE->aMI==0 ); - pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); + pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); if( !pE->aMI ) return SQLITE_NOMEM; memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); } fts3EvalRestart(pCsr, pRoot, &rc); @@ -5676,11 +5765,11 @@ && pRoot->eType==FTSQUERY_NEAR && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); if( rc==SQLITE_OK && pCsr->isEof==0 ){ - fts3EvalUpdateCounts(pRoot); + fts3EvalUpdateCounts(pRoot, pTab->nColumn); } } pCsr->isEof = 0; pCsr->iPrevId = iPrevId; Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -93,10 +93,12 @@ ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 +#define FTS3_BUFFER_PADDING 8 + /* ** FTS4 virtual tables may maintain multiple indexes - one index of all terms ** in the document set and zero or more prefix indexes. All indexes are stored ** as one or more b+-trees in the %_segments and %_segdir tables. ** @@ -125,10 +127,22 @@ ** Terminator values for position-lists and column-lists. */ #define POS_COLUMN (1) /* Column-list terminator */ #define POS_END (0) /* Position-list terminator */ +/* +** The assert_fts3_nc() macro is similar to the assert() macro, except that it +** is used for assert() conditions that are true only if it can be +** guranteed that the database is not corrupt. +*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) +extern int sqlite3_fts3_may_be_corrupt; +# define assert_fts3_nc(x) assert(sqlite3_fts3_may_be_corrupt || (x)) +#else +# define assert_fts3_nc(x) assert(x) +#endif + /* ** This section provides definitions to allow the ** FTS3 extension to be compiled outside of the ** amalgamation. */ @@ -180,10 +194,13 @@ # define TESTONLY(X) X #else # define TESTONLY(X) #endif +#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) +#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) + #endif /* SQLITE_AMALGAMATION */ #ifdef SQLITE_DEBUG int sqlite3Fts3Corrupt(void); # define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() @@ -223,10 +240,11 @@ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ char *zContentTbl; /* content=xxx option, or NULL */ char *zLanguageid; /* languageid=xxx option, or NULL */ int nAutoincrmerge; /* Value configured by 'automerge' */ u32 nLeafAdd; /* Number of leaf blocks added this trans */ + int bLock; /* Used to prevent recursive content= tbls */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ sqlite3_stmt *aStmt[40]; @@ -281,16 +299,26 @@ */ int inTransaction; /* True after xBegin but before xCommit/xRollback */ int mxSavepoint; /* Largest valid xSavepoint integer */ #endif -#ifdef SQLITE_TEST +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) /* True to disable the incremental doclist optimization. This is controled ** by special insert command 'test-no-incr-doclist'. */ int bNoIncrDoclist; + + /* Number of segments in a level */ + int nMergeCount; #endif }; + +/* Macro to find the number of segments to merge */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) +# define MergeCount(P) ((P)->nMergeCount) +#else +# define MergeCount(P) FTS3_MERGE_COUNT +#endif /* ** When the core wants to read from the virtual table, it creates a ** virtual table cursor (an instance of the following structure) using ** the xOpen method. Cursors are destroyed using the xClose method. @@ -551,10 +579,12 @@ /* fts3.c */ void sqlite3Fts3ErrMsg(char**,const char*,...); int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); +int sqlite3Fts3GetVarintU(const char *, sqlite_uint64 *); +int sqlite3Fts3GetVarintBounded(const char*,const char*,sqlite3_int64*); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); @@ -582,11 +612,11 @@ int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, char **, int, int, int, const char *, int, Fts3Expr **, char ** ); void sqlite3Fts3ExprFree(Fts3Expr *); #ifdef SQLITE_TEST -int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); +int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash*); int sqlite3Fts3InitTerm(sqlite3 *db); #endif int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int, sqlite3_tokenizer_cursor ** Index: ext/fts3/fts3_aux.c ================================================================== --- ext/fts3/fts3_aux.c +++ ext/fts3/fts3_aux.c @@ -64,11 +64,11 @@ ){ char const *zDb; /* Name of database (e.g. "main") */ char const *zFts3; /* Name of fts3 table */ int nDb; /* Result of strlen(zDb) */ int nFts3; /* Result of strlen(zFts3) */ - int nByte; /* Bytes of space to allocate here */ + sqlite3_int64 nByte; /* Bytes of space to allocate here */ int rc; /* value returned by declare_vtab() */ Fts3auxTable *p; /* Virtual table object to return */ UNUSED_PARAMETER(pUnused); @@ -96,11 +96,11 @@ rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); if( rc!=SQLITE_OK ) return rc; nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; - p = (Fts3auxTable *)sqlite3_malloc(nByte); + p = (Fts3auxTable *)sqlite3_malloc64(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, nByte); p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; @@ -246,11 +246,11 @@ } static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ if( nSize>pCsr->nStat ){ struct Fts3auxColstats *aNew; - aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, + aNew = (struct Fts3auxColstats *)sqlite3_realloc64(pCsr->aStat, sizeof(struct Fts3auxColstats) * nSize ); if( aNew==0 ) return SQLITE_NOMEM; memset(&aNew[pCsr->nStat], 0, sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) @@ -414,19 +414,19 @@ if( iEq>=0 || iGe>=0 ){ const unsigned char *zStr = sqlite3_value_text(apVal[0]); assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); if( zStr ){ pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); - pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; + pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm); } } if( iLe>=0 ){ pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); - pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); if( pCsr->zStop==0 ) return SQLITE_NOMEM; + pCsr->nStop = (int)strlen(pCsr->zStop); } if( iLangid>=0 ){ iLangVal = sqlite3_value_int(apVal[iLangid]); @@ -537,14 +537,14 @@ 0, /* xRollback */ 0, /* xFindFunction */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ - 0 /* xRollbackTo */ + 0, /* xRollbackTo */ }; int rc; /* Return code */ rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); return rc; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ Index: ext/fts3/fts3_expr.c ================================================================== --- ext/fts3/fts3_expr.c +++ ext/fts3/fts3_expr.c @@ -120,12 +120,12 @@ /* ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, ** zero the memory before returning a pointer to it. If unsuccessful, ** return NULL. */ -static void *fts3MallocZero(int nByte){ - void *pRet = sqlite3_malloc(nByte); +static void *fts3MallocZero(sqlite3_int64 nByte){ + void *pRet = sqlite3_malloc64(nByte); if( pRet ) memset(pRet, 0, nByte); return pRet; } int sqlite3Fts3OpenTokenizer( @@ -196,11 +196,11 @@ *pnConsumed = i; rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; - int nByte; /* total space to allocate */ + sqlite3_int64 nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); @@ -250,12 +250,12 @@ /* ** Enlarge a memory allocation. If an out-of-memory allocation occurs, ** then free the old allocation. */ -static void *fts3ReallocOrFree(void *pOrig, int nNew){ - void *pRet = sqlite3_realloc(pOrig, nNew); +static void *fts3ReallocOrFree(void *pOrig, sqlite3_int64 nNew){ + void *pRet = sqlite3_realloc64(pOrig, nNew); if( !pRet ){ sqlite3_free(pOrig); } return pRet; } @@ -495,11 +495,10 @@ if( sqlite3_fts3_enable_parentheses ){ if( *zInput=='(' ){ int nConsumed = 0; pParse->nNest++; rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); - if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } *pnConsumed = (int)(zInput - z) + 1 + nConsumed; return rc; }else if( *zInput==')' ){ pParse->nNest--; *pnConsumed = (int)((zInput - z) + 1); @@ -794,11 +793,11 @@ } if( rc==SQLITE_OK ){ if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ Fts3Expr **apLeaf; - apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + apLeaf = (Fts3Expr **)sqlite3_malloc64(sizeof(Fts3Expr *) * nMaxDepth); if( 0==apLeaf ){ rc = SQLITE_NOMEM; }else{ memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); } @@ -1106,38 +1105,10 @@ #ifdef SQLITE_TEST #include -/* -** Function to query the hash-table of tokenizers (see README.tokenizers). -*/ -static int queryTestTokenizer( - sqlite3 *db, - const char *zName, - const sqlite3_tokenizer_module **pp -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } - } - - return sqlite3_finalize(pStmt); -} - /* ** Return a pointer to a buffer containing a text representation of the ** expression passed as the first argument. The buffer is obtained from ** sqlite3_malloc(). It is the responsibility of the caller to use ** sqlite3_free() to release the memory. If an OOM condition is encountered, @@ -1201,65 +1172,61 @@ ** of a column of the fts3 table that the query expression may refer to. ** For example: ** ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); */ -static void fts3ExprTest( +static void fts3ExprTestCommon( + int bRebalance, sqlite3_context *context, int argc, sqlite3_value **argv ){ - sqlite3_tokenizer_module const *pModule = 0; sqlite3_tokenizer *pTokenizer = 0; int rc; char **azCol = 0; const char *zExpr; int nExpr; int nCol; int ii; Fts3Expr *pExpr; char *zBuf = 0; - sqlite3 *db = sqlite3_context_db_handle(context); + Fts3Hash *pHash = (Fts3Hash*)sqlite3_user_data(context); + const char *zTokenizer = 0; + char *zErr = 0; if( argc<3 ){ sqlite3_result_error(context, "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 ); return; } - rc = queryTestTokenizer(db, - (const char *)sqlite3_value_text(argv[0]), &pModule); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - }else if( !pModule ){ - sqlite3_result_error(context, "No such tokenizer module", -1); - goto exprtest_out; - } - - rc = pModule->xCreate(0, 0, &pTokenizer); - assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - } - pTokenizer->pModule = pModule; + zTokenizer = (const char*)sqlite3_value_text(argv[0]); + rc = sqlite3Fts3InitTokenizer(pHash, zTokenizer, &pTokenizer, &zErr); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_error(context, zErr, -1); + } + sqlite3_free(zErr); + return; + } zExpr = (const char *)sqlite3_value_text(argv[1]); nExpr = sqlite3_value_bytes(argv[1]); nCol = argc-2; - azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); + azCol = (char **)sqlite3_malloc64(nCol*sizeof(char *)); if( !azCol ){ sqlite3_result_error_nomem(context); goto exprtest_out; } for(ii=0; iixDestroy(pTokenizer); + if( pTokenizer ){ + rc = pTokenizer->pModule->xDestroy(pTokenizer); } sqlite3_free(azCol); } + +static void fts3ExprTest( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts3ExprTestCommon(0, context, argc, argv); +} +static void fts3ExprTestRebalance( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts3ExprTestCommon(1, context, argc, argv); +} /* ** Register the query expression parser test function fts3_exprtest() ** with database connection db. */ -int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ +int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash *pHash){ int rc = sqlite3_create_function( - db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 + db, "fts3_exprtest", -1, SQLITE_UTF8, (void*)pHash, fts3ExprTest, 0, 0 ); if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", - -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + -1, SQLITE_UTF8, (void*)pHash, fts3ExprTestRebalance, 0, 0 ); } return rc; } #endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ Index: ext/fts3/fts3_hash.c ================================================================== --- ext/fts3/fts3_hash.c +++ ext/fts3/fts3_hash.c @@ -33,12 +33,12 @@ #include "fts3_hash.h" /* ** Malloc and Free functions */ -static void *fts3HashMalloc(int n){ - void *p = sqlite3_malloc(n); +static void *fts3HashMalloc(sqlite3_int64 n){ + void *p = sqlite3_malloc64(n); if( p ){ memset(p, 0, n); } return p; } Index: ext/fts3/fts3_icu.c ================================================================== --- ext/fts3/fts3_icu.c +++ ext/fts3/fts3_icu.c @@ -58,11 +58,11 @@ int n = 0; if( argc>0 ){ n = strlen(argv[0])+1; } - p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); + p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n); if( !p ){ return SQLITE_NOMEM; } memset(p, 0, sizeof(IcuTokenizer)); @@ -115,11 +115,11 @@ zInput = ""; }else if( nInput<0 ){ nInput = strlen(zInput); } nChar = nInput+1; - pCsr = (IcuCursor *)sqlite3_malloc( + pCsr = (IcuCursor *)sqlite3_malloc64( sizeof(IcuCursor) + /* IcuCursor */ ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ ); if( !pCsr ){ Index: ext/fts3/fts3_snippet.c ================================================================== --- ext/fts3/fts3_snippet.c +++ ext/fts3/fts3_snippet.c @@ -126,21 +126,23 @@ */ /* ** Allocate a two-slot MatchinfoBuffer object. */ -static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ +static MatchinfoBuffer *fts3MIBufferNew(size_t nElem, const char *zMatchinfo){ MatchinfoBuffer *pRet; - int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); - int nStr = (int)strlen(zMatchinfo); + sqlite3_int64 nByte = sizeof(u32) * (2*(sqlite3_int64)nElem + 1) + + sizeof(MatchinfoBuffer); + sqlite3_int64 nStr = strlen(zMatchinfo); - pRet = sqlite3_malloc(nByte + nStr+1); + pRet = sqlite3_malloc64(nByte + nStr+1); if( pRet ){ memset(pRet, 0, nByte); pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; - pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); - pRet->nElem = nElem; + pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + + sizeof(u32)*((int)nElem+1); + pRet->nElem = (int)nElem; pRet->zMatchinfo = ((char*)pRet) + nByte; memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); pRet->aRef[0] = 1; } @@ -176,11 +178,11 @@ else if( p->aRef[2]==0 ){ p->aRef[2] = 1; aOut = &p->aMatchinfo[p->nElem+2]; xRet = fts3MIBufferFree; }else{ - aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); + aOut = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32)); if( aOut ){ xRet = sqlite3_free; if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); } } @@ -427,15 +429,16 @@ SnippetPhrase *pPhrase = &pIter->aPhrase[i]; if( pPhrase->pTail ){ char *pCsr = pPhrase->pTail; int iCsr = pPhrase->iTail; - while( iCsr<(iStart+pIter->nSnippet) ){ + while( iCsr<(iStart+pIter->nSnippet) && iCsr>=iStart ){ int j; - u64 mPhrase = (u64)1 << i; + u64 mPhrase = (u64)1 << (i%64); u64 mPos = (u64)1 << (iCsr - iStart); - assert( iCsr>=iStart ); + assert( iCsr>=iStart && (iCsr - iStart)<=64 ); + assert( i>=0 ); if( (mCover|mCovered)&mPhrase ){ iScore++; }else{ iScore += 1000; } @@ -473,15 +476,18 @@ assert( rc==SQLITE_OK || pCsr==0 ); if( pCsr ){ int iFirst = 0; pPhrase->pList = pCsr; fts3GetDeltaPosition(&pCsr, &iFirst); - assert( iFirst>=0 ); - pPhrase->pHead = pCsr; - pPhrase->pTail = pCsr; - pPhrase->iHead = iFirst; - pPhrase->iTail = iFirst; + if( iFirst<0 ){ + rc = FTS_CORRUPT_VTAB; + }else{ + pPhrase->pHead = pCsr; + pPhrase->pTail = pCsr; + pPhrase->iHead = iFirst; + pPhrase->iTail = iFirst; + } }else{ assert( rc!=SQLITE_OK || ( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 )); } @@ -514,11 +520,11 @@ int *piScore /* OUT: Score of snippet pFragment */ ){ int rc; /* Return Code */ int nList; /* Number of phrases in expression */ SnippetIter sIter; /* Iterates through snippet candidates */ - int nByte; /* Number of bytes of space to allocate */ + sqlite3_int64 nByte; /* Number of bytes of space to allocate */ int iBestScore = -1; /* Best snippet score found so far */ int i; /* Loop counter */ memset(&sIter, 0, sizeof(sIter)); @@ -532,11 +538,11 @@ /* Now that it is known how many phrases there are, allocate and zero ** the required space using malloc(). */ nByte = sizeof(SnippetPhrase) * nList; - sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); + sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc64(nByte); if( !sIter.aPhrase ){ return SQLITE_NOMEM; } memset(sIter.aPhrase, 0, nByte); @@ -552,11 +558,11 @@ if( rc==SQLITE_OK ){ /* Set the *pmSeen output variable. */ for(i=0; in+nAppend+1>=pStr->nAlloc ){ - int nAlloc = pStr->nAlloc+nAppend+100; - char *zNew = sqlite3_realloc(pStr->z, nAlloc); + sqlite3_int64 nAlloc = pStr->nAlloc+(sqlite3_int64)nAppend+100; + char *zNew = sqlite3_realloc64(pStr->z, nAlloc); if( !zNew ){ return SQLITE_NOMEM; } pStr->z = zNew; pStr->nAlloc = nAlloc; @@ -658,10 +664,11 @@ int nRight; /* Tokens to the right of last highlight */ int nDesired; /* Ideal number of tokens to shift forward */ for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + assert( (nSnippet-1-nRight)<=63 && (nSnippet-1-nRight)>=0 ); nDesired = (nLeft-nRight)/2; /* Ideally, the start of the snippet should be pushed forward in the ** document nDesired tokens. This block checks if there are actually ** nDesired tokens to the right of the snippet. If so, *piPos and @@ -850,11 +857,11 @@ } /* ** This function gathers 'y' or 'b' data for a single phrase. */ -static void fts3ExprLHits( +static int fts3ExprLHits( Fts3Expr *pExpr, /* Phrase expression node */ MatchInfo *p /* Matchinfo context */ ){ Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; int iStart; @@ -880,29 +887,33 @@ } assert( *pIter==0x00 || *pIter==0x01 ); if( *pIter!=0x01 ) break; pIter++; pIter += fts3GetVarint32(pIter, &iCol); + if( iCol>=p->nCol ) return FTS_CORRUPT_VTAB; } + return SQLITE_OK; } /* ** Gather the results for matchinfo directives 'y' and 'b'. */ -static void fts3ExprLHitGather( +static int fts3ExprLHitGather( Fts3Expr *pExpr, MatchInfo *p ){ + int rc = SQLITE_OK; assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ if( pExpr->pLeft ){ - fts3ExprLHitGather(pExpr->pLeft, p); - fts3ExprLHitGather(pExpr->pRight, p); + rc = fts3ExprLHitGather(pExpr->pLeft, p); + if( rc==SQLITE_OK ) rc = fts3ExprLHitGather(pExpr->pRight, p); }else{ - fts3ExprLHits(pExpr, p); + rc = fts3ExprLHits(pExpr, p); } } + return rc; } /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ** for a single query. @@ -988,12 +999,12 @@ } sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); return SQLITE_ERROR; } -static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ - int nVal; /* Number of integers output by cArg */ +static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + size_t nVal; /* Number of integers output by cArg */ switch( cArg ){ case FTS3_MATCHINFO_NDOC: case FTS3_MATCHINFO_NPHRASE: case FTS3_MATCHINFO_NCOL: @@ -1025,29 +1036,41 @@ static int fts3MatchinfoSelectDoctotal( Fts3Table *pTab, sqlite3_stmt **ppStmt, sqlite3_int64 *pnDoc, - const char **paLen + const char **paLen, + const char **ppEnd ){ sqlite3_stmt *pStmt; const char *a; + const char *pEnd; sqlite3_int64 nDoc; + int n; + if( !*ppStmt ){ int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); if( rc!=SQLITE_OK ) return rc; } pStmt = *ppStmt; assert( sqlite3_data_count(pStmt)==1 ); + n = sqlite3_column_bytes(pStmt, 0); a = sqlite3_column_blob(pStmt, 0); - a += sqlite3Fts3GetVarint(a, &nDoc); - if( nDoc==0 ) return FTS_CORRUPT_VTAB; - *pnDoc = (u32)nDoc; + if( a==0 ){ + return FTS_CORRUPT_VTAB; + } + pEnd = a + n; + a += sqlite3Fts3GetVarintBounded(a, pEnd, &nDoc); + if( nDoc<=0 || a>pEnd ){ + return FTS_CORRUPT_VTAB; + } + *pnDoc = nDoc; if( paLen ) *paLen = a; + if( ppEnd ) *ppEnd = pEnd; return SQLITE_OK; } /* ** An instance of the following structure is used to store state while @@ -1115,15 +1138,16 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ LcsIterator *aIter; int i; int iCol; int nToken = 0; + int rc = SQLITE_OK; /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. **/ - aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); + aIter = sqlite3_malloc64(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; inPhrase; i++){ @@ -1135,17 +1159,20 @@ for(iCol=0; iColnCol; iCol++){ int nLcs = 0; /* LCS value for this column */ int nLive = 0; /* Number of iterators in aIter not at EOF */ for(i=0; inPhrase; i++){ - int rc; LcsIterator *pIt = &aIter[i]; rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); - if( rc!=SQLITE_OK ) return rc; + if( rc!=SQLITE_OK ) goto matchinfo_lcs_out; if( pIt->pRead ){ pIt->iPos = pIt->iPosOffset; - fts3LcsIteratorAdvance(&aIter[i]); + fts3LcsIteratorAdvance(pIt); + if( pIt->pRead==0 ){ + rc = FTS_CORRUPT_VTAB; + goto matchinfo_lcs_out; + } nLive++; } } while( nLive>0 ){ @@ -1173,12 +1200,13 @@ } pInfo->aMatchinfo[iCol] = nLcs; } + matchinfo_lcs_out: sqlite3_free(aIter); - return SQLITE_OK; + return rc; } /* ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to ** be returned by the matchinfo() function. Argument zArg contains the @@ -1219,27 +1247,32 @@ break; case FTS3_MATCHINFO_NDOC: if( bGlobal ){ sqlite3_int64 nDoc = 0; - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0, 0); pInfo->aMatchinfo[0] = (u32)nDoc; } break; case FTS3_MATCHINFO_AVGLENGTH: if( bGlobal ){ sqlite3_int64 nDoc; /* Number of rows in table */ const char *a; /* Aggregate column length array */ + const char *pEnd; /* First byte past end of length array */ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a, &pEnd); if( rc==SQLITE_OK ){ int iCol; for(iCol=0; iColnCol; iCol++){ u32 iVal; sqlite3_int64 nToken; a += sqlite3Fts3GetVarint(a, &nToken); + if( a>pEnd ){ + rc = SQLITE_CORRUPT_VTAB; + break; + } iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); pInfo->aMatchinfo[iCol] = iVal; } } } @@ -1249,13 +1282,18 @@ sqlite3_stmt *pSelectDocsize = 0; rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); if( rc==SQLITE_OK ){ int iCol; const char *a = sqlite3_column_blob(pSelectDocsize, 0); + const char *pEnd = a + sqlite3_column_bytes(pSelectDocsize, 0); for(iCol=0; iColnCol; iCol++){ sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); + a += sqlite3Fts3GetVarintBounded(a, pEnd, &nToken); + if( a>pEnd ){ + rc = SQLITE_CORRUPT_VTAB; + break; + } pInfo->aMatchinfo[iCol] = (u32)nToken; } } sqlite3_reset(pSelectDocsize); break; @@ -1268,13 +1306,13 @@ } break; case FTS3_MATCHINFO_LHITS_BM: case FTS3_MATCHINFO_LHITS: { - int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); + size_t nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); memset(pInfo->aMatchinfo, 0, nZero); - fts3ExprLHitGather(pCsr->pExpr, pInfo); + rc = fts3ExprLHitGather(pCsr->pExpr, pInfo); break; } default: { Fts3Expr *pExpr; @@ -1282,11 +1320,11 @@ pExpr = pCsr->pExpr; rc = fts3ExprLoadDoclists(pCsr, 0, 0); if( rc!=SQLITE_OK ) break; if( bGlobal ){ if( pCsr->pDeferred ){ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0); if( rc!=SQLITE_OK ) break; } rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); sqlite3Fts3EvalTestDeferred(pCsr, &rc); if( rc!=SQLITE_OK ) break; @@ -1337,11 +1375,11 @@ ** matchinfo function has been called for this query. In this case ** allocate the array used to accumulate the matchinfo data and ** initialize those elements that are constant for every row. */ if( pCsr->pMIBuffer==0 ){ - int nMatchinfo = 0; /* Number of u32 elements in match-info */ + size_t nMatchinfo = 0; /* Number of u32 elements in match-info */ int i; /* Used to iterate through zArg */ /* Determine the number of phrases in the query */ pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); sInfo.nPhrase = pCsr->nPhrase; @@ -1421,10 +1459,14 @@ if( !pCsr->pExpr ){ sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); return; } + + /* Limit the snippet length to 64 tokens. */ + if( nToken<-64 ) nToken = -64; + if( nToken>+64 ) nToken = +64; for(nSnippet=1; 1; nSnippet++){ int iSnip; /* Loop counter 0..nSnippet-1 */ u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ @@ -1523,11 +1565,11 @@ UNUSED_PARAMETER(iPhrase); rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); nTerm = pExpr->pPhrase->nToken; if( pList ){ fts3GetDeltaPosition(&pList, &iPos); - assert( iPos>=0 ); + assert_fts3_nc( iPos>=0 ); } for(iTerm=0; iTermaTerm[p->iTerm++]; pT->iOff = nTerm-iTerm-1; @@ -1564,11 +1606,11 @@ /* Count the number of terms in the query */ rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); if( rc!=SQLITE_OK ) goto offsets_out; /* Allocate the array of TermOffset iterators. */ - sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); + sCtx.aTerm = (TermOffset *)sqlite3_malloc64(sizeof(TermOffset)*nToken); if( 0==sCtx.aTerm ){ rc = SQLITE_NOMEM; goto offsets_out; } sCtx.iDocid = pCsr->iPrevId; @@ -1633,11 +1675,11 @@ if( !pTerm ){ /* All offsets for this column have been gathered. */ rc = SQLITE_DONE; }else{ - assert( iCurrent<=iMinPos ); + assert_fts3_nc( iCurrent<=iMinPos ); if( 0==(0xFE&*pTerm->pList) ){ pTerm->pList = 0; }else{ fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); } Index: ext/fts3/fts3_term.c ================================================================== --- ext/fts3/fts3_term.c +++ ext/fts3/fts3_term.c @@ -66,13 +66,13 @@ ){ char const *zDb; /* Name of database (e.g. "main") */ char const *zFts3; /* Name of fts3 table */ int nDb; /* Result of strlen(zDb) */ int nFts3; /* Result of strlen(zFts3) */ - int nByte; /* Bytes of space to allocate here */ + sqlite3_int64 nByte; /* Bytes of space to allocate here */ int rc; /* value returned by declare_vtab() */ - Fts3termTable *p; /* Virtual table object to return */ + Fts3termTable *p; /* Virtual table object to return */ int iIndex = 0; UNUSED_PARAMETER(pCtx); if( argc==5 ){ iIndex = atoi(argv[4]); @@ -94,13 +94,13 @@ rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); if( rc!=SQLITE_OK ) return rc; nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; - p = (Fts3termTable *)sqlite3_malloc(nByte); + p = (Fts3termTable *)sqlite3_malloc64(nByte); if( !p ) return SQLITE_NOMEM; - memset(p, 0, nByte); + memset(p, 0, (size_t)nByte); p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; p->pFts3Tab->db = db; @@ -359,15 +359,15 @@ 0, /* xRollback */ 0, /* xFindFunction */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ - 0 /* xRollbackTo */ + 0, /* xRollbackTo */ }; int rc; /* Return code */ rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0); return rc; } #endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ Index: ext/fts3/fts3_test.c ================================================================== --- ext/fts3/fts3_test.c +++ ext/fts3/fts3_test.c @@ -446,18 +446,18 @@ if( p==pEnd ){ rc = SQLITE_DONE; }else{ /* Advance to the end of the token */ const char *pToken = p; - int nToken; + sqlite3_int64 nToken; while( ppCsr->nBuffer ){ sqlite3_free(pCsr->aBuffer); - pCsr->aBuffer = sqlite3_malloc(nToken); + pCsr->aBuffer = sqlite3_malloc64(nToken); } if( pCsr->aBuffer==0 ){ rc = SQLITE_NOMEM; }else{ int i; @@ -469,11 +469,11 @@ } pCsr->iToken++; pCsr->iInput = (int)(p - pCsr->aInput); *ppToken = pCsr->aBuffer; - *pnBytes = nToken; + *pnBytes = (int)nToken; *piStartOffset = (int)(pToken - pCsr->aInput); *piEndOffset = (int)(p - pCsr->aInput); *piPosition = pCsr->iToken; } } @@ -571,22 +571,51 @@ } /* ** End of tokenizer code. **************************************************************************/ + +/* +** sqlite3_fts3_may_be_corrupt BOOLEAN +** +** Set or clear the global "may-be-corrupt" flag. Return the old value. +*/ +static int SQLITE_TCLAPI fts3_may_be_corrupt( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int bOld = sqlite3_fts3_may_be_corrupt; + + if( objc!=2 && objc!=1 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?"); + return TCL_ERROR; + } + if( objc==2 ){ + int bNew; + if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR; + sqlite3_fts3_may_be_corrupt = bNew; + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld)); + return TCL_OK; +} int Sqlitetestfts3_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0); Tcl_CreateObjCommand(interp, "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0 ); Tcl_CreateObjCommand( interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0 ); - Tcl_CreateObjCommand( interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0 ); + Tcl_CreateObjCommand( + interp, "sqlite3_fts3_may_be_corrupt", fts3_may_be_corrupt, 0, 0 + ); return TCL_OK; } #endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */ #endif /* ifdef SQLITE_TEST */ Index: ext/fts3/fts3_tokenize_vtab.c ================================================================== --- ext/fts3/fts3_tokenize_vtab.c +++ ext/fts3/fts3_tokenize_vtab.c @@ -120,11 +120,11 @@ for(i=0; izInput = sqlite3_malloc(nByte+1); + pCsr->zInput = sqlite3_malloc64(nByte+1); if( pCsr->zInput==0 ){ rc = SQLITE_NOMEM; }else{ memcpy(pCsr->zInput, zByte, nByte); pCsr->zInput[nByte] = 0; @@ -441,14 +441,14 @@ 0, /* xRollback */ 0, /* xFindFunction */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ - 0 /* xRollbackTo */ + 0, /* xRollbackTo */ }; int rc; /* Return code */ rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); return rc; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ Index: ext/fts3/fts3_tokenizer.c ================================================================== --- ext/fts3/fts3_tokenizer.c +++ ext/fts3/fts3_tokenizer.c @@ -38,10 +38,21 @@ sqlite3 *db = sqlite3_context_db_handle(context); int isEnabled = 0; sqlite3_db_config(db,SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER,-1,&isEnabled); return isEnabled; } + +/* +** The real sqlite3_value_frombind() implementation was not added +** until version 3.28.0 of the SQLite core. This fake version facilitates +** testing. +*/ +static int bFrombindTrue = 0; +static int sqlite3_value_frombind(sqlite3_value *NotUsed){ + (void)NotUsed; + return bFrombindTrue; +} /* ** Implementation of the SQL scalar function for accessing the underlying ** hash table. This function may be called as follows: ** @@ -77,11 +88,11 @@ zName = sqlite3_value_text(argv[0]); nName = sqlite3_value_bytes(argv[0])+1; if( argc==2 ){ - if( fts3TokenizerEnabled(context) ){ + if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[1]) ){ void *pOld; int n = sqlite3_value_bytes(argv[1]); if( zName==0 || n!=sizeof(pPtr) ){ sqlite3_result_error(context, "argument type mismatch", -1); return; @@ -104,11 +115,13 @@ sqlite3_result_error(context, zErr, -1); sqlite3_free(zErr); return; } } - sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); + if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[0]) ){ + sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); + } } int sqlite3Fts3IsIdChar(char c){ static const char isFtsIdChar[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ @@ -192,12 +205,12 @@ }else{ char const **aArg = 0; int iArg = 0; z = &z[n+1]; while( zaiException, (p->nException+nEntry)*sizeof(int)); + aNew = sqlite3_realloc64(p->aiException,(p->nException+nEntry)*sizeof(int)); if( aNew==0 ) return SQLITE_NOMEM; nNew = p->nException; z = (const unsigned char *)zIn; while( zbRemoveDiacritic = 1; + pNew->eRemoveDiacritic = 1; for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = 1; + pNew->eRemoveDiacritic = 1; } else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ - pNew->bRemoveDiacritic = 0; + pNew->eRemoveDiacritic = 0; + } + else if( n==19 && memcmp("remove_diacritics=2", z, 19)==0 ){ + pNew->eRemoveDiacritic = 2; } else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); } else if( n>=11 && memcmp("separators=", z, 11)==0 ){ @@ -339,20 +342,20 @@ do { int iOut; /* Grow the output buffer if required. */ if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ - char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); + char *zNew = sqlite3_realloc64(pCsr->zToken, pCsr->nAlloc+64); if( !zNew ) return SQLITE_NOMEM; zOut = &zNew[zOut - pCsr->zToken]; pCsr->zToken = zNew; pCsr->nAlloc += 64; } /* Write the folded case of the last character read to the output */ zEnd = z; - iOut = sqlite3FtsUnicodeFold((int)iCode, p->bRemoveDiacritic); + iOut = sqlite3FtsUnicodeFold((int)iCode, p->eRemoveDiacritic); if( iOut ){ WRITE_UTF8(zOut, iOut); } /* If the cursor is not at EOF, read the next character */ Index: ext/fts3/fts3_unicode2.c ================================================================== --- ext/fts3/fts3_unicode2.c +++ ext/fts3/fts3_unicode2.c @@ -1,7 +1,7 @@ /* -** 2012 May 25 +** 2012-05-25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. @@ -157,36 +157,52 @@ ** of the ASCII letter only. For example, if passed 235 - "LATIN ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. */ -static int remove_diacritic(int c){ +static int remove_diacritic(int c, int bComplex){ unsigned short aDia[] = { 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, - 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, - 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, - 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, - 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, - 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, - 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, - 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, - 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, - 62924, 63050, 63082, 63274, 63390, + 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896, + 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106, + 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344, + 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198, + 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468, + 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, + 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, + 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, + 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, + 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, + 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, + 63182, 63242, 63274, 63310, 63368, 63390, }; - char aChar[] = { - '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', - 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', - 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', - 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', - 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', - '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', - 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', - 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', - 'e', 'i', 'o', 'u', 'y', +#define HIBIT ((unsigned char)0x80) + unsigned char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', + 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', + 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', + 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 'o', + 'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a', + 'e', 'i', 'o', 'r', 'u', 's', + 't', 'h', 'a', 'e', 'o'|HIBIT, 'o', + 'o'|HIBIT, 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', + 'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT, + 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, + 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', + 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', + 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', + 'w', 'x', 'y', 'z', 'h', 't', + 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, + 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, + 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y', }; unsigned int key = (((unsigned int)c)<<3) | 0x00000007; int iRes = 0; int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; @@ -199,11 +215,12 @@ }else{ iHi = iTest-1; } } assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F); } /* ** Return true if the argument interpreted as a unicode codepoint @@ -212,12 +229,12 @@ int sqlite3FtsUnicodeIsdiacritic(int c){ unsigned int mask0 = 0x08029FDF; unsigned int mask1 = 0x000361F8; if( c<768 || c>817 ) return 0; return (c < 768+32) ? - (mask0 & (1 << (c-768))) : - (mask1 & (1 << (c-768-32))); + (mask0 & ((unsigned int)1 << (c-768))) : + (mask1 & ((unsigned int)1 << (c-768-32))); } /* ** Interpret the argument as a unicode codepoint. If the codepoint @@ -226,11 +243,11 @@ ** Otherwise, return a copy of the argument. ** ** The results are undefined if the value passed to this function ** is less than zero. */ -int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ +int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){ /* Each entry in the following array defines a rule for folding a range ** of codepoints to lower case. The rule applies to a range of nRange ** codepoints starting at codepoint iCode. ** ** If the least significant bit in flags is clear, then the rule applies @@ -349,11 +366,13 @@ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); + if( eRemoveDiacritic ){ + ret = remove_diacritic(ret, eRemoveDiacritic==2); + } } else if( c>=66560 && c<66600 ){ ret = c + 40; } Index: ext/fts3/fts3_write.c ================================================================== --- ext/fts3/fts3_write.c +++ ext/fts3/fts3_write.c @@ -21,11 +21,11 @@ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include #include #include - +#include #define FTS_MAX_APPENDABLE_HEIGHT 16 /* ** When full-text index nodes are loaded from disk, the buffer that they @@ -65,11 +65,11 @@ # define FTS3_NODE_CHUNKSIZE (4*1024) # define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) #endif /* -** The two values that may be meaningfully bound to the :1 parameter in +** The values that may be meaningfully bound to the :1 parameter in ** statements SQL_REPLACE_STAT and SQL_SELECT_STAT. */ #define FTS_STAT_DOCTOTAL 0 #define FTS_STAT_INCRMERGEHINT 1 #define FTS_STAT_AUTOINCRMERGE 2 @@ -333,11 +333,11 @@ ** of the oldest level in the db that contains at least ? segments. Or, ** if no level in the FTS index contains more than ? segments, the statement ** returns zero rows. */ /* 28 */ "SELECT level, count(*) AS cnt FROM %Q.'%q_segdir' " " GROUP BY level HAVING cnt>=?" - " ORDER BY (level %% 1024) ASC LIMIT 1", + " ORDER BY (level %% 1024) ASC, 2 DESC LIMIT 1", /* Estimate the upper limit on the number of leaf nodes in a new segment ** created by merging the oldest :2 segments from absolute level :1. See ** function sqlite3Fts3Incrmerge() for details. */ /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " @@ -394,23 +394,24 @@ assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); assert( eStmt=0 ); pStmt = p->aStmt[eStmt]; if( !pStmt ){ + int f = SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB; char *zSql; if( eStmt==SQL_CONTENT_INSERT ){ zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ + f &= ~SQLITE_PREPARE_NO_VTAB; zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); }else{ zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); } if( !zSql ){ rc = SQLITE_NOMEM; }else{ - rc = sqlite3_prepare_v3(p->db, zSql, -1, SQLITE_PREPARE_PERSISTENT, - &pStmt, NULL); + rc = sqlite3_prepare_v3(p->db, zSql, -1, f, &pStmt, NULL); sqlite3_free(zSql); assert( rc==SQLITE_OK || pStmt==0 ); p->aStmt[eStmt] = pStmt; } } @@ -564,11 +565,11 @@ int iLangid, /* Language id */ int iIndex, /* Index in p->aIndex[] */ int iLevel /* Level of segments */ ){ sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ - assert( iLangid>=0 ); + assert_fts3_nc( iLangid>=0 ); assert( p->nIndex>0 ); assert( iIndex>=0 && iIndexnIndex ); iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; return iBase + iLevel; @@ -693,11 +694,11 @@ int rc = SQLITE_OK; assert( !p || p->iLastDocid<=iDocid ); if( !p || p->iLastDocid!=iDocid ){ - sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); + u64 iDelta = (u64)iDocid - (u64)(p ? p->iLastDocid : 0); if( p ){ assert( p->nDatanSpace ); assert( p->aData[p->nData]==0 ); p->nData++; } @@ -1150,11 +1151,11 @@ /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already ** full, merge all segments in level iLevel into a single iLevel+1 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. */ - if( iNext>=FTS3_MERGE_COUNT ){ + if( iNext>=MergeCount(p) ){ fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); *piIdx = 0; }else{ *piIdx = iNext; @@ -1234,10 +1235,12 @@ aByte = 0; } } *paBlob = aByte; } + }else if( rc==SQLITE_ERROR ){ + rc = FTS_CORRUPT_VTAB; } return rc; } @@ -1345,11 +1348,13 @@ fts3SegReaderSetEof(pReader); /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf ** blocks have already been traversed. */ - assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); +#ifdef CORRUPT_DB + assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock || CORRUPT_DB ); +#endif if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ return SQLITE_OK; } rc = sqlite3Fts3ReadBlock( @@ -1372,19 +1377,23 @@ /* Because of the FTS3_NODE_PADDING bytes of padding, the following is ** safe (no risk of overread) even if the node data is corrupted. */ pNext += fts3GetVarint32(pNext, &nPrefix); pNext += fts3GetVarint32(pNext, &nSuffix); - if( nPrefix<0 || nSuffix<=0 - || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] + if( nSuffix<=0 + || (&pReader->aNode[pReader->nNode] - pNext)pReader->nTerm ){ return FTS_CORRUPT_VTAB; } - if( nPrefix+nSuffix>pReader->nTermAlloc ){ - int nNew = (nPrefix+nSuffix)*2; - char *zNew = sqlite3_realloc(pReader->zTerm, nNew); + /* Both nPrefix and nSuffix were read by fts3GetVarint32() and so are + ** between 0 and 0x7FFFFFFF. But the sum of the two may cause integer + ** overflow - hence the (i64) casts. */ + if( (i64)nPrefix+nSuffix>(i64)pReader->nTermAlloc ){ + i64 nNew = ((i64)nPrefix+nSuffix)*2; + char *zNew = sqlite3_realloc64(pReader->zTerm, nNew); if( !zNew ){ return SQLITE_NOMEM; } pReader->zTerm = zNew; pReader->nTermAlloc = nNew; @@ -1402,11 +1411,11 @@ /* Check that the doclist does not appear to extend past the end of the ** b-tree node. And that the final byte of the doclist is 0x00. If either ** of these statements is untrue, then the data structure is corrupt. */ - if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] + if( pReader->nDoclist > pReader->nNode-(pReader->aDoclist-pReader->aNode) || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) ){ return FTS_CORRUPT_VTAB; } return SQLITE_OK; @@ -1520,22 +1529,22 @@ if( p>=pEnd ){ pReader->pOffsetList = 0; }else{ rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); if( rc==SQLITE_OK ){ - sqlite3_int64 iDelta; - pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); + u64 iDelta; + pReader->pOffsetList = p + sqlite3Fts3GetVarintU(p, &iDelta); if( pTab->bDescIdx ){ - pReader->iDocid -= iDelta; + pReader->iDocid = (i64)((u64)pReader->iDocid - iDelta); }else{ - pReader->iDocid += iDelta; + pReader->iDocid = (i64)((u64)pReader->iDocid + iDelta); } } } } - return SQLITE_OK; + return rc; } int sqlite3Fts3MsrOvfl( Fts3Cursor *pCsr, @@ -1602,12 +1611,17 @@ Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ ){ Fts3SegReader *pReader; /* Newly allocated SegReader object */ int nExtra = 0; /* Bytes to allocate segment root node */ - assert( iStartLeaf<=iEndLeaf ); + assert( zRoot!=0 || nRoot==0 ); +#ifdef CORRUPT_DB + assert( zRoot!=0 || CORRUPT_DB ); +#endif + if( iStartLeaf==0 ){ + if( iEndLeaf!=0 ) return FTS_CORRUPT_VTAB; nExtra = nRoot + FTS3_NODE_PADDING; } pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); if( !pReader ){ @@ -1623,11 +1637,11 @@ if( nExtra ){ /* The entire segment is stored in the root node. */ pReader->aNode = (char *)&pReader[1]; pReader->rootOnly = 1; pReader->nNode = nRoot; - memcpy(pReader->aNode, zRoot, nRoot); + if( nRoot ) memcpy(pReader->aNode, zRoot, nRoot); memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); }else{ pReader->iCurrentBlock = iStartLeaf-1; } *ppReader = pReader; @@ -1738,12 +1752,13 @@ nElem = 1; } } if( nElem>0 ){ - int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); - pReader = (Fts3SegReader *)sqlite3_malloc(nByte); + sqlite3_int64 nByte; + nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); + pReader = (Fts3SegReader *)sqlite3_malloc64(nByte); if( !pReader ){ rc = SQLITE_NOMEM; }else{ memset(pReader, 0, nByte); pReader->iIdx = 0x7FFFFFFF; @@ -1906,10 +1921,11 @@ if( rc==SQLITE_OK ){ sqlite3_bind_int64(pStmt, 1, iBlock); sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 2); } return rc; } /* @@ -1962,10 +1978,11 @@ sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); } sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 6); } return rc; } /* @@ -2012,10 +2029,15 @@ int nPrefix; /* Number of bytes of prefix compression */ int nSuffix; /* Suffix length */ nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); nSuffix = nTerm-nPrefix; + + /* If nSuffix is zero or less, then zTerm/nTerm must be a prefix of + ** pWriter->zTerm/pWriter->nTerm. i.e. must be equal to or less than when + ** compared with BINARY collation. This indicates corruption. */ + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; if( nReq<=p->nNodeSize || !pTree->zTerm ){ if( nReq>p->nNodeSize ){ @@ -2240,10 +2262,15 @@ } nData = pWriter->nData; nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); nSuffix = nTerm-nPrefix; + + /* If nSuffix is zero or less, then zTerm/nTerm must be a prefix of + ** pWriter->zTerm/pWriter->nTerm. i.e. must be equal to or less than when + ** compared with BINARY collation. This indicates corruption. */ + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; /* Figure out how many bytes are required by this new entry */ nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ nSuffix + /* Term suffix */ @@ -2252,10 +2279,11 @@ if( nData>0 && nData+nReq>p->nNodeSize ){ int rc; /* The current leaf node is full. Write it out to the database. */ + if( pWriter->iFree==LARGEST_INT64 ) return FTS_CORRUPT_VTAB; rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); if( rc!=SQLITE_OK ) return rc; p->nLeafAdd++; /* Add the current term to the interior node tree. The term added to @@ -2301,13 +2329,15 @@ assert( nData+nReq<=pWriter->nSize ); /* Append the prefix-compressed term and doclist to the buffer. */ nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); + assert( nSuffix>0 ); memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); nData += nSuffix; nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); + assert( nDoclist>0 ); memcpy(&pWriter->aData[nData], aDoclist, nDoclist); pWriter->nData = nData + nDoclist; /* Save the current term so that it can be used to prefix-compress the next. ** If the isCopyTerm parameter is true, then the buffer pointed to by @@ -2323,10 +2353,11 @@ pWriter->nMalloc = nTerm*2; pWriter->zMalloc = zNew; pWriter->zTerm = zNew; } assert( pWriter->zTerm==pWriter->zMalloc ); + assert( nTerm>0 ); memcpy(pWriter->zTerm, zTerm, nTerm); }else{ pWriter->zTerm = (char *)zTerm; } pWriter->nTerm = nTerm; @@ -2597,18 +2628,18 @@ break; } nList -= (int)(p - pList); pList = p; - if( nList==0 ){ + if( nList<=0 ){ break; } p = &pList[1]; p += fts3GetVarint32(p, &iCurrent); } - if( bZero && &pList[nList]!=pEnd ){ + if( bZero && (pEnd - &pList[nList])>0){ memset(&pList[nList], 0, pEnd - &pList[nList]); } *ppList = pList; *pnList = nList; } @@ -2631,10 +2662,11 @@ pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); if( !pNew ) return SQLITE_NOMEM; pMsr->aBuffer = pNew; } + assert( nList>0 ); memcpy(pMsr->aBuffer, pList, nList); return SQLITE_OK; } int sqlite3Fts3MsrIncrNext( @@ -2944,16 +2976,16 @@ /* Calculate the 'docid' delta value to write into the merged ** doclist. */ sqlite3_int64 iDelta; if( p->bDescIdx && nDoclist>0 ){ - iDelta = iPrev - iDocid; + if( iPrev<=iDocid ) return FTS_CORRUPT_VTAB; + iDelta = (i64)((u64)iPrev - (u64)iDocid); }else{ - iDelta = iDocid - iPrev; + if( nDoclist>0 && iPrev>=iDocid ) return FTS_CORRUPT_VTAB; + iDelta = (i64)((u64)iDocid - (u64)iPrev); } - assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); - assert( nDoclist>0 || iDelta==iDocid ); nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); if( nDoclist+nByte>pCsr->nBuffer ){ char *aNew; pCsr->nBuffer = (nDoclist+nByte)*2; @@ -3214,12 +3246,14 @@ bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } if( rc!=SQLITE_OK ) goto finished; assert( csr.nSegment>0 ); - assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); - assert( iNewLevel=getAbsoluteLevel(p, iLangid, iIndex, 0) ); + assert_fts3_nc( + iNewLeveliPrevDocid. The sizes are encoded as @@ -3340,11 +3376,11 @@ int nBlob; /* Number of bytes in the BLOB */ sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ int rc; /* Result code from subfunctions */ if( *pRC ) return; - pBlob = sqlite3_malloc( 10*p->nColumn ); + pBlob = sqlite3_malloc64( 10*(sqlite3_int64)p->nColumn ); if( pBlob==0 ){ *pRC = SQLITE_NOMEM; return; } fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); @@ -3390,11 +3426,11 @@ int rc; /* Result code from subfunctions */ const int nStat = p->nColumn+2; if( *pRC ) return; - a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); + a = sqlite3_malloc64( (sizeof(u32)+10)*(sqlite3_int64)nStat ); if( a==0 ){ *pRC = SQLITE_NOMEM; return; } pBlob = (char*)&a[nStat]; @@ -3441,10 +3477,11 @@ } sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); sqlite3_step(pStmt); *pRC = sqlite3_reset(pStmt); + sqlite3_bind_null(pStmt, 2); sqlite3_free(a); } /* ** Merge the entire database so that there is one segment for each @@ -3453,11 +3490,14 @@ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ int bSeenDone = 0; int rc; sqlite3_stmt *pAllLangid = 0; - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + rc = sqlite3Fts3PendingTermsFlush(p); + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + } if( rc==SQLITE_OK ){ int rc2; sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); sqlite3_bind_int(pAllLangid, 2, p->nIndex); while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ @@ -3474,11 +3514,10 @@ rc2 = sqlite3_reset(pAllLangid); if( rc==SQLITE_OK ) rc = rc2; } sqlite3Fts3SegmentsClose(p); - sqlite3Fts3PendingTermsClear(p); return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; } /* @@ -3510,12 +3549,12 @@ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); } if( rc==SQLITE_OK ){ - int nByte = sizeof(u32) * (p->nColumn+1)*3; - aSz = (u32 *)sqlite3_malloc(nByte); + sqlite3_int64 nByte = sizeof(u32) * ((sqlite3_int64)p->nColumn+1)*3; + aSz = (u32 *)sqlite3_malloc64(nByte); if( aSz==0 ){ rc = SQLITE_NOMEM; }else{ memset(aSz, 0, nByte); aSzIns = &aSz[p->nColumn+1]; @@ -3577,16 +3616,16 @@ int nSeg, /* Number of segments to merge */ Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ int rc; /* Return Code */ sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ - int nByte; /* Bytes allocated at pCsr->apSegment[] */ + sqlite3_int64 nByte; /* Bytes allocated at pCsr->apSegment[] */ /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ memset(pCsr, 0, sizeof(*pCsr)); nByte = sizeof(Fts3SegReader *) * nSeg; - pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc64(nByte); if( pCsr->apSegment==0 ){ rc = SQLITE_NOMEM; }else{ memset(pCsr->apSegment, 0, nByte); @@ -3725,25 +3764,30 @@ if( bFirst==0 ){ p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); } p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); + if( nPrefix>p->term.n || nSuffix>p->nNode-p->iOff || nSuffix==0 ){ + return FTS_CORRUPT_VTAB; + } blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); if( rc==SQLITE_OK ){ memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); p->term.n = nPrefix+nSuffix; p->iOff += nSuffix; if( p->iChild==0 ){ p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); + if( (p->nNode-p->iOff)nDoclist ){ + return FTS_CORRUPT_VTAB; + } p->aDoclist = &p->aNode[p->iOff]; p->iOff += p->nDoclist; } } } - assert( p->iOff<=p->nNode ); - + assert_fts3_nc( p->iOff<=p->nNode ); return rc; } /* ** Release all dynamic resources held by node-reader object *p. @@ -3763,18 +3807,18 @@ memset(p, 0, sizeof(NodeReader)); p->aNode = aNode; p->nNode = nNode; /* Figure out if this is a leaf or an internal node. */ - if( p->aNode[0] ){ + if( aNode && aNode[0] ){ /* An internal node. */ p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); }else{ p->iOff = 1; } - return nodeReaderNext(p); + return aNode ? nodeReaderNext(p) : SQLITE_OK; } /* ** This function is called while writing an FTS segment each time a leaf o ** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed @@ -3807,10 +3851,11 @@ ** the current node of layer iLayer. Due to the prefix compression, ** the space required changes depending on which node the key is to ** be added to. */ nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); nSuffix = nTerm - nPrefix; + if(nSuffix<=0 ) return FTS_CORRUPT_VTAB; nSpace = sqlite3Fts3VarintLen(nPrefix); nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ /* If the current node of layer iLayer contains zero keys, or if adding @@ -3900,17 +3945,18 @@ int nSuffix; /* Size of term suffix in bytes */ /* Node must have already been started. There must be a doclist for a ** leaf node, and there must not be a doclist for an internal node. */ assert( pNode->n>0 ); - assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); + assert_fts3_nc( (pNode->a[0]=='\0')==(aDoclist!=0) ); blobGrowBuffer(pPrev, nTerm, &rc); if( rc!=SQLITE_OK ) return rc; nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); nSuffix = nTerm - nPrefix; + if( nSuffix<=0 ) return FTS_CORRUPT_VTAB; memcpy(pPrev->a, zTerm, nTerm); pPrev->n = nTerm; if( bFirst==0 ){ pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); @@ -4116,11 +4162,11 @@ const char *zRhs, int nRhs /* RHS of comparison */ ){ int nCmp = MIN(nLhs, nRhs); int res; - res = memcmp(zLhs, zRhs, nCmp); + res = (nCmp ? memcmp(zLhs, zRhs, nCmp) : 0); if( res==0 ) res = nLhs - nRhs; return res; } @@ -4200,10 +4246,14 @@ pWriter->nLeafData = pWriter->nLeafData * -1; } pWriter->bNoLeafData = (pWriter->nLeafData==0); nRoot = sqlite3_column_bytes(pSelect, 4); aRoot = sqlite3_column_blob(pSelect, 4); + if( aRoot==0 ){ + sqlite3_reset(pSelect); + return nRoot ? SQLITE_NOMEM : FTS_CORRUPT_VTAB; + } }else{ return sqlite3_reset(pSelect); } /* Check for the zero-length marker in the %_segments table */ @@ -4235,10 +4285,14 @@ /* It is possible to append to this segment. Set up the IncrmergeWriter ** object to do so. */ int i; int nHeight = (int)aRoot[0]; NodeWriter *pNode; + if( nHeight<1 || nHeight>FTS_MAX_APPENDABLE_HEIGHT ){ + sqlite3_reset(pSelect); + return FTS_CORRUPT_VTAB; + } pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; pWriter->iStart = iStart; pWriter->iEnd = iEnd; pWriter->iAbsLevel = iAbsLevel; @@ -4248,38 +4302,46 @@ pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; } pNode = &pWriter->aNodeWriter[nHeight]; pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; - blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); + blobGrowBuffer(&pNode->block, + MAX(nRoot, p->nNodeSize)+FTS3_NODE_PADDING, &rc + ); if( rc==SQLITE_OK ){ memcpy(pNode->block.a, aRoot, nRoot); pNode->block.n = nRoot; + memset(&pNode->block.a[nRoot], 0, FTS3_NODE_PADDING); } for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ NodeReader reader; pNode = &pWriter->aNodeWriter[i]; - rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); - while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); - blobGrowBuffer(&pNode->key, reader.term.n, &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->key.a, reader.term.a, reader.term.n); - pNode->key.n = reader.term.n; - if( i>0 ){ - char *aBlock = 0; - int nBlock = 0; - pNode = &pWriter->aNodeWriter[i-1]; - pNode->iBlock = reader.iChild; - rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); - blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->block.a, aBlock, nBlock); - pNode->block.n = nBlock; - } - sqlite3_free(aBlock); + if( pNode->block.a){ + rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); + while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); + blobGrowBuffer(&pNode->key, reader.term.n, &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->key.a, reader.term.a, reader.term.n); + pNode->key.n = reader.term.n; + if( i>0 ){ + char *aBlock = 0; + int nBlock = 0; + pNode = &pWriter->aNodeWriter[i-1]; + pNode->iBlock = reader.iChild; + rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); + blobGrowBuffer(&pNode->block, + MAX(nBlock, p->nNodeSize)+FTS3_NODE_PADDING, &rc + ); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aBlock, nBlock); + pNode->block.n = nBlock; + memset(&pNode->block.a[nBlock], 0, FTS3_NODE_PADDING); + } + sqlite3_free(aBlock); + } } } nodeReaderRelease(&reader); } } @@ -4518,11 +4580,14 @@ sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ ){ NodeReader reader; /* Reader object */ Blob prev = {0, 0, 0}; /* Previous term written to new node */ int rc = SQLITE_OK; /* Return code */ - int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ + int bLeaf; /* True for a leaf node */ + + if( nNode<1 ) return FTS_CORRUPT_VTAB; + bLeaf = aNode[0]=='\0'; /* Allocate required output space */ blobGrowBuffer(pNew, nNode, &rc); if( rc!=SQLITE_OK ) return rc; pNew->n = 0; @@ -4629,10 +4694,11 @@ sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); sqlite3_bind_int64(pChomp, 3, iAbsLevel); sqlite3_bind_int(pChomp, 4, iIdx); sqlite3_step(pChomp); rc = sqlite3_reset(pChomp); + sqlite3_bind_null(pChomp, 2); } } sqlite3_free(root.a); sqlite3_free(block.a); @@ -4708,10 +4774,11 @@ if( rc==SQLITE_OK ){ sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); + sqlite3_bind_null(pReplace, 2); } return rc; } @@ -4782,17 +4849,21 @@ */ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ const int nHint = pHint->n; int i; - i = pHint->n-2; + i = pHint->n-1; + if( (pHint->a[i] & 0x80) ) return FTS_CORRUPT_VTAB; while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + if( i==0 ) return FTS_CORRUPT_VTAB; + i--; while( i>0 && (pHint->a[i-1] & 0x80) ) i--; pHint->n = i; i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); i += fts3GetVarint32(&pHint->a[i], pnInput); + assert( i<=nHint ); if( i!=nHint ) return FTS_CORRUPT_VTAB; return SQLITE_OK; } @@ -4858,12 +4929,18 @@ sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ int nHintSeg = 0; /* Hint number of segments */ rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ + /* Based on the scan in the block above, it is known that there + ** are no levels with a relative level smaller than that of + ** iAbsLevel with more than nSeg segments, or if nSeg is -1, + ** no levels with more than nMin segments. Use this to limit the + ** value of nHintSeg to avoid a large memory allocation in case the + ** merge-hint is corrupt*/ iAbsLevel = iHintAbsLevel; - nSeg = nHintSeg; + nSeg = MIN(MAX(nMin,nSeg), nHintSeg); bUseHint = 1; bDirtyHint = 1; }else{ /* This undoes the effect of the HintPop() above - so that no entry ** is removed from the hint blob. */ @@ -4872,11 +4949,11 @@ } /* If nSeg is less that zero, then there is no level with at least ** nMin segments and no hint in the %_stat table. No work to do. ** Exit early in this case. */ - if( nSeg<0 ) break; + if( nSeg<=0 ) break; /* Open a cursor to iterate through the contents of the oldest nSeg ** indexes of absolute level iAbsLevel. If this cursor is opened using ** the 'hint' parameters, it is possible that there are less than nSeg ** segments available in level iAbsLevel. In this case, no work is @@ -4900,12 +4977,19 @@ if( rc==SQLITE_OK ){ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); } if( SQLITE_OK==rc && pCsr->nSegment==nSeg && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) - && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ + int bEmpty = 0; + rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( rc==SQLITE_OK ){ + bEmpty = 1; + }else if( rc!=SQLITE_ROW ){ + sqlite3Fts3SegReaderFinish(pCsr); + break; + } if( bUseHint && iIdx>0 ){ const char *zKey = pCsr->zTerm; int nKey = pCsr->nTerm; rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); }else{ @@ -4912,15 +4996,17 @@ rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); } if( rc==SQLITE_OK && pWriter->nLeafEst ){ fts3LogMerge(nSeg, iAbsLevel); - do { - rc = fts3IncrmergeAppend(p, pWriter, pCsr); - if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); - if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; - }while( rc==SQLITE_ROW ); + if( bEmpty==0 ){ + do { + rc = fts3IncrmergeAppend(p, pWriter, pCsr); + if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; + }while( rc==SQLITE_ROW ); + } /* Update or delete the input segments */ if( rc==SQLITE_OK ){ nRem -= (1 + pWriter->nWork); rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); @@ -4981,11 +5067,11 @@ static int fts3DoIncrmerge( Fts3Table *p, /* FTS3 table handle */ const char *zParam /* Nul-terminated string containing "A,B" */ ){ int rc; - int nMin = (FTS3_MERGE_COUNT / 2); + int nMin = (MergeCount(p) / 2); int nMerge = 0; const char *z = zParam; /* Read the first integer value */ nMerge = fts3Getint(&z); @@ -5026,11 +5112,11 @@ const char *zParam /* Nul-terminated string containing boolean */ ){ int rc = SQLITE_OK; sqlite3_stmt *pStmt = 0; p->nAutoincrmerge = fts3Getint(&zParam); - if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ + if( p->nAutoincrmerge==1 || p->nAutoincrmerge>MergeCount(p) ){ p->nAutoincrmerge = 8; } if( !p->bHasStat ){ assert( p->bFts4==0 ); sqlite3Fts3CreateStatTable(&rc, p); @@ -5109,25 +5195,29 @@ char *pCsr = csr.aDoclist; char *pEnd = &pCsr[csr.nDoclist]; i64 iDocid = 0; i64 iCol = 0; - i64 iPos = 0; + u64 iPos = 0; pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); while( pCsrbDescIdx ){ + iDocid = (i64)((u64)iDocid - iVal); + }else{ + iDocid = (i64)((u64)iDocid + iVal); + } } }else{ iPos += (iVal - 2); cksum = cksum ^ fts3ChecksumEntry( csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, @@ -5196,14 +5286,13 @@ int iCol; for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ if( p->abNotindexed[iCol]==0 ){ const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); - int nText = sqlite3_column_bytes(pStmt, iCol+1); sqlite3_tokenizer_cursor *pT = 0; - rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, -1, &pT); while( rc==SQLITE_OK ){ char const *zToken; /* Buffer containing token */ int nToken = 0; /* Number of bytes in token */ int iDum1 = 0, iDum2 = 0; /* Dummy variables */ int iPos = 0; /* Position of token in zText */ @@ -5284,11 +5373,11 @@ ** ** Argument pVal contains the result of . Currently the only ** meaningful value to insert is the text 'optimize'. */ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ - int rc; /* Return Code */ + int rc = SQLITE_ERROR; /* Return Code */ const char *zVal = (const char *)sqlite3_value_text(pVal); int nVal = sqlite3_value_bytes(pVal); if( !zVal ){ return SQLITE_NOMEM; @@ -5300,25 +5389,31 @@ rc = fts3DoIntegrityCheck(p); }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ rc = fts3DoIncrmerge(p, &zVal[6]); }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ rc = fts3DoAutoincrmerge(p, &zVal[10]); -#ifdef SQLITE_TEST - }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ - p->nNodeSize = atoi(&zVal[9]); - rc = SQLITE_OK; - }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ - p->nMaxPendingData = atoi(&zVal[11]); - rc = SQLITE_OK; - }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ - p->bNoIncrDoclist = atoi(&zVal[21]); - rc = SQLITE_OK; +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + }else{ + int v; + if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ + v = atoi(&zVal[9]); + if( v>=24 && v<=p->nPgsz-35 ) p->nNodeSize = v; + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ + v = atoi(&zVal[11]); + if( v>=64 && v<=FTS3_MAX_PENDING_DATA ) p->nMaxPendingData = v; + rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal,"test-no-incr-doclist=",21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal,"mergecount=",11) ){ + v = atoi(&zVal[11]); + if( v>=4 && v<=FTS3_MERGE_COUNT && (v&1)==0 ) p->nMergeCount = v; + rc = SQLITE_OK; + } #endif - }else{ - rc = SQLITE_ERROR; } - return rc; } #ifndef SQLITE_DISABLE_FTS4_DEFERRED /* @@ -5522,11 +5617,10 @@ sqlite3_value **apVal, /* Array of arguments */ sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ ){ Fts3Table *p = (Fts3Table *)pVtab; int rc = SQLITE_OK; /* Return Code */ - int isRemove = 0; /* True for an UPDATE or DELETE */ u32 *aSzIns = 0; /* Sizes of inserted documents */ u32 *aSzDel = 0; /* Sizes of deleted documents */ int nChng = 0; /* Net change in number of documents */ int bInsertDone = 0; @@ -5556,11 +5650,11 @@ rc = SQLITE_CONSTRAINT; goto update_out; } /* Allocate space to hold the change in document sizes */ - aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); + aSzDel = sqlite3_malloc64(sizeof(aSzDel[0])*((sqlite3_int64)p->nColumn+1)*2); if( aSzDel==0 ){ rc = SQLITE_NOMEM; goto update_out; } aSzIns = &aSzDel[p->nColumn+1]; @@ -5620,11 +5714,10 @@ /* If this is a DELETE or UPDATE operation, remove the old record. */ if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); - isRemove = 1; } /* If this is an INSERT or UPDATE operation, insert the new record. */ if( nArg>1 && rc==SQLITE_OK ){ int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); @@ -5632,11 +5725,11 @@ rc = fts3InsertData(p, apVal, pRowid); if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ rc = FTS_CORRUPT_VTAB; } } - if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ + if( rc==SQLITE_OK ){ rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid); } if( rc==SQLITE_OK ){ assert( p->iPrevDocid==*pRowid ); rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); Index: ext/fts3/unicode/mkunicode.tcl ================================================================== --- ext/fts3/unicode/mkunicode.tcl +++ ext/fts3/unicode/mkunicode.tcl @@ -7,15 +7,16 @@ set lRange [list] set nRange 1 set iFirst [lindex $map 0 0] set cPrev [lindex $map 0 1] + set fPrev [lindex $map 0 2] foreach m [lrange $map 1 end] { - foreach {i c} $m {} + foreach {i c f} $m {} - if {$cPrev == $c} { + if {$cPrev == $c && $fPrev==$f} { for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} { if {[info exists tl_lookup_table($j)]==0} break } if {$j==$i} { @@ -27,27 +28,30 @@ } } lappend lRange [list $iFirst $nRange] lappend aChar $cPrev + lappend aFlag $fPrev set iFirst $i set cPrev $c + set fPrev $f set nRange 1 } lappend lRange [list $iFirst $nRange] lappend aChar $cPrev + lappend aFlag $fPrev puts "/*" puts "** If the argument is a codepoint corresponding to a lowercase letter" puts "** in the ASCII range with a diacritic added, return the codepoint" puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." puts "*/" - puts "static int ${::remove_diacritic}(int c)\{" + puts "static int ${::remove_diacritic}(int c, int bComplex)\{" puts " unsigned short aDia\[\] = \{" puts -nonewline " 0, " set i 1 foreach r $lRange { foreach {iCode nRange} $r {} @@ -57,18 +61,23 @@ puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]] puts -nonewline ", " } puts "" puts " \};" - puts " char aChar\[\] = \{" - puts -nonewline " '\\0', " + puts "#define HIBIT ((unsigned char)0x80)" + puts " unsigned char aChar\[\] = \{" + puts -nonewline " '\\0', " set i 1 - foreach c $aChar { - set str "'$c', " - if {$c == ""} { set str "'\\0', " } + foreach c $aChar f $aFlag { + if { $f } { + set str "'$c'|HIBIT, " + } else { + set str "'$c', " + } + if {$c == ""} { set str "'\\0', " } - if {($i % 12)==0} {puts "" ; puts -nonewline " " } + if {($i % 6)==0} {puts "" ; puts -nonewline " " } incr i puts -nonewline "$str" } puts "" puts " \};" @@ -85,19 +94,21 @@ }else{ iHi = iTest-1; } } assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);} + if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; + return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);} puts "\}" } proc print_isdiacritic {zFunc map} { set lCode [list] foreach m $map { - foreach {code char} $m {} + foreach {code char flag} $m {} + if {$flag} continue if {$code && $char == ""} { lappend lCode $code } } set lCode [lsort -integer $lCode] set iFirst [lindex $lCode 0] set iLast [lindex $lCode end] @@ -122,12 +133,12 @@ puts " unsigned int mask0 = [format "0x%08X" $i1];" puts " unsigned int mask1 = [format "0x%08X" $i2];" puts " if( c<$iFirst || c>$iLast ) return 0;" puts " return (c < $iFirst+32) ?" - puts " (mask0 & (1 << (c-$iFirst))) :" - puts " (mask1 & (1 << (c-$iFirst-32)));" + puts " (mask0 & ((unsigned int)1 << (c-$iFirst))) :" + puts " (mask1 & ((unsigned int)1 << (c-$iFirst-32)));" puts "\}" } #------------------------------------------------------------------------- @@ -470,11 +481,11 @@ puts "** Otherwise, return a copy of the argument." puts "**" puts "** The results are undefined if the value passed to this function" puts "** is less than zero." puts "*/" - puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{" + puts "int ${zFunc}\(int c, int eRemoveDiacritic)\{" set liOff [tl_generate_ioff_table $lRecord] tl_print_table_header foreach entry $lRecord { if {[tl_print_table_entry toggle $entry $liOff]} { @@ -514,11 +525,13 @@ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } - if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); + if( eRemoveDiacritic ){ + ret = ${::remove_diacritic}(ret, eRemoveDiacritic==2); + } } }] foreach entry $lHigh { tl_print_if_entry $entry @@ -526,10 +539,263 @@ puts "" puts " return ret;" puts "\}" } + +proc code {txt} { + set txt [string trimright $txt] + set txt [string trimleft $txt "\n"] + set n [expr {[string length $txt] - [string length [string trim $txt]]}] + set ret "" + foreach L [split $txt "\n"] { + append ret "[string range $L $n end]\n" + } + return [uplevel "subst -nocommands {$ret}"] +} + +proc intarray {lInt} { + set ret "" + set n [llength $lInt] + for {set i 0} {$i < $n} {incr i 10} { + append ret "\n " + foreach int [lrange $lInt $i [expr $i+9]] { + append ret [format "%-7s" "$int, "] + } + } + append ret "\n " + set ret +} + +proc categories_switch {Cvar first lSecond} { + upvar $Cvar C + set ret "" + append ret "case '$first':\n" + append ret " switch( zCat\[1\] ){\n" + foreach s $lSecond { + append ret " case '$s': aArray\[$C($first$s)\] = 1; break;\n" + } + append ret " case '*': \n" + foreach s $lSecond { + append ret " aArray\[$C($first$s)\] = 1;\n" + } + append ret " break;\n" + append ret " default: return 1;" + append ret " }\n" + append ret " break;\n" +} + +# Argument is a list. Each element of which is itself a list of two elements: +# +# * the codepoint +# * the category +# +# List elements are sorted in order of codepoint. +# +proc print_categories {lMap} { + set categories { + Cc Cf Cn Cs + Ll Lm Lo Lt Lu + Mc Me Mn + Nd Nl No + Pc Pd Pe Pf Pi Po Ps + Sc Sk Sm So + Zl Zp Zs + + LC Co + } + + for {set i 0} {$i < [llength $categories]} {incr i} { + set C([lindex $categories $i]) [expr 1+$i] + } + + set caseC [categories_switch C C {c f n s o}] + set caseL [categories_switch C L {l m o t u C}] + set caseM [categories_switch C M {c e n}] + set caseN [categories_switch C N {d l o}] + set caseP [categories_switch C P {c d e f i o s}] + set caseS [categories_switch C S {c k m o}] + set caseZ [categories_switch C Z {l p s}] + + set nCat [expr [llength [array names C]] + 1] + puts [code { + int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ + aArray[0] = 1; + switch( zCat[0] ){ + $caseC + $caseL + $caseM + $caseN + $caseP + $caseS + $caseZ + } + return 0; + } + }] + + set nRepeat 0 + set first [lindex $lMap 0 0] + set class [lindex $lMap 0 1] + set prev -1 + + set CASE(0) "Lu" + set CASE(1) "Ll" + + foreach m $lMap { + foreach {codepoint cl} $m {} + set codepoint [expr "0x$codepoint"] + if {$codepoint>=(1<<20)} continue + + set bNew 0 + if {$codepoint!=($prev+1)} { + set bNew 1 + } elseif { + $cl==$class || ($class=="LC" && $cl==$CASE([expr $nRepeat & 0x01])) + } { + incr nRepeat + } elseif {$class=="Lu" && $nRepeat==1 && $cl=="Ll"} { + set class LC + incr nRepeat + } else { + set bNew 1 + } + if {$bNew} { + lappend lEntries [list $first $class $nRepeat] + set nRepeat 1 + set first $codepoint + set class $cl + } + set prev $codepoint + } + if {$nRepeat>0} { + lappend lEntries [list $first $class $nRepeat] + } + + set aBlock [list 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + set aMap [list] + foreach e $lEntries { + foreach {cp class nRepeat} $e {} + set block [expr ($cp>>16)] + if {$block>0 && [lindex $aBlock $block]==0} { + for {set i 1} {$i<=$block} {incr i} { + if {[lindex $aBlock $i]==0} { + lset aBlock $i [llength $aMap] + } + } + } + lappend aMap [expr {$cp & 0xFFFF}] + lappend aData [expr {($nRepeat << 5) + $C($class)}] + } + for {set i 1} {$i<[llength $aBlock]} {incr i} { + if {[lindex $aBlock $i]==0} { + lset aBlock $i [llength $aMap] + } + } + + set aBlockArray [intarray $aBlock] + set aMapArray [intarray $aMap] + set aDataArray [intarray $aData] + puts [code { + static u16 aFts5UnicodeBlock[] = {$aBlockArray}; + static u16 aFts5UnicodeMap[] = {$aMapArray}; + static u16 aFts5UnicodeData[] = {$aDataArray}; + + int sqlite3Fts5UnicodeCategory(u32 iCode) { + int iRes = -1; + int iHi; + int iLo; + int ret; + u16 iKey; + + if( iCode>=(1<<20) ){ + return 0; + } + iLo = aFts5UnicodeBlock[(iCode>>16)]; + iHi = aFts5UnicodeBlock[1+(iCode>>16)]; + iKey = (iCode & 0xFFFF); + while( iHi>iLo ){ + int iTest = (iHi + iLo) / 2; + assert( iTest>=iLo && iTest=aFts5UnicodeMap[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest; + } + } + + if( iRes<0 ) return 0; + if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; + ret = aFts5UnicodeData[iRes] & 0x1F; + if( ret!=$C(LC) ) return ret; + return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? $C(Ll) : $C(Lu); + } + + void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ + int i = 0; + int iTbl = 0; + while( i<128 ){ + int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; + int n = (aFts5UnicodeData[iTbl] >> 5) + i; + for(; i<128 && i" puts "" puts "int main(int argc, char **argv)\{" - puts " int r1, r2;" + puts " int r1, r2, r3;" puts " int code;" + puts " r3 = 0;" puts " r1 = isalnum_test(&code);" puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);" puts " else printf(\"isalnum(): test passed\\n\");" puts " r2 = fold_test(&code);" puts " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);" puts " else printf(\"fold(): test passed\\n\");" - puts " return (r1 || r2);" + if {$::generate_fts5_code} { + puts " r3 = categories_test(&code);" + puts " if( r3 ) printf(\"categories(): Problem with code %d\\n\",code);" + puts " else printf(\"categories(): test passed\\n\");" + } + puts " return (r1 || r2 || r3);" puts "\}" } # Proces the command line arguments. Exit early if they are not to # our liking. @@ -648,15 +920,23 @@ } } } print_fileheader + +if {$::generate_test_code} { + puts "typedef unsigned short int u16;" + puts "typedef unsigned char u8;" + puts "#include " +} # Print the isalnum() function to stdout. # set lRange [an_load_separator_ranges] -print_isalnum ${function_prefix}UnicodeIsalnum $lRange +if {$generate_fts5_code==0} { + print_isalnum ${function_prefix}UnicodeIsalnum $lRange +} # Leave a gap between the two generated C functions. # puts "" puts "" @@ -674,21 +954,30 @@ puts "" # Print the fold() function to stdout. # print_fold ${function_prefix}UnicodeFold + +if {$generate_fts5_code} { + puts "" + puts "" + print_categories [cc_load_unicodedata_text ${unicodedata.txt}] +} # Print the test routines and main() function to stdout, if -test # was specified. # if {$::generate_test_code} { - print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange + if {$generate_fts5_code==0} { + print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange + } print_fold_test ${function_prefix}UnicodeFold $mappings + print_test_categories [cc_load_unicodedata_text ${unicodedata.txt}] print_test_main } if {$generate_fts5_code} { # no-op } else { puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" } Index: ext/fts3/unicode/parseunicode.tcl ================================================================== --- ext/fts3/unicode/parseunicode.tcl +++ ext/fts3/unicode/parseunicode.tcl @@ -5,15 +5,27 @@ # diacritical marks from a unicode string. Each mapping is itself a list # consisting of two elements - the unicode codepoint and the single ASCII # character that it should be replaced with, or an empty string if the # codepoint should simply be removed from the input. Examples: # -# { 224 a } (replace codepoint 224 to "a") -# { 769 "" } (remove codepoint 769 from input) +# { 224 a 0 } (replace codepoint 224 to "a") +# { 769 "" 0 } (remove codepoint 769 from input) # # Mappings are only returned for non-upper case codepoints. It is assumed # that the input has already been folded to lower case. +# +# The third value in the list is always either 0 or 1. 0 if the +# UnicodeData.txt file maps the codepoint to a single ASCII character and +# a diacritic, or 1 if the mapping is indirect. For example, consider the +# two entries: +# +# 1ECD;LATIN SMALL LETTER O WITH DOT BELOW;Ll;0;L;006F 0323;;;;N;;;1ECC;;1ECC +# 1ED9;LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW;Ll;0;L;1ECD 0302;;;;N;;;1ED8;;1ED8 +# +# The first codepoint is a direct mapping (as 006F is ASCII and 0323 is a +# diacritic). The second is an indirect mapping, as it maps to the +# first codepoint plus 0302 (a diacritic). # proc rd_load_unicodedata_text {zName} { global tl_lookup_table set fd [open $zName] @@ -51,22 +63,33 @@ set iCode [expr "0x$code"] set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] + # Filter out upper-case characters, as they will be mapped to their + # lower-case equivalents before this data is used. if {[info exists tl_lookup_table($iCode)]} continue + + # Check if this is an indirect mapping. If so, set bIndirect to true + # and change $iAscii to the indirectly mappped ASCII character. + set bIndirect 0 + if {[info exists dia($iDia)] && [info exists mapping($iAscii)]} { + set iAscii $mapping($iAscii) + set bIndirect 1 + } if { ($iAscii >= 97 && $iAscii <= 122) || ($iAscii >= 65 && $iAscii <= 90) } { - lappend lRet [list $iCode [string tolower [format %c $iAscii]]] + lappend lRet [list $iCode [string tolower [format %c $iAscii]] $bIndirect] + set mapping($iCode) $iAscii set dia($iDia) 1 } } foreach d [array names dia] { - lappend lRet [list $d ""] + lappend lRet [list $d "" 0] } set lRet [lsort -integer -index 0 $lRet] close $fd set lRet @@ -140,7 +163,43 @@ set d [string trim $d] if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } } } + +proc cc_load_unicodedata_text {zName} { + set fd [open $zName] + set lField { + code + character_name + general_category + canonical_combining_classes + bidirectional_category + character_decomposition_mapping + decimal_digit_value + digit_value + numeric_value + mirrored + unicode_1_name + iso10646_comment_field + uppercase_mapping + lowercase_mapping + titlecase_mapping + } + set lRet [list] + + while { ![eof $fd] } { + set line [gets $fd] + if {$line == ""} continue + + set fields [split $line ";"] + if {[llength $fields] != [llength $lField]} { error "parse error: $line" } + foreach $lField $fields {} + + lappend lRet [list $code $general_category] + } + + close $fd + set lRet +} Index: ext/fts5/fts5_hash.c ================================================================== --- ext/fts5/fts5_hash.c +++ ext/fts5/fts5_hash.c @@ -443,11 +443,13 @@ memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); for(iSlot=0; iSlotnSlot; iSlot++){ Fts5HashEntry *pIter; for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ - if( pTerm==0 || 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm) ){ + if( pTerm==0 + || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) + ){ Fts5HashEntry *pEntry = pIter; pEntry->pScanNext = 0; for(i=0; ap[i]; i++){ pEntry = fts5HashEntryMerge(pEntry, ap[i]); ap[i] = 0; Index: ext/fts5/test/fts5aa.test ================================================================== --- ext/fts5/test/fts5aa.test +++ ext/fts5/test/fts5aa.test @@ -589,9 +589,21 @@ do_execsql_test 22.1 { SELECT rowid FROM t9('a*') } {1} + +#------------------------------------------------------------------------- +do_execsql_test 25.0 { + CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%); +} +do_execsql_test 25.1 { + BEGIN; + INSERT INTO t13 VALUES('AAAA'); +SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB*'); + + END; } +} finish_test ADDED ext/rbu/rbucollate.test Index: ext/rbu/rbucollate.test ================================================================== --- /dev/null +++ ext/rbu/rbucollate.test @@ -0,0 +1,63 @@ +# 2018 March 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname [info script]] rbu_common.tcl] +set ::testprefix rbucollate + +ifcapable !icu_collations { + finish_test + return +} + +db close +sqlite3_shutdown +sqlite3_config_uri 1 +reset_db + +# Create a simple RBU database. That expects to write to a table: +# +# CREATE TABLE t1(a PRIMARY KEY, b, c); +# +proc create_rbu1 {filename} { + forcedelete $filename + sqlite3 rbu1 $filename + rbu1 eval { + CREATE TABLE data_t1(a, b, c, rbu_control); + INSERT INTO data_t1 VALUES('a', 'one', 1, 0); + INSERT INTO data_t1 VALUES('b', 'two', 2, 0); + INSERT INTO data_t1 VALUES('c', 'three', 3, 0); + } + rbu1 close + return $filename +} + +do_execsql_test 1.0 { + SELECT icu_load_collation('en_US', 'my-collate'); + CREATE TABLE t1(a COLLATE "my-collate" PRIMARY KEY, b, c); +} {{}} + +do_test 1.2 { + create_rbu1 testrbu.db + sqlite3rbu rbu test.db testrbu.db + rbu dbMain_eval { SELECT icu_load_collation('en_US', 'my-collate') } + rbu dbRbu_eval { SELECT icu_load_collation('en_US', 'my-collate') } + while 1 { + set rc [rbu step] + if {$rc!="SQLITE_OK"} break + } + rbu close + db eval { SELECT * FROM t1 } +} {a one 1 b two 2 c three 3} + +#forcedelete testrbu.db +finish_test + Index: ext/rbu/sqlite3rbu.c ================================================================== --- ext/rbu/sqlite3rbu.c +++ ext/rbu/sqlite3rbu.c @@ -397,11 +397,12 @@ struct rbu_vfs { sqlite3_vfs base; /* rbu VFS shim methods */ sqlite3_vfs *pRealVfs; /* Underlying VFS */ sqlite3_mutex *mutex; /* Mutex to protect pMain */ sqlite3rbu *pRbu; /* Owner RBU object */ - rbu_file *pMain; /* Linked list of main db files */ + rbu_file *pMain; /* List of main db files */ + rbu_file *pMainRbu; /* List of main db files with pRbu!=0 */ }; /* ** Each file opened by an rbu VFS is represented by an instance of ** the following structure. @@ -426,10 +427,11 @@ char *zDel; /* Delete this when closing file */ const char *zWal; /* Wal filename for this main db file */ rbu_file *pWalFd; /* Wal file descriptor for this main db */ rbu_file *pMainNext; /* Next MAIN_DB file */ + rbu_file *pMainRbuNext; /* Next MAIN_DB file with pRbu!=0 */ }; /* ** True for an RBU vacuum handle, or false otherwise. */ @@ -1804,11 +1806,11 @@ int bKey = sqlite3_column_int(pXInfo, 5); if( bKey ){ int iCid = sqlite3_column_int(pXInfo, 1); int bDesc = sqlite3_column_int(pXInfo, 3); const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); - zCols = rbuMPrintf(p, "%z%sc%d %s COLLATE %s", zCols, zComma, + zCols = rbuMPrintf(p, "%z%sc%d %s COLLATE %Q", zCols, zComma, iCid, pIter->azTblType[iCid], zCollate ); zPk = rbuMPrintf(p, "%z%sc%d%s", zPk, zComma, iCid, bDesc?" DESC":""); zComma = ", "; } @@ -1865,11 +1867,11 @@ if( pIter->eType==RBU_PK_IPK && pIter->abTblPk[iCol] ){ /* If the target table column is an "INTEGER PRIMARY KEY", add ** "PRIMARY KEY" to the imposter table column declaration. */ zPk = "PRIMARY KEY "; } - zSql = rbuMPrintf(p, "%z%s\"%w\" %s %sCOLLATE %s%s", + zSql = rbuMPrintf(p, "%z%s\"%w\" %s %sCOLLATE %Q%s", zSql, zComma, zCol, pIter->azTblType[iCol], zPk, zColl, (pIter->abNotNull[iCol] ? " NOT NULL" : "") ); zComma = ", "; } @@ -4013,10 +4015,73 @@ pFd->sz = nNew; assert( pRbu->szTemp>=0 ); if( pRbu->szTempLimit && pRbu->szTemp>pRbu->szTempLimit ) return SQLITE_FULL; return SQLITE_OK; } + +/* +** Add an item to the main-db lists, if it is not already present. +** +** There are two main-db lists. One for all file descriptors, and one +** for all file descriptors with rbu_file.pDb!=0. If the argument has +** rbu_file.pDb!=0, then it is assumed to already be present on the +** main list and is only added to the pDb!=0 list. +*/ +static void rbuMainlistAdd(rbu_file *p){ + rbu_vfs *pRbuVfs = p->pRbuVfs; + rbu_file *pIter; + assert( (p->openFlags & SQLITE_OPEN_MAIN_DB) ); + sqlite3_mutex_enter(pRbuVfs->mutex); + if( p->pRbu==0 ){ + for(pIter=pRbuVfs->pMain; pIter; pIter=pIter->pMainNext); + p->pMainNext = pRbuVfs->pMain; + pRbuVfs->pMain = p; + }else{ + for(pIter=pRbuVfs->pMainRbu; pIter && pIter!=p; pIter=pIter->pMainRbuNext){} + if( pIter==0 ){ + p->pMainRbuNext = pRbuVfs->pMainRbu; + pRbuVfs->pMainRbu = p; + } + } + sqlite3_mutex_leave(pRbuVfs->mutex); +} + +/* +** Remove an item from the main-db lists. +*/ +static void rbuMainlistRemove(rbu_file *p){ + rbu_file **pp; + sqlite3_mutex_enter(p->pRbuVfs->mutex); + for(pp=&p->pRbuVfs->pMain; *pp && *pp!=p; pp=&((*pp)->pMainNext)){} + if( *pp ) *pp = p->pMainNext; + p->pMainNext = 0; + for(pp=&p->pRbuVfs->pMainRbu; *pp && *pp!=p; pp=&((*pp)->pMainRbuNext)){} + if( *pp ) *pp = p->pMainRbuNext; + p->pMainRbuNext = 0; + sqlite3_mutex_leave(p->pRbuVfs->mutex); +} + +/* +** Given that zWal points to a buffer containing a wal file name passed to +** either the xOpen() or xAccess() VFS method, search the main-db list for +** a file-handle opened by the same database connection on the corresponding +** database file. +** +** If parameter bRbu is true, only search for file-descriptors with +** rbu_file.pDb!=0. +*/ +static rbu_file *rbuFindMaindb(rbu_vfs *pRbuVfs, const char *zWal, int bRbu){ + rbu_file *pDb; + sqlite3_mutex_enter(pRbuVfs->mutex); + if( bRbu ){ + for(pDb=pRbuVfs->pMainRbu; pDb && pDb->zWal!=zWal; pDb=pDb->pMainRbuNext){} + }else{ + for(pDb=pRbuVfs->pMain; pDb && pDb->zWal!=zWal; pDb=pDb->pMainNext){} + } + sqlite3_mutex_leave(pRbuVfs->mutex); + return pDb; +} /* ** Close an rbu file. */ static int rbuVfsClose(sqlite3_file *pFile){ @@ -4031,21 +4096,18 @@ sqlite3_free(p->apShm); p->apShm = 0; sqlite3_free(p->zDel); if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ - rbu_file **pp; - sqlite3_mutex_enter(p->pRbuVfs->mutex); - for(pp=&p->pRbuVfs->pMain; *pp!=p; pp=&((*pp)->pMainNext)); - *pp = p->pMainNext; - sqlite3_mutex_leave(p->pRbuVfs->mutex); + rbuMainlistRemove(p); rbuUnlockShm(p); p->pReal->pMethods->xShmUnmap(p->pReal, 0); } else if( (p->openFlags & SQLITE_OPEN_DELETEONCLOSE) && p->pRbu ){ rbuUpdateTempSize(p, 0); } + assert( p->pMainNext==0 && p->pRbuVfs->pMain!=p ); /* Close the underlying file handle */ rc = p->pReal->pMethods->xClose(p->pReal); return rc; } @@ -4300,10 +4362,13 @@ rc = SQLITE_ERROR; pRbu->zErrmsg = sqlite3_mprintf("rbu/zipvfs setup error"); }else if( rc==SQLITE_NOTFOUND ){ pRbu->pTargetFd = p; p->pRbu = pRbu; + if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ + rbuMainlistAdd(p); + } if( p->pWalFd ) p->pWalFd->pRbu = pRbu; rc = SQLITE_OK; } } return rc; @@ -4461,24 +4526,10 @@ rc = p->pReal->pMethods->xShmUnmap(p->pReal, delFlag); } return rc; } -/* -** Given that zWal points to a buffer containing a wal file name passed to -** either the xOpen() or xAccess() VFS method, return a pointer to the -** file-handle opened by the same database connection on the corresponding -** database file. -*/ -static rbu_file *rbuFindMaindb(rbu_vfs *pRbuVfs, const char *zWal){ - rbu_file *pDb; - sqlite3_mutex_enter(pRbuVfs->mutex); - for(pDb=pRbuVfs->pMain; pDb && pDb->zWal!=zWal; pDb=pDb->pMainNext){} - sqlite3_mutex_leave(pRbuVfs->mutex); - return pDb; -} - /* ** A main database named zName has just been opened. The following ** function returns a pointer to a buffer owned by SQLite that contains ** the name of the *-wal file this db connection will use. SQLite ** happens to pass a pointer to this buffer when using xAccess() @@ -4553,11 +4604,11 @@ ** happens to pass a pointer to this buffer when using xAccess() ** or xOpen() to operate on the *-wal file. */ pFd->zWal = rbuMainToWal(zName, flags); } else if( flags & SQLITE_OPEN_WAL ){ - rbu_file *pDb = rbuFindMaindb(pRbuVfs, zName); + rbu_file *pDb = rbuFindMaindb(pRbuVfs, zName, 0); if( pDb ){ if( pDb->pRbu && pDb->pRbu->eStage==RBU_STAGE_OAL ){ /* This call is to open a *-wal file. Intead, open the *-oal. This ** code ensures that the string passed to xOpen() is terminated by a ** pair of '\0' bytes in case the VFS attempts to extract a URI @@ -4605,14 +4656,11 @@ /* The xOpen() operation has succeeded. Set the sqlite3_file.pMethods ** pointer and, if the file is a main database file, link it into the ** mutex protected linked list of all such files. */ pFile->pMethods = &rbuvfs_io_methods; if( flags & SQLITE_OPEN_MAIN_DB ){ - sqlite3_mutex_enter(pRbuVfs->mutex); - pFd->pMainNext = pRbuVfs->pMain; - pRbuVfs->pMain = pFd; - sqlite3_mutex_leave(pRbuVfs->mutex); + rbuMainlistAdd(pFd); } }else{ sqlite3_free(pFd->zDel); } @@ -4656,11 +4704,11 @@ ** causing SQLite to call xOpen() to open it. This call will also ** be intercepted (see the rbuVfsOpen() function) and the *-oal ** file opened instead. */ if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ - rbu_file *pDb = rbuFindMaindb(pRbuVfs, zPath); + rbu_file *pDb = rbuFindMaindb(pRbuVfs, zPath, 1); if( pDb && pDb->pRbu && pDb->pRbu->eStage==RBU_STAGE_OAL ){ if( *pResOut ){ rc = SQLITE_CANTOPEN; }else{ sqlite3_int64 sz = 0; Index: ext/rbu/test_rbu.c ================================================================== --- ext/rbu/test_rbu.c +++ ext/rbu/test_rbu.c @@ -79,10 +79,11 @@ {"state", 2, ""}, /* 7 */ {"progress", 2, ""}, /* 8 */ {"close_no_error", 2, ""}, /* 9 */ {"temp_size_limit", 3, "LIMIT"}, /* 10 */ {"temp_size", 2, ""}, /* 11 */ + {"dbRbu_eval", 3, "SQL"}, /* 12 */ {0,0,0} }; int iCmd; if( objc<2 ){ @@ -144,12 +145,13 @@ Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); ret = (rc==SQLITE_OK ? TCL_OK : TCL_ERROR); break; } - case 4: /* dbMain_eval */ { - sqlite3 *db = sqlite3rbu_db(pRbu, 0); + case 12: /* dbRbu_eval */ + case 4: /* dbMain_eval */ { + sqlite3 *db = sqlite3rbu_db(pRbu, (iCmd==12)); int rc = sqlite3_exec(db, Tcl_GetString(objv[2]), 0, 0, 0); if( rc!=SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3_errmsg(db), -1)); ret = TCL_ERROR; } Index: ext/rtree/rtree.c ================================================================== --- ext/rtree/rtree.c +++ ext/rtree/rtree.c @@ -3524,10 +3524,11 @@ } return rc; } +#if defined(SQLITE_TEST) /* ** Implementation of a scalar function that decodes r-tree nodes to ** human readable strings. This can be used for debugging and analysis. ** ** The scalar function takes two arguments: (1) the number of dimensions @@ -3585,10 +3586,11 @@ } } sqlite3_result_text(ctx, zText, -1, sqlite3_free); } +#endif /* This routine implements an SQL function that returns the "depth" parameter ** from the front of a blob that is an r-tree node. For example: ** ** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; @@ -4071,13 +4073,15 @@ ** virtual table module "rtree" and the debugging/analysis scalar ** function "rtreenode". */ int sqlite3RtreeInit(sqlite3 *db){ const int utf8 = SQLITE_UTF8; - int rc; + int rc = SQLITE_OK; +#if defined(SQLITE_TEST) rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); +#endif if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); } if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "rtreecheck", -1, utf8, 0,rtreecheck, 0,0); Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -3564,10 +3564,11 @@ ** deplete the limited store of lookaside memory. Future versions of ** SQLite may act on this hint differently. ** */ #define SQLITE_PREPARE_PERSISTENT 0x01 +#define SQLITE_PREPARE_NO_VTAB 0x04 /* ** CAPI3REF: Compiling An SQL Statement ** KEYWORDS: {SQL statement compiler} ** METHOD: sqlite3 @@ -4748,11 +4749,12 @@ ** These constants may be ORed together with the ** [SQLITE_UTF8 | preferred text encoding] as the fourth argument ** to [sqlite3_create_function()], [sqlite3_create_function16()], or ** [sqlite3_create_function_v2()]. */ -#define SQLITE_DETERMINISTIC 0x800 +#define SQLITE_DETERMINISTIC 0x000000800 +#define SQLITE_DIRECTONLY 0x000080000 /* ** CAPI3REF: Deprecated Functions ** DEPRECATED ** ADDED test/fts3corrupt4.test Index: test/fts3corrupt4.test ================================================================== --- /dev/null +++ test/fts3corrupt4.test @@ -0,0 +1,147 @@ +# 2006 September 9 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# +# $Id: fts3aa.test,v 1.1 2007/08/20 17:38:42 shess Exp $ +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts3corrupt4 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + BEGIN; + CREATE VIRTUAL TABLE ft USING fts3; + INSERT INTO ft VALUES('aback'); + INSERT INTO ft VALUES('abaft'); + INSERT INTO ft VALUES('abandon'); + COMMIT; +} + +proc blob {a} { binary decode hex $a } +db func blob blob + +do_execsql_test 1.1 { + SELECT quote(root) FROM ft_segdir; +} {X'0005616261636B03010200030266740302020003046E646F6E03030200'} + +do_execsql_test 1.2 { + UPDATE ft_segdir SET root = blob( + '0005616261636B03010200 FFFFFFFF0702 66740302020003046E646F6E03030200' + ); +} + +do_catchsql_test 1.3 { + SELECT * FROM ft WHERE ft MATCH 'abandon'; +} {1 {database disk image is malformed}} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 2.0.0 { + CREATE VIRTUAL TABLE ft USING fts3; + INSERT INTO ft(ft) VALUES('nodesize=32'); +} +do_test 2.0.1 { + for {set i 0} {$i < 12} {incr i} { + execsql { + BEGIN; + INSERT INTO ft VALUES('abc' || $i); + INSERT INTO ft VALUES('abc' || $i || 'x' ); + INSERT INTO ft VALUES('abc' || $i || 'xx' ); + COMMIT + } + } + execsql { + SELECT count(*) FROM ft_segdir; + SELECT count(*) FROM ft_segments; + } +} {12 0} + +do_execsql_test 2.1 { + INSERT INTO ft(ft) VALUES('merge=1,4'); + SELECT count(*) FROM ft_segdir; + SELECT count(*) FROM ft_segments; +} {12 3} + +do_execsql_test 2.2 { + SELECT quote(block) FROM ft_segments WHERE blockid=2 +} {X'00056162633130031F0200'} + +db func blob blob +do_execsql_test 2.3.1 { + UPDATE ft_segments SET block = + blob('00056162633130031F0200 FFFFFFFF07FF55 66740302020003046E646F6E03030200') + WHERE blockid=2; +} {} +do_catchsql_test 2.3.2 { + INSERT INTO ft(ft) VALUES('merge=1,4'); +} {1 {database disk image is malformed}} + +do_execsql_test 2.4.1 { + UPDATE ft_segments SET block = + blob('00056162633130031F0200 02FFFFFFFF07 66740302020003046E646F6E03030200') + WHERE blockid=2; +} {} +do_catchsql_test 2.4.2 { + INSERT INTO ft(ft) VALUES('merge=1,4'); +} {1 {database disk image is malformed}} + +do_execsql_test 2.5.1 { + UPDATE ft_segments SET block = + blob('00056162633130031F0200 0202 6674 FFFFFF070302020003046E646F6E030200') + WHERE blockid=2; +} {} +do_catchsql_test 2.5.2 { + INSERT INTO ft(ft) VALUES('merge=1,4'); +} {1 {database disk image is malformed}} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 3.0.0 { + CREATE VIRTUAL TABLE ft USING fts3; + INSERT INTO ft(ft) VALUES('nodesize=32'); +} +do_test 3.0.1 { + execsql BEGIN + for {set i 0} {$i < 20} {incr i} { + execsql { INSERT INTO ft VALUES('abc' || $i) } + } + execsql { + COMMIT; + SELECT count(*) FROM ft_segdir; + SELECT count(*) FROM ft_segments; + } +} {1 5} + +do_execsql_test 3.1 { + SELECT quote(root) FROM ft_segdir +} {X'0101056162633132040136030132030136'} + +db func blob blob +do_execsql_test 3.2 { + UPDATE ft_segdir + SET root = blob('0101056162633132FFFFFFFF070236030132030136'); +} + +do_catchsql_test 3.1 { + SELECT * FROM ft WHERE ft MATCH 'abc20' +} {1 {database disk image is malformed}} + +finish_test + + Index: test/fts3expr.test ================================================================== --- test/fts3expr.test +++ test/fts3expr.test @@ -407,11 +407,11 @@ do_test fts3expr-5.1 { catchsql { SELECT fts3_exprtest('simple', 'a b') } } {1 {Usage: fts3_exprtest(tokenizer, expr, col1, ...}} do_test fts3expr-5.2 { catchsql { SELECT fts3_exprtest('doesnotexist', 'a b', 'c') } -} {1 {No such tokenizer module}} +} {1 {unknown tokenizer: doesnotexist}} do_test fts3expr-5.3 { catchsql { SELECT fts3_exprtest('simple', 'a b OR', 'c') } } {1 {Error parsing expression}} #------------------------------------------------------------------------ Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -253,10 +253,11 @@ fts3ae.test fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3auto.test fts3aux1.test fts3aux2.test fts3b.test fts3comp1.test fts3conf.test fts3corrupt2.test fts3corrupt.test + fts3corrupt4.test fts3cov.test fts3c.test fts3defer2.test fts3defer3.test fts3defer.test fts3drop.test fts3d.test fts3e.test fts3expr2.test fts3expr3.test fts3expr4.test fts3expr5.test fts3expr.test fts3fault2.test fts3fault.test fts3first.test fts3join.test fts3malloc.test fts3matchinfo.test fts3near.test