Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -1079,10 +1079,11 @@ char *zPrefix = 0; /* Prefix parameter value (or NULL) */ char *zCompress = 0; /* compress=? parameter (or NULL) */ char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ char *zContent = 0; /* content=? parameter (or NULL) */ char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ + char *zLanguageidBits = 0; /* languageid_bits=? parameter (or NULL) */ assert( strlen(argv[0])==4 ); assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) ); @@ -1123,17 +1124,18 @@ else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ struct Fts4Option { const char *zOpt; int nOpt; } aFts4Opt[] = { - { "matchinfo", 9 }, /* 0 -> MATCHINFO */ - { "prefix", 6 }, /* 1 -> PREFIX */ - { "compress", 8 }, /* 2 -> COMPRESS */ - { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ - { "order", 5 }, /* 4 -> ORDER */ - { "content", 7 }, /* 5 -> CONTENT */ - { "languageid", 10 } /* 6 -> LANGUAGEID */ + { "matchinfo", 9 }, /* 0 -> MATCHINFO */ + { "prefix", 6 }, /* 1 -> PREFIX */ + { "compress", 8 }, /* 2 -> COMPRESS */ + { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ + { "order", 5 }, /* 4 -> ORDER */ + { "content", 7 }, /* 5 -> CONTENT */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "languageid_bits", 15 } /* 7 -> LANGUAGEID_BITS */ }; int iOpt; if( !zVal ){ rc = SQLITE_NOMEM; @@ -1195,10 +1197,17 @@ assert( iOpt==6 ); sqlite3_free(zLanguageid); zLanguageid = zVal; zVal = 0; break; + + case 7: /* LANGUAGEID_BITS */ + assert( iOpt==7 ); + sqlite3_free(zLanguageidBits); + zLanguageidBits = zVal; + zVal = 0; + break; } } sqlite3_free(zVal); } } @@ -1290,10 +1299,19 @@ p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */ p->zContentTbl = zContent; p->zLanguageid = zLanguageid; zContent = 0; zLanguageid = 0; + if( zLanguageidBits && p->zLanguageid && p->zContentTbl==0 ){ + 
p->nLanguageidBits = atoi(zLanguageidBits); + if( p->nLanguageidBits>30 || p->nLanguageidBits<0 ){ + rc = SQLITE_ERROR; + *pzErr = sqlite3_mprintf("languageid_bits parameter out of range"); + goto fts3_init_out; + } + } + TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); @@ -1363,10 +1381,11 @@ sqlite3_free(aIndex); sqlite3_free(zCompress); sqlite3_free(zUncompress); sqlite3_free(zContent); sqlite3_free(zLanguageid); + sqlite3_free(zLanguageidBits); sqlite3_free((void *)aCol); if( rc!=SQLITE_OK ){ if( p ){ fts3DisconnectMethod((sqlite3_vtab *)p); }else if( pTokenizer ){ @@ -1424,16 +1443,16 @@ */ pInfo->idxNum = FTS3_FULLSCAN_SEARCH; pInfo->estimatedCost = 500000; for(i=0; i<pInfo->nConstraint; i++){ struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; + int iCol = pCons->iColumn; if( pCons->usable==0 ) continue; /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( iCons<0 - && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + && (iCol<0 || (iCol==p->nColumn+1 && p->nLanguageidBits==0)) ){ pInfo->idxNum = FTS3_DOCID_SEARCH; pInfo->estimatedCost = 1.0; iCons = i; } @@ -1472,13 +1491,17 @@ } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or ** docid) order. Both ascending and descending are possible. */ + assert( pInfo->orderByConsumed==0 ); if( pInfo->nOrderBy==1 ){ struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; - if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ + if( pOrder->iColumn<0 || ( + (pOrder->iColumn==p->nColumn+1) + && (pInfo->idxNum>=FTS3_FULLTEXT_SEARCH || p->nLanguageidBits==0) + )){ if( pOrder->desc ){ pInfo->idxStr = "DESC"; }else{ pInfo->idxStr = "ASC"; } @@ -3060,14 +3083,14 @@ /* The column value supplied by SQLite must be in range.
*/ assert( iCol>=0 && iCol<=p->nColumn+2 ); if( iCol==p->nColumn+1 ){ - /* This call is a request for the "docid" column. Since "docid" is an - ** alias for "rowid", use the xRowid() method to obtain the value. - */ - sqlite3_result_int64(pCtx, pCsr->iPrevId); + /* This call is a request for the "docid" column. The value currently + ** stored in pCsr->iPrevId is a rowid. Transform this to a docid and + ** return it. */ + sqlite3_result_int64(pCtx, sqlite3Fts3RowidToDocid(p, pCsr->iPrevId)); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. ** Return a blob which is a pointer to the cursor. */ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -207,10 +207,11 @@ int nColumn; /* number of named columns in virtual table */ char **azColumn; /* column names. malloced */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ char *zContentTbl; /* content=xxx option, or NULL */ char *zLanguageid; /* languageid=xxx option, or NULL */ + int nLanguageidBits; /* languageid_bits=N option, or 0 */ u8 bAutoincrmerge; /* True if automerge=1 */ u32 nLeafAdd; /* Number of leaf blocks added this trans */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. 
@@ -419,12 +420,13 @@ #define FTSQUERY_NOT 2 #define FTSQUERY_AND 3 #define FTSQUERY_OR 4 #define FTSQUERY_PHRASE 5 - /* fts3_write.c */ +i64 sqlite3Fts3DocidToRowid(Fts3Table *p, i64 iDocid, int iLangid); +i64 sqlite3Fts3RowidToDocid(Fts3Table *p, i64 iRowid); int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, Index: ext/fts3/fts3_write.c ================================================================== --- ext/fts3/fts3_write.c +++ ext/fts3/fts3_write.c @@ -485,10 +485,28 @@ rc = sqlite3_reset(pStmt); } *pRC = rc; } +static void fts3SqlExecI64( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS3 table */ + int eStmt, /* Index of statement to evaluate */ + i64 iVal +){ + sqlite3_stmt *pStmt; + int rc; + if( *pRC ) return; + rc = fts3SqlStmt(p, eStmt, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iVal); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + *pRC = rc; +} + /* ** This function ensures that the caller has obtained an exclusive ** shared-cache table-lock on the %_segdir table. This is required before ** writing data to the fts3 table. If this lock is not acquired first, then @@ -925,24 +943,27 @@ ** apVal[p->nColumn+4] Hidden languageid column */ static int fts3InsertData( Fts3Table *p, /* Full-text table */ sqlite3_value **apVal, /* Array of values to insert */ - sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ + sqlite3_int64 *piRowid, /* OUT: Rowid for row just inserted */ + i64 iRowid /* Explicit rowid, if piRowid==NULL */ ){ int rc; /* Return code */ sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) 
*/ if( p->zContentTbl ){ - sqlite3_value *pRowid = apVal[p->nColumn+3]; + sqlite3_value *pRowid; + assert( p->nLanguageidBits==0 && piRowid ); + pRowid = apVal[p->nColumn+3]; if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ pRowid = apVal[1]; } if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ return SQLITE_CONSTRAINT; } - *piDocid = sqlite3_value_int64(pRowid); + *piRowid = sqlite3_value_int64(pRowid); return SQLITE_OK; } /* Locate the statement handle used to insert data into the %_content ** table. The SQL for this statement is: @@ -951,15 +972,20 @@ ** ** The statement features N '?' variables, where N is the number of user ** defined columns in the FTS3 table, plus one for the docid field. */ rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); - if( rc==SQLITE_OK && p->zLanguageid ){ - rc = sqlite3_bind_int( - pContentInsert, p->nColumn+2, - sqlite3_value_int(apVal[p->nColumn+4]) - ); + if( rc==SQLITE_OK ){ + if( piRowid==0 ){ + sqlite3_bind_int64(pContentInsert, 1, iRowid); + } + if( p->zLanguageid ){ + rc = sqlite3_bind_int( + pContentInsert, p->nColumn+2, + sqlite3_value_int(apVal[p->nColumn+4]) + ); + } } if( rc!=SQLITE_OK ) return rc; /* There is a quirk here. The users INSERT statement may have specified ** a value for the "rowid" field, for the "docid" field, or for both. @@ -969,28 +995,29 @@ ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); ** ** In FTS3, this is an error. It is an error to specify non-NULL values ** for both docid and some other rowid alias. */ - if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ - if( SQLITE_NULL==sqlite3_value_type(apVal[0]) - && SQLITE_NULL!=sqlite3_value_type(apVal[1]) - ){ - /* A rowid/docid conflict. 
*/ - return SQLITE_ERROR; - } + assert( p->nLanguageidBits==0 || piRowid==0 + || sqlite3_value_type(apVal[1])!=SQLITE_NULL + ); + if( piRowid && p->nLanguageidBits==0 + && SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) + ){ rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); if( rc!=SQLITE_OK ) return rc; } - /* Execute the statement to insert the record. Set *piDocid to the + /* Execute the statement to insert the record. Set *piRowid to the ** new docid value. */ sqlite3_step(pContentInsert); rc = sqlite3_reset(pContentInsert); - *piDocid = sqlite3_last_insert_rowid(p->db); + if( piRowid ){ + *piRowid = sqlite3_last_insert_rowid(p->db); + } return rc; } @@ -1034,21 +1061,22 @@ ** full-text index. */ static void fts3DeleteTerms( int *pRC, /* Result code */ Fts3Table *p, /* The FTS table to delete from */ - sqlite3_value *pRowid, /* The docid to be deleted */ + i64 iRowid, /* The rowid to be deleted */ u32 *aSz, /* Sizes of deleted document written here */ int *pbFound /* OUT: Set to true if row really does exist */ ){ int rc; sqlite3_stmt *pSelect; assert( *pbFound==0 ); if( *pRC ) return; - rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); + rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, 0); if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pSelect, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pSelect) ){ int i; int iLangid = langidFromSelect(p, pSelect); rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ @@ -2344,20 +2372,21 @@ ** ** If successful, *pisEmpty is set to true if the table is empty except for ** document pRowid, or false otherwise, and SQLITE_OK is returned. If an ** error occurs, an SQLite error code is returned.
*/ -static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ +static int fts3IsEmpty(Fts3Table *p, i64 iRowid, int *pisEmpty){ sqlite3_stmt *pStmt; int rc; if( p->zContentTbl ){ /* If using the content=xxx option, assume the table is never empty */ *pisEmpty = 0; rc = SQLITE_OK; }else{ - rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); + rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, 0); if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pStmt) ){ *pisEmpty = sqlite3_column_int(pStmt, 0); } rc = sqlite3_reset(pStmt); } @@ -5195,21 +5224,21 @@ ** present in the FTS3 table. If it is, delete it and adjust the contents ** of subsiduary data structures accordingly. */ static int fts3DeleteByRowid( Fts3Table *p, - sqlite3_value *pRowid, + i64 iRowid, int *pnChng, /* IN/OUT: Decrement if row is deleted */ u32 *aSzDel ){ int rc = SQLITE_OK; /* Return code */ int bFound = 0; /* True if *pRowid really is in the table */ - fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); + fts3DeleteTerms(&rc, p, iRowid, aSzDel, &bFound); if( bFound && rc==SQLITE_OK ){ int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ - rc = fts3IsEmpty(p, pRowid, &isEmpty); + rc = fts3IsEmpty(p, iRowid, &isEmpty); if( rc==SQLITE_OK ){ if( isEmpty ){ /* Deleting this row means the whole table is empty. In this case ** delete the contents of all three tables and throw away any ** data in the pendingTerms hash table. 
*/ @@ -5217,21 +5246,72 @@ *pnChng = 0; memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); }else{ *pnChng = *pnChng - 1; if( p->zContentTbl==0 ){ - fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); + fts3SqlExecI64(&rc, p, SQL_DELETE_CONTENT, iRowid); } if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); + fts3SqlExecI64(&rc, p, SQL_DELETE_DOCSIZE, iRowid); } } } } return rc; } + +/* +** Convert a docid (iDocid) and a language id (iLangid) to a rowid, +** according to the configured languageid_bits= value belonging to +** FTS table *p. +** +** The conversion is as follows: +** +** * The sign bit of iDocid becomes the sign bit of the rowid. +** +** * iLangid is converted to an unsigned integer and stored in +** the next most significant Fts3Table.nLanguageidBits bits +** of the returned rowid. +** +** * The least significant (63-nLanguageidBits) bits of iDocid are +** copied to the (63-nLanguageidBits) least significant bits of +** the returned rowid. +*/ +i64 sqlite3Fts3DocidToRowid(Fts3Table *p, i64 iDocid, int iLangid){ + u64 iRet = iDocid; + + if( p->nLanguageidBits ){ + int iShift = (63 - p->nLanguageidBits); + u64 mask = ((((u64)1 << p->nLanguageidBits) - 1) << iShift); + + iRet &= ~mask; + iRet |= (u64)iLangid << iShift; + } + + assert( sqlite3Fts3RowidToDocid(p, (i64)iRet)==iDocid ); + return (i64)iRet; +} + +/* +** Convert a rowid (iRowid) to a docid according to the languageid_bits= +** value belonging to FTS table *p. +*/ +i64 sqlite3Fts3RowidToDocid(Fts3Table *p, i64 iRowid){ + u64 iRet = iRowid; + if( p->nLanguageidBits ){ + static const u64 signbit = ((u64)1 << 63); + u64 mask = ((((u64)1 << p->nLanguageidBits)-1) << (63-p->nLanguageidBits)); + + if( iRet & signbit ){ + iRet |= mask; + }else{ + iRet &= ~mask; + } + } + return (i64)iRet; +} /* ** This function does the work for the xUpdate method of FTS3 virtual ** tables.
The schema of the virtual table being: ** @@ -5240,11 +5320,10 @@ ** <table name> HIDDEN, ** docid HIDDEN, ** <langid> HIDDEN ** ); ** -** */ int sqlite3Fts3UpdateMethod( sqlite3_vtab *pVtab, /* FTS3 vtab object */ int nArg, /* Size of argument array */ sqlite3_value **apVal, /* Array of arguments */ @@ -5255,10 +5334,11 @@ int isRemove = 0; /* True for an UPDATE or DELETE */ u32 *aSzIns = 0; /* Sizes of inserted documents */ u32 *aSzDel = 0; /* Sizes of deleted documents */ int nChng = 0; /* Net change in number of documents */ int bInsertDone = 0; + int iLangid = 0; assert( p->pSegments==0 ); assert( nArg==1 /* DELETE operations */ || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ @@ -5274,13 +5354,35 @@ ){ rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); goto update_out; } - if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ - rc = SQLITE_CONSTRAINT; - goto update_out; + /* If this is an INSERT or UPDATE, check that the new value for the + ** languageid is within range. A languageid can never be a negative + ** value. If the languageid_bits option was specified when this table + ** was created, it must also be less than (2 ^ nLanguageidBits). + ** + ** Also check that if a non-zero languageid_bits value was configured, + ** the specified rowid value must be NULL. + */ + if( nArg>1 ){ + i64 iLangid64 = sqlite3_value_int64(apVal[2 + p->nColumn + 2]); + if( iLangid64<0 + || (p->nLanguageidBits && iLangid64>=((i64)1<<p->nLanguageidBits)) + ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } + iLangid = (int)iLangid64; + + if( p->nLanguageidBits + && sqlite3_value_type(apVal[0])==SQLITE_NULL + && sqlite3_value_type(apVal[1])!=SQLITE_NULL + ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } } /* Allocate space to hold the change in document sizes */ aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); if( aSzDel==0 ){ @@ -5302,41 +5404,76 @@ ** detect the conflict and return SQLITE_CONSTRAINT before beginning to ** modify the database file.
*/ if( nArg>1 && p->zContentTbl==0 ){ /* Find the value object that holds the new rowid value. */ - sqlite3_value *pNewRowid = apVal[3+p->nColumn]; - if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ - pNewRowid = apVal[1]; - } - - if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( - sqlite3_value_type(apVal[0])==SQLITE_NULL - || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) - )){ - /* The new rowid is not NULL (in this case the rowid will be - ** automatically assigned and there is no chance of a conflict), and - ** the statement is either an INSERT or an UPDATE that modifies the - ** rowid column. So if the conflict mode is REPLACE, then delete any - ** existing row with rowid=pNewRowid. - ** - ** Or, if the conflict mode is not REPLACE, insert the new record into - ** the %_content table. If we hit the duplicate rowid constraint (or any - ** other error) while doing so, return immediately. - ** - ** This branch may also run if pNewRowid contains a value that cannot - ** be losslessly converted to an integer. In this case, the eventual - ** call to fts3InsertData() (either just below or further on in this - ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is - ** invoked, it will delete zero rows (since no row will have - ** docid=$pNewRowid if $pNewRowid is not an integer value). 
- */ - if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ - rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); - }else{ - rc = fts3InsertData(p, apVal, pRowid); - bInsertDone = 1; + sqlite3_value *pNewDocid = apVal[3+p->nColumn]; + if( sqlite3_value_type(pNewDocid)==SQLITE_NULL ){ + if( p->nLanguageidBits ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } + pNewDocid = apVal[1]; + }else if( sqlite3_value_type(apVal[0])==SQLITE_NULL + && sqlite3_value_type(apVal[1])!=SQLITE_NULL + ){ + rc = SQLITE_ERROR; + goto update_out; + } + + if( sqlite3_value_type(pNewDocid)!=SQLITE_NULL ){ + int e = sqlite3_value_numeric_type(pNewDocid); + i64 iRowid = sqlite3_value_int64(pNewDocid); + + /* Check that the value specified by the user may be losslessly + ** converted to an integer. If not, return a "data mismatch" error. */ + if( (e!=SQLITE_INTEGER) + && (e!=SQLITE_FLOAT || (double)iRowid!=sqlite3_value_double(pNewDocid)) + ){ + rc = SQLITE_MISMATCH; + goto update_out; + } + + if( p->nLanguageidBits ){ + /* Check for an out-of-range docid value. */ + if( iRowid>=((i64)1 << (63 - p->nLanguageidBits)) + || iRowid<-1*((i64)1 << (63 - p->nLanguageidBits)) + ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } + + iRowid = sqlite3Fts3DocidToRowid(p, iRowid, iLangid); + } + + if( sqlite3_value_type(apVal[0])==SQLITE_NULL + || sqlite3_value_int64(apVal[0])!=iRowid + ){ + /* The new rowid is not NULL (in this case the rowid will be + ** automatically assigned and there is no chance of a conflict), and + ** the statement is either an INSERT or an UPDATE that modifies the + ** rowid column. So if the conflict mode is REPLACE, then delete any + ** existing row with rowid=pNewRowid. + ** + ** Or, if the conflict mode is not REPLACE, insert the new record into + ** the %_content table. If we hit the duplicate rowid constraint (or + ** any other error) while doing so, return immediately. 
+ ** + ** This branch may also run if pNewRowid contains a value that cannot + ** be losslessly converted to an integer. In this case, the eventual + ** call to fts3InsertData() (either just below or further on in this + ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is + ** invoked, it will delete zero rows (since no row will have + ** docid=$pNewRowid if $pNewRowid is not an integer value). + */ + if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ + rc = fts3DeleteByRowid(p, iRowid, &nChng, aSzDel); + }else{ + rc = fts3InsertData(p, apVal, 0, iRowid); + bInsertDone = 1; + *pRowid = iRowid; + } } } } if( rc!=SQLITE_OK ){ goto update_out; @@ -5343,19 +5480,18 @@ } /* If this is a DELETE or UPDATE operation, remove the old record. */ if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); - rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); + rc = fts3DeleteByRowid(p, sqlite3_value_int64(apVal[0]), &nChng, aSzDel); isRemove = 1; } /* If this is an INSERT or UPDATE operation, insert the new record. */ if( nArg>1 && rc==SQLITE_OK ){ - int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); if( bInsertDone==0 ){ - rc = fts3InsertData(p, apVal, pRowid); + rc = fts3InsertData(p, apVal, pRowid, 0); if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ rc = FTS_CORRUPT_VTAB; } } if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ ADDED test/fts4langid2.test Index: test/fts4langid2.test ================================================================== --- /dev/null +++ test/fts4langid2.test @@ -0,0 +1,332 @@ +# 2012 March 01 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#************************************************************************* +# This file implements regression tests for the SQLite library. The +# focus of this script is testing the languageid=xxx FTS4 option. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set ::testprefix fts4langid2 + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +#------------------------------------------------------------------------- +# Test out-of-range values for the languageid_bits= parameter. +# +do_catchsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=31); +} {1 {languageid_bits parameter out of range}} + +do_catchsql_test 1.2 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=-1); +} {1 {languageid_bits parameter out of range}} + +do_catchsql_test 1.3 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=0); + CREATE VIRTUAL TABLE t2 USING fts4(languageid=lid, languageid_bits=30); +} {0 {}} + +do_execsql_test 1.4 { + DROP TABLE t1; + DROP TABLE t2; +} + +#------------------------------------------------------------------------- +# Test out-of-range values in the languageid column.
+# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=8); + CREATE VIRTUAL TABLE t2 USING fts4(languageid=lid, languageid_bits=7); +} + +do_catchsql_test 2.2 { + INSERT INTO t1(docid, lid, content) VALUES(1, 256, 'abc def'); +} {1 {constraint failed}} + +do_catchsql_test 2.3 { + INSERT INTO t2(docid, lid, content) VALUES(1, 128, 'abc def'); +} {1 {constraint failed}} + +do_catchsql_test 2.3 { + INSERT INTO t1(docid, lid, content) VALUES(1, -1, 'abc def'); +} {1 {constraint failed}} + +do_execsql_test 2.4 { + DROP TABLE t1; + DROP TABLE t2; +} + +#------------------------------------------------------------------------- +# Test that if languageid_bits is set to a non-zero value it is +# not possible to specify a non-NULL rowid, even if it is the same +# as the docid. +# +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=4); + CREATE VIRTUAL TABLE t2 USING fts4(languageid=lid, languageid_bits=0); +} + +do_catchsql_test 3.2.1 { + INSERT INTO t1(rowid, lid, content) VALUES(1, 0, 'abc def'); +} {1 {constraint failed}} + +do_catchsql_test 3.2.2 { + INSERT INTO t2(rowid, lid, content) VALUES(1, 0, 'abc def'); +} {0 {}} + +do_catchsql_test 3.3 { + INSERT INTO t1(rowid, docid, lid, content) VALUES(2, 2, 0, 'abc def'); +} {1 {constraint failed}} + +do_catchsql_test 3.4 { + INSERT INTO t1(lid, content) VALUES(0, 'one two def'); +} {1 {constraint failed}} + +do_execsql_test 3.4 { + DROP TABLE t1; + DROP TABLE t2; +} + +#------------------------------------------------------------------------- +# Simple tests inserting data with multiple languageid values. 
+# +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=5); +} + +do_execsql_test 4.2 { + INSERT INTO t1 (docid, lid, content) VALUES(1, 0, '1 2 3'); + INSERT INTO t1 (docid, lid, content) VALUES(1, 1, '1 2 3 4'); +} + +do_execsql_test 4.3 { + SELECT docid, lid FROM t1; +} {1 0 1 1} + +do_execsql_test 4.4 { + SELECT docid, lid, content FROM t1 WHERE t1 MATCH '2'; +} {1 0 {1 2 3}} + +do_execsql_test 4.5 { + SELECT docid, lid, content FROM t1 WHERE t1 MATCH '2' AND lid=1; +} {1 1 {1 2 3 4}} + +do_execsql_test 4.6 { + UPDATE t1 SET content = 'x y z' || lid; + SELECT docid, lid FROM t1; +} {1 0 1 1} + +do_execsql_test 3.4 { + DROP TABLE t1; +} + +#------------------------------------------------------------------------- +# Tests for docid range boundary conditions. +# +for {set bits 1} {$bits <= 30} {incr bits} { + do_execsql_test 5.$bits.1 " + CREATE VIRTUAL TABLE t1 USING fts4(languageid=lid, languageid_bits=$bits); + " + + set max_docid [expr int(1<<(63-$bits))-1] + set min_docid [expr -1*int(1<<(63-$bits))] + set max_langid [expr (1<<$bits)-1] + set min_langid 0 + + + do_catchsql_test 5.$bits.2.1 { + INSERT INTO t1(docid, lid, content) VALUES($max_docid+1, 4, ''); + } {1 {constraint failed}} + do_catchsql_test 5.$bits.2.2 { + INSERT INTO t1(docid, lid, content) VALUES($min_docid-1, 4, ''); + } {1 {constraint failed}} + + do_test 5.$bits.3 { + foreach {a b c} " + $min_docid $min_langid {1 min min x} + $min_docid $max_langid {2 min max x} + $max_docid $min_langid {3 max min x} + $max_docid $max_langid {4 max max x} + " { + execsql { INSERT INTO t1(docid, lid, content) VALUES($a, $b, $c) } + } + } {} + + do_execsql_test 5.$bits.4.1 { + SELECT docid, lid, content FROM t1 ORDER BY content + } " + $min_docid $min_langid {1 min min x} + $min_docid $max_langid {2 min max x} + $max_docid $min_langid {3 max min x} + $max_docid $max_langid {4 max max x} + " + + do_execsql_test 5.$bits.4.2 { + SELECT docid, lid, content FROM t1 WHERE 
lid=$min_langid AND t1 MATCH 'x' + } " + $min_docid $min_langid {1 min min x} + $max_docid $min_langid {3 max min x} + " + + do_execsql_test 5.$bits.4.3 { + SELECT docid, lid, content FROM t1 WHERE lid=$max_langid AND t1 MATCH 'x' + } " + $min_docid $max_langid {2 min max x} + $max_docid $max_langid {4 max max x} + " + + do_execsql_test 5.$bits.4.4 { + SELECT docid, lid, content FROM t1 WHERE t1 MATCH '1' + } " + $min_docid $min_langid {1 min min x} + " + + do_execsql_test 5.$bits.5 { DROP TABLE t1 } +} + +#------------------------------------------------------------------------- +# Tests for auxiliary functions with languageid_bits tables. +# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit + +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid_bits=4, languageid=lid); + INSERT INTO t1(docid,lid,content) VALUES(1, 1, 'one two three four'); + INSERT INTO t1(docid,lid,content) VALUES(2, 1, 'two three four five'); + INSERT INTO t1(docid,lid,content) VALUES(3, 1, 'three four five six'); + INSERT INTO t1(docid,lid,content) VALUES(4, 1, 'four five six seven'); + + INSERT INTO t1(docid,lid,content) VALUES(1, 2, 'four three two one'); + INSERT INTO t1(docid,lid,content) VALUES(2, 2, 'five four three two'); + INSERT INTO t1(docid,lid,content) VALUES(3, 2, 'six five four three'); + INSERT INTO t1(docid,lid,content) VALUES(4, 2, 'A B C D'); +} + +do_execsql_test 6.2.1 { + SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'one' AND lid=1; +} {1 {one two three four}} +do_execsql_test 6.2.2 { + SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'one' AND lid=2; +} {1 {four three two one}} + +do_execsql_test 6.2.1 { + SELECT docid, offsets(t1) FROM t1 WHERE t1 MATCH 'two' AND lid=1; +} {1 {0 0 4 3} 2 {0 0 0 3}} +do_execsql_test 6.2.2 { + SELECT docid, offsets(t1) FROM t1 WHERE t1 MATCH 'two' AND lid=2; +} {1 {0 0 11 3} 2 {0 0 16 3}} + +do_execsql_test
6.3.1 { + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'two' AND lid=1; +} {1 {1 1 1 2 2} 2 {1 1 1 2 2}} +do_execsql_test 6.3.2 { + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'two' AND lid=2; +} {1 {1 1 1 2 2} 2 {1 1 1 2 2}} +do_execsql_test 6.3.3 { + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'B' AND lid=1; +} {} +do_execsql_test 6.3.4 { + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'B' AND lid=2; +} {4 {1 1 1 1 1}} + +do_execsql_test 6.4 { + CREATE VIRTUAL TABLE t2 USING fts4(languageid_bits=8, languageid=lid); + INSERT INTO t2(docid,lid,content) VALUES(-1, 0, 'A B C D'); + INSERT INTO t2(docid,lid,content) VALUES(-2, 0, 'D C B A'); + INSERT INTO t2(docid,lid,content) VALUES(-3, 0, 'C B D A'); + INSERT INTO t2(docid,lid,content) VALUES(-4, 0, 'A D B C'); + + INSERT INTO t2(docid,lid,content) VALUES(-1, 1, 'A A A A'); + INSERT INTO t2(docid,lid,content) VALUES(-2, 1, 'B B B B'); + INSERT INTO t2(docid,lid,content) VALUES(-3, 1, 'C C C C'); + INSERT INTO t2(docid,lid,content) VALUES(-4, 1, 'D D D D'); +} + +do_execsql_test 6.4.1 { + SELECT docid, mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'B'; +} { + -4 {1 1 1 4 4} + -3 {1 1 1 4 4} + -2 {1 1 1 4 4} + -1 {1 1 1 4 4} +} +do_execsql_test 6.4.2 { + SELECT docid, mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'B' AND lid=1; +} {-2 {1 1 4 4 1}} + +do_execsql_test 6.5 { + DROP TABLE t1; + DROP TABLE t2; +} + +#------------------------------------------------------------------------- +# Tests for querying by docid. 
+# +do_execsql_test 7.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid_bits=8, languageid=lid); + INSERT INTO t1(docid,lid,content) VALUES(10, 10, 'abc def'); +} + +do_execsql_test 7.2 { + SELECT docid,lid,content FROM t1 WHERE docid=10; +} {10 10 {abc def}} + +do_execsql_test 7.3 { + SELECT docid,lid,content FROM t1 WHERE docid<11; +} {10 10 {abc def}} + +do_execsql_test 7.4 { + DROP TABLE t1; +} + +#------------------------------------------------------------------------- +# Tests for sorting by docid. +# +do_execsql_test 8.1 { + CREATE VIRTUAL TABLE t1 USING fts4(languageid_bits=6, languageid=lid); + INSERT INTO t1 (docid,lid,content) VALUES(1, 0, 'abc def'); + INSERT INTO t1 (docid,lid,content) VALUES(3, 0, 'abc ghi'); + INSERT INTO t1 (docid,lid,content) VALUES(2, 0, 'def ghi'); + + INSERT INTO t1 (docid,lid,content) VALUES(1, 5, 'A B'); + INSERT INTO t1 (docid,lid,content) VALUES(3, 5, 'A C'); + INSERT INTO t1 (docid,lid,content) VALUES(2, 5, 'B C'); +} + +do_execsql_test 8.2 { + SELECT docid FROM t1 ORDER BY docid; +} {1 1 2 2 3 3} +do_execsql_test 8.3 { + SELECT docid FROM t1 WHERE t1 MATCH 'ghi' ORDER BY docid; +} {2 3} +do_execsql_test 8.4 { + SELECT docid FROM t1 WHERE t1 MATCH 'ghi' ORDER BY docid DESC; +} {3 2} + +# Test that the docid and languageid fields may be updated. 
+# +do_execsql_test 8.5 { UPDATE t1 SET docid=docid+3, lid=0 WHERE lid=5; } +do_execsql_test 8.6 { SELECT docid FROM t1 ORDER BY docid; } {1 2 3 4 5 6} +do_execsql_test 8.7 { SELECT docid FROM t1 WHERE t1 MATCH 'A' } {4 6} +do_execsql_test 8.8 { SELECT docid FROM t1 WHERE t1 MATCH 'A' AND lid=5 } {} + +finish_test + Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -191,11 +191,12 @@ fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test - fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test + fts3corrupt2.test fts3first.test fts4langid.test fts4langid2.test + fts4merge.test fts4check.test fts4unicode.test } test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation.