Index: ext/fts5/fts5_aux.c ================================================================== --- ext/fts5/fts5_aux.c +++ ext/fts5/fts5_aux.c @@ -243,10 +243,119 @@ } } /* ** End of highlight() implementation. **************************************************************************/ + +/* +** Context object passed to the fts5SentenceFinderCb() function. +*/ +typedef struct Fts5SFinder Fts5SFinder; +struct Fts5SFinder { + int iPos; /* Current token position */ + int nFirstAlloc; /* Allocated size of aFirst[] */ + int nFirst; /* Number of entries in aFirst[] */ + int *aFirst; /* Array of first token in each sentence */ + const char *zDoc; /* Document being tokenized */ +}; + +/* +** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if +** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an +** error occurs. +*/ +static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ + if( p->nFirstAlloc==p->nFirst ){ + int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; + int *aNew; + + aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int)); + if( aNew==0 ) return SQLITE_NOMEM; + p->aFirst = aNew; + p->nFirstAlloc = nNew; + } + p->aFirst[p->nFirst++] = iAdd; + return SQLITE_OK; +} + +/* +** This function is an xTokenize() callback used by the auxiliary snippet() +** function. Its job is to identify tokens that are the first in a sentence. +** For each such token, an entry is added to the SFinder.aFirst[] array. +*/ +static int fts5SentenceFinderCb( + void *pContext, /* Pointer to HighlightContext object */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStartOff, /* Start offset of token */ + int iEndOff /* End offset of token */ +){ + int rc = SQLITE_OK; + + if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ + Fts5SFinder *p = (Fts5SFinder*)pContext; + if( p->iPos>0 ){ + int i; + char c = 0; + for(i=iStartOff-1; i>=0; i--){ + c = p->zDoc[i]; + if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; + } + if( i!=iStartOff-1 && (c=='.' || c==':') ){ + rc = fts5SentenceFinderAdd(p, p->iPos); + } + }else{ + rc = fts5SentenceFinderAdd(p, 0); + } + p->iPos++; + } + return rc; +} + +static int fts5SnippetScore( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + int nDocsize, /* Size of column in tokens */ + unsigned char *aSeen, /* Array with one element per query phrase */ + int iCol, /* Column to score */ + int iPos, /* Starting offset to score */ + int nToken, /* Max tokens per snippet */ + int *pnScore, /* OUT: Score */ + int *piPos /* OUT: Adjusted offset */ +){ + int rc; + int i; + int ip = 0; + int ic = 0; + int iOff = 0; + int iFirst = -1; + int nInst; + int nScore = 0; + int iLast = 0; + + rc = pApi->xInstCount(pFts, &nInst); + for(i=0; ixInst(pFts, i, &ip, &ic, &iOff); + if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){ + nScore += (aSeen[ip] ? 1 : 1000); + aSeen[ip] = 1; + if( iFirst<0 ) iFirst = iOff; + iLast = iOff + pApi->xPhraseSize(pFts, ip); + } + } + + *pnScore = nScore; + if( piPos ){ + int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; + if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; + if( iAdj<0 ) iAdj = 0; + *piPos = iAdj; + } + + return rc; +} /* ** Implementation of snippet() function. */ static void fts5SnippetFunction( @@ -265,86 +374,110 @@ int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ unsigned char *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ - int iBestLast; /* Last token of best snippet */ int nBestScore = 0; /* Score of best snippet */ int nColSize = 0; /* Total size of iBestCol in tokens */ + Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ + int nCol; if( nVal!=5 ){ const char *zErr = "wrong number of arguments to function snippet()"; sqlite3_result_error(pCtx, zErr, -1); return; } + nCol = pApi->xColumnCount(pFts); memset(&ctx, 0, sizeof(HighlightContext)); iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); zEllips = (const char*)sqlite3_value_text(apVal[3]); nToken = sqlite3_value_int(apVal[4]); - iBestLast = nToken-1; iBestCol = (iCol>=0 ? iCol : 0); nPhrase = pApi->xPhraseCount(pFts); aSeen = sqlite3_malloc(nPhrase); if( aSeen==0 ){ rc = SQLITE_NOMEM; } - - if( rc==SQLITE_OK ){ - rc = pApi->xInstCount(pFts, &nInst); - } - for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &ip, &iSnippetCol, &iStart); - if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ - int nScore = 1000; - int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); - int j; - aSeen[ip] = 1; - - for(j=i+1; rc==SQLITE_OK && jxInst(pFts, j, &ip, &ic, &io); - iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; - if( rc==SQLITE_OK && ic==iSnippetCol && iLastiLast ) iLast = iFinal; - } - } - - if( rc==SQLITE_OK && nScore>nBestScore ){ - iBestCol = iSnippetCol; - iBestStart = iStart; - iBestLast = iLast; - nBestScore = nScore; - } - } - } - - if( rc==SQLITE_OK ){ - rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); - } - if( rc==SQLITE_OK ){ - rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); - } - if( ctx.zIn ){ - if( rc==SQLITE_OK ){ - rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); - } - - if( (iBestStart+nToken-1)>iBestLast ){ - iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; - } - if( iBestStart+nToken>nColSize ){ - iBestStart = nColSize - nToken; - } - if( iBestStart<0 ) iBestStart = 0; + if( rc==SQLITE_OK ){ + rc = pApi->xInstCount(pFts, &nInst); + } + + memset(&sFinder, 0, sizeof(Fts5SFinder)); + for(i=0; ixColumnText(pFts, i, &sFinder.zDoc, &nDoc); + if( rc!=SQLITE_OK ) break; + rc = pApi->xTokenize(pFts, + sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb + ); + if( rc!=SQLITE_OK ) break; + rc = pApi->xColumnSize(pFts, i, &nDocsize); + if( rc!=SQLITE_OK ) break; + + for(ii=0; rc==SQLITE_OK && iixInst(pFts, ii, &ip, &ic, &io); + if( ic!=i || rc!=SQLITE_OK ) continue; + memset(aSeen, 0, nPhrase); + rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, + io, nToken, &nScore, &iAdj + ); + if( rc==SQLITE_OK && nScore>nBestScore ){ + nBestScore = nScore; + iBestCol = i; + iBestStart = iAdj; + nColSize = nDocsize; + } + + if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ + for(jj=0; jj<(sFinder.nFirst-1); jj++){ + if( sFinder.aFirst[jj+1]>io ) break; + } + + if( sFinder.aFirst[jj]nBestScore ){ + nBestScore = nScore; + iBestCol = i; + iBestStart = sFinder.aFirst[jj]; + nColSize = nDocsize; + } + } + } + } + } + } + + if( rc==SQLITE_OK ){ + rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); + } + if( rc==SQLITE_OK && nColSize==0 ){ + rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); + } + if( ctx.zIn ){ + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); + } ctx.iRangeStart = iBestStart; ctx.iRangeEnd = iBestStart + nToken - 1; if( iBestStart>0 ){ @@ -363,19 +496,19 @@ if( ctx.iRangeEnd>=(nColSize-1) ){ fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); }else{ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } - - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_code(pCtx, rc); - } - sqlite3_free(ctx.zOut); - } + } + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + sqlite3_free(ctx.zOut); sqlite3_free(aSeen); + sqlite3_free(sFinder.aFirst); } /************************************************************************/ /* Index: ext/fts5/test/fts5af.test ================================================================== --- ext/fts5/test/fts5af.test +++ ext/fts5/test/fts5af.test @@ -70,49 +70,60 @@ 2.1 {X o o o o o o o} {[X] o o o o o o...} 2.2 {o X o o o o o o} {o [X] o o o o o...} 2.3 {o o X o o o o o} {o o [X] o o o o...} 2.4 {o o o X o o o o} {o o o [X] o o o...} - 2.5 {o o o o X o o o} {...o o o [X] o o o} - 2.6 {o o o o o X o o} {...o o o o [X] o o} - 2.7 {o o o o o o X o} {...o o o o o [X] o} + 2.5 {o o o o X o o o} {o o o o [X] o o...} + 2.6 {o o o o o X o o} {o o o o o [X] o...} + 2.7 {o o o o o o X o} {o o o o o o [X]...} 2.8 {o o o o o o o X} {...o o o o o o [X]} + + 2.9 {o o o o o o o X o} {...o o o o o [X] o} + 2.10 {o o o o o o o X o o} {...o o o o [X] o o} + 2.11 {o o o o o o o X o o o} {...o o o [X] o o o} + 2.12 {o o o o o o o X o o o o} {...o o o [X] o o o...} + 3.1 {X o o o o o o o o} {[X] o o o o o o...} 3.2 {o X o o o o o o o} {o [X] o o o o o...} 3.3 {o o X o o o o o o} {o o [X] o o o o...} 3.4 {o o o X o o o o o} {o o o [X] o o o...} - 3.5 {o o o o X o o o o} {...o o o [X] o o o...} - 3.6 {o o o o o X o o o} {...o o o [X] o o o} - 3.7 {o o o o o o X o o} {...o o o o [X] o o} - 3.8 {o o o o o o o X o} {...o o o o o [X] o} - 3.9 {o o o o o o o o X} {...o o o o o o [X]} + + 3.5 {o o o o o o o X o o o o} {...o o o [X] o o o...} + 3.6 {o o o o o o o o X o o o} {...o o o [X] o o o} + 3.7 {o o o o o o o o o X o o} {...o o o o [X] o o} + 3.8 {o o o o o o o o o o X o} {...o o o o o [X] o} + 3.9 {o o o o o o o o o o o X} {...o o o o o o [X]} 4.1 {X o o o o o X o o} {[X] o o o o o [X]...} - 4.2 {o X o o o o o X o} {...[X] o o o o o [X]...} - 4.3 {o o X o o o o o X} {...[X] o o o o o [X]} + 4.2 {o o o o o o o X o o o o o X o} {...[X] o o o o o [X]...} + 4.3 {o o o o o o o o X o o o o o X} {...[X] o o o o o [X]} 5.1 {X o o o o X o o o} {[X] o o o o [X] o...} - 5.2 {o X o o o o X o o} {...[X] o o o o [X] o...} - 5.3 {o o X o o o o X o} {...[X] o o o o [X] o} - 5.4 {o o o X o o o o X} {...o [X] o o o o [X]} + 5.2 {o o o o o o o X o o o o X o o} {...[X] o o o o [X] o...} + 5.3 {o o o o o o o o X o o o o X o} {...[X] o o o o [X] o} + 5.4 {o o o o o o o o o X o o o o X} {...o [X] o o o o [X]} 6.1 {X o o o X o o o} {[X] o o o [X] o o...} 6.2 {o X o o o X o o o} {o [X] o o o [X] o...} - 6.3 {o o X o o o X o o} {...o [X] o o o [X] o...} - 6.4 {o o o X o o o X o} {...o [X] o o o [X] o} - 6.5 {o o o o X o o o X} {...o o [X] o o o [X]} + 6.3 {o o o o o o o X o o o X o o} {...o [X] o o o [X] o...} + 6.4 {o o o o o o o o X o o o X o} {...o [X] o o o [X] o} + 6.5 {o o o o o o o o o X o o o X} {...o o [X] o o o [X]} 7.1 {X o o X o o o o o} {[X] o o [X] o o o...} 7.2 {o X o o X o o o o} {o [X] o o [X] o o...} - 7.3 {o o X o o X o o o} {...o [X] o o [X] o o...} - 7.4 {o o o X o o X o o} {...o [X] o o [X] o o} - 7.5 {o o o o X o o X o} {...o o [X] o o [X] o} - 7.6 {o o o o o X o o X} {...o o o [X] o o [X]} + 7.3 {o o o o o o o X o o X o o o} {...o [X] o o [X] o o...} + 7.4 {o o o o o o o o X o o X o o} {...o [X] o o [X] o o} + 7.5 {o o o o o o o o o X o o X o} {...o o [X] o o [X] o} + 7.6 {o o o o o o o o o o X o o X} {...o o o [X] o o [X]} - 8.1 {o o o o X o o o o o o o o o o o o o o o o o o o o o X X X o o o} + 8.1 {o o o o o o o o o X o o o o o o o o o o o o o o o o X X X o o o} {...o o [X] [X] [X] o o...} + 8.2 {o o o o o o o. o o X o o o o o o o o o o o o o o o o X X X o o o} + {...o o [X] o o o o...} + 8.3 {o o o o X o o o o o o o o o o o o o o o o o o o o o X X X o o o} + {o o o o [X] o o...} } { do_snippet_test 1.$tn $doc X $res } if {[detail_is_full]} { @@ -125,27 +136,46 @@ 1.6 {o o o o o X Y} {o o o o o [X Y]} 2.1 {X Y o o o o o o} {[X Y] o o o o o...} 2.2 {o X Y o o o o o} {o [X Y] o o o o...} 2.3 {o o X Y o o o o} {o o [X Y] o o o...} - 2.4 {o o o X Y o o o} {...o o [X Y] o o o} - 2.5 {o o o o X Y o o} {...o o o [X Y] o o} - 2.6 {o o o o o X Y o} {...o o o o [X Y] o} - 2.7 {o o o o o o X Y} {...o o o o o [X Y]} + 2.4 {o o o o o o o X Y o o o} {...o o [X Y] o o o} + 2.5 {o o o o o o o o X Y o o} {...o o o [X Y] o o} + 2.6 {o o o o o o o o o X Y o} {...o o o o [X Y] o} + 2.7 {o o o o o o o o o o X Y} {...o o o o o [X Y]} 3.1 {X Y o o o o o o o} {[X Y] o o o o o...} 3.2 {o X Y o o o o o o} {o [X Y] o o o o...} 3.3 {o o X Y o o o o o} {o o [X Y] o o o...} - 3.4 {o o o X Y o o o o} {...o o [X Y] o o o...} - 3.5 {o o o o X Y o o o} {...o o [X Y] o o o} - 3.6 {o o o o o X Y o o} {...o o o [X Y] o o} - 3.7 {o o o o o o X Y o} {...o o o o [X Y] o} - 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} + 3.4 {o o o o o o o X Y o o o o} {...o o [X Y] o o o...} + 3.5 {o o o o o o o o X Y o o o} {...o o [X Y] o o o} + 3.6 {o o o o o o o o o X Y o o} {...o o o [X Y] o o} + 3.7 {o o o o o o o o o o X Y o} {...o o o o [X Y] o} + 3.8 {o o o o o o o o o o o X Y} {...o o o o o [X Y]} } { do_snippet_test 2.$tn $doc "X + Y" $res } } +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x1 USING fts5(a, b); + INSERT INTO x1 VALUES('xyz', '1 2 3 4 5 6 7 8 9 10 11 12 13'); + SELECT snippet(x1, 1, '[', ']', '...', 5) FROM x1('xyz'); +} { + {1 2 3 4 5...} +} + +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE p1 USING fts5(a, b); + INSERT INTO p1 VALUES( + 'x a a a a a a a a a a', + 'a a a a a a a a a a a a a a a a a a a x' + ); +} +do_execsql_test 5.1 { + SELECT snippet(p1, 0, '[', ']', '...', 6) FROM p1('x'); +} {{[x] a a a a a...}} + } ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5unicode2.test ================================================================== --- ext/fts5/test/fts5unicode2.test +++ ext/fts5/test/fts5unicode2.test @@ -158,16 +158,16 @@ 3 "ROW" { ...returns the value of y on the same [row] that contains the maximum x value. } 4 "rollback" { - ...[ROLLBACK]. Instead, the pending statement - will return SQLITE_ABORT upon next access after the [ROLLBACK]. + Pending statements no longer block [ROLLBACK]. Instead, the pending + statement will return SQLITE_ABORT upon... } 5 "rOllback" { - ...[ROLLBACK]. Instead, the pending statement - will return SQLITE_ABORT upon next access after the [ROLLBACK]. + Pending statements no longer block [ROLLBACK]. Instead, the pending + statement will return SQLITE_ABORT upon... } 6 "lang*" { Added support for the FTS4 [languageid] option. } } { Index: src/main.c ================================================================== --- src/main.c +++ src/main.c @@ -2946,15 +2946,24 @@ ** database schema yet. This is delayed until the first time the database ** is accessed. */ sqlite3Error(db, SQLITE_OK); sqlite3RegisterPerConnectionBuiltinFunctions(db); + rc = sqlite3_errcode(db); + +#ifdef SQLITE_ENABLE_FTS5 + /* Register any built-in FTS5 module before loading the automatic + ** extensions. This allows automatic extensions to register FTS5 + ** tokenizers and auxiliary functions. */ + if( !db->mallocFailed && rc==SQLITE_OK ){ + rc = sqlite3Fts5Init(db); + } +#endif /* Load automatic extensions - extensions that have been registered ** using the sqlite3_automatic_extension() API. */ - rc = sqlite3_errcode(db); if( rc==SQLITE_OK ){ sqlite3AutoLoadExtensions(db); rc = sqlite3_errcode(db); if( rc!=SQLITE_OK ){ goto opendb_out; @@ -2977,16 +2986,10 @@ #ifdef SQLITE_ENABLE_FTS3 /* automatically defined by SQLITE_ENABLE_FTS4 */ if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); } -#endif - -#ifdef SQLITE_ENABLE_FTS5 - if( !db->mallocFailed && rc==SQLITE_OK ){ - rc = sqlite3Fts5Init(db); - } #endif #ifdef SQLITE_ENABLE_ICU if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3IcuInit(db);