Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Add the fts3 matchinfo 'b' flag. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-matchinfo-y |
Files: | files | file ages | folders |
SHA1: | b9b77972d88171e4239b8194f308eb5d |
User & Date: | dan 2015-05-05 20:39:53.730 |
Context
2015-05-06
| ||
08:43 | Further optimizations for the 'y' and 'b' matchinfo operators. (check-in: fbd038bb57 user: dan tags: fts3-matchinfo-y) | |
2015-05-05
| ||
20:39 | Add the fts3 matchinfo 'b' flag. (check-in: b9b77972d8 user: dan tags: fts3-matchinfo-y) | |
19:37 | Optimizations for the matchinfo() function, particularly the 'y' flag. (check-in: dddd7e1829 user: dan tags: fts3-matchinfo-y) | |
Changes
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
24 25 26 27 28 29 30 31 32 33 34 35 36 37 | #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ #define FTS3_MATCHINFO_LCS 's' /* nCol values */ #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ /* ** The default value for the second argument to matchinfo(). */ #define FTS3_MATCHINFO_DEFAULT "pcx" | > | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ #define FTS3_MATCHINFO_LCS 's' /* nCol values */ #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ #define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */ /* ** The default value for the second argument to matchinfo(). */ #define FTS3_MATCHINFO_DEFAULT "pcx" |
︙ | ︙ | |||
85 86 87 88 89 90 91 92 93 94 95 96 97 98 | */ typedef struct MatchInfo MatchInfo; struct MatchInfo { Fts3Cursor *pCursor; /* FTS3 Cursor */ int nCol; /* Number of columns in table */ int nPhrase; /* Number of matchable phrases in query */ sqlite3_int64 nDoc; /* Number of docs in database */ u32 *aMatchinfo; /* Pre-allocated buffer */ }; /* ** An instance of this structure is used to manage a pair of buffers, each ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below ** for details. | > | 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | */ typedef struct MatchInfo MatchInfo; struct MatchInfo { Fts3Cursor *pCursor; /* FTS3 Cursor */ int nCol; /* Number of columns in table */ int nPhrase; /* Number of matchable phrases in query */ sqlite3_int64 nDoc; /* Number of docs in database */ char flag; u32 *aMatchinfo; /* Pre-allocated buffer */ }; /* ** An instance of this structure is used to manage a pair of buffers, each ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below ** for details. |
︙ | ︙ | |||
232 233 234 235 236 237 238 | */ static void fts3GetDeltaPosition(char **pp, int *piPos){ int iVal; *pp += fts3GetVarint32(*pp, &iVal); *piPos += (iVal-2); } | < < > | | | | 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 | */ static void fts3GetDeltaPosition(char **pp, int *piPos){ int iVal; *pp += fts3GetVarint32(*pp, &iVal); *piPos += (iVal-2); } /* ** Helper function for fts3ExprIterate() (see below). */ static int fts3ExprIterate2( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int bExcludeEof, int *piPhrase, /* Pointer to phrase counter */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int rc; /* Return code */ if( bExcludeEof && pExpr->bEof ){ rc = SQLITE_OK; }else{ int eType = pExpr->eType; /* Type of expression node pExpr */ if( eType!=FTSQUERY_PHRASE ){ assert( pExpr->pLeft && pExpr->pRight ); rc = fts3ExprIterate2(pExpr->pLeft, bExcludeEof, piPhrase, x, pCtx); if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ rc = fts3ExprIterate2(pExpr->pRight, bExcludeEof, piPhrase, x, pCtx); } }else{ rc = x(pExpr, *piPhrase, pCtx); (*piPhrase)++; } } return rc; |
︙ | ︙ | |||
275 276 277 278 279 280 281 282 283 284 285 | ** If the callback function returns anything other than SQLITE_OK, ** the iteration is abandoned and the error code returned immediately. ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int iPhrase = 0; /* Variable used as the phrase counter */ | > | | 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | ** If the callback function returns anything other than SQLITE_OK, ** the iteration is abandoned and the error code returned immediately. ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int bExcludeEof, /* Include nodes already at EOF */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int iPhrase = 0; /* Variable used as the phrase counter */ return fts3ExprIterate2(pExpr, bExcludeEof, &iPhrase, x, pCtx); } /* ** This is an fts3ExprIterate() callback used while loading the doclists ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also ** fts3ExprLoadDoclists(). */ |
︙ | ︙ | |||
318 319 320 321 322 323 324 | Fts3Cursor *pCsr, /* Fts3 cursor for current query */ int *pnPhrase, /* OUT: Number of phrases in query */ int *pnToken /* OUT: Number of tokens in query */ ){ int rc; /* Return Code */ LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ sCtx.pCsr = pCsr; | | | | 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | Fts3Cursor *pCsr, /* Fts3 cursor for current query */ int *pnPhrase, /* OUT: Number of phrases in query */ int *pnToken /* OUT: Number of tokens in query */ ){ int rc; /* Return Code */ LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ sCtx.pCsr = pCsr; rc = fts3ExprIterate(pCsr->pExpr, 0, fts3ExprLoadDoclistsCb, (void *)&sCtx); if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; } static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ (*(int *)ctx)++; UNUSED_PARAMETER(pExpr); UNUSED_PARAMETER(iPhrase); return SQLITE_OK; } static int fts3ExprPhraseCount(Fts3Expr *pExpr){ int nPhrase = 0; (void)fts3ExprIterate(pExpr, 0, fts3ExprPhraseCountCb, (void *)&nPhrase); return nPhrase; } /* ** Advance the position list iterator specified by the first two ** arguments so that it points to the first element with a value greater ** than or equal to parameter iNext. |
︙ | ︙ | |||
548 549 550 551 552 553 554 | ** the set of phrases in the expression to populate the aPhrase[] array. */ sIter.pCsr = pCsr; sIter.iCol = iCol; sIter.nSnippet = nSnippet; sIter.nPhrase = nList; sIter.iCurrent = -1; | | | 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 | ** the set of phrases in the expression to populate the aPhrase[] array. */ sIter.pCsr = pCsr; sIter.iCol = iCol; sIter.nSnippet = nSnippet; sIter.nPhrase = nList; sIter.iCurrent = -1; rc = fts3ExprIterate(pCsr->pExpr, 0, fts3SnippetFindPositions, (void*)&sIter); if( rc==SQLITE_OK ){ /* Set the *pmSeen output variable. */ for(i=0; i<nList; i++){ if( sIter.aPhrase[i].pHead ){ *pmSeen |= (u64)1 << i; } |
︙ | ︙ | |||
932 933 934 935 936 937 938 | MatchInfo *p = (MatchInfo *)pCtx; /* This must be a phrase */ assert( pExpr->pPhrase ); if( pExpr->iDocid==p->pCursor->iPrevId ){ Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; | | > > > > > > > > | > > > | 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 | MatchInfo *p = (MatchInfo *)pCtx; /* This must be a phrase */ assert( pExpr->pPhrase ); if( pExpr->iDocid==p->pCursor->iPrevId ){ Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; int iStart; Fts3Phrase *pPhrase = pExpr->pPhrase; char *pIter = pPhrase->doclist.pList; int iCol = 0; assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); if( p->flag==FTS3_MATCHINFO_LHITS ){ iStart = iPhrase * p->nCol; }else{ iStart = iPhrase * ((p->nCol + 31) / 32); } while( 1 ){ int nHit = fts3ColumnlistCount(&pIter); if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ if( p->flag==FTS3_MATCHINFO_LHITS ){ p->aMatchinfo[iStart + iCol] = (u32)nHit; }else if( nHit ){ p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); } } assert( *pIter==0x00 || *pIter==0x01 ); if( *pIter!=0x01 ) break; pIter++; pIter += fts3GetVarint32(pIter, &iCol); } } |
︙ | ︙ | |||
965 966 967 968 969 970 971 972 973 974 975 976 977 978 | || (cArg==FTS3_MATCHINFO_NCOL) || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) || (cArg==FTS3_MATCHINFO_LCS) || (cArg==FTS3_MATCHINFO_HITS) || (cArg==FTS3_MATCHINFO_LHITS) ){ return SQLITE_OK; } sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); return SQLITE_ERROR; } | > | 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 | || (cArg==FTS3_MATCHINFO_NCOL) || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) || (cArg==FTS3_MATCHINFO_LCS) || (cArg==FTS3_MATCHINFO_HITS) || (cArg==FTS3_MATCHINFO_LHITS) || (cArg==FTS3_MATCHINFO_LHITS_BM) ){ return SQLITE_OK; } sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); return SQLITE_ERROR; } |
︙ | ︙ | |||
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 | case FTS3_MATCHINFO_LCS: nVal = pInfo->nCol; break; case FTS3_MATCHINFO_LHITS: nVal = pInfo->nCol * pInfo->nPhrase; break; default: assert( cArg==FTS3_MATCHINFO_HITS ); nVal = pInfo->nCol * pInfo->nPhrase * 3; break; } | > > > > | 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 | case FTS3_MATCHINFO_LCS: nVal = pInfo->nCol; break; case FTS3_MATCHINFO_LHITS: nVal = pInfo->nCol * pInfo->nPhrase; break; case FTS3_MATCHINFO_LHITS_BM: nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); break; default: assert( cArg==FTS3_MATCHINFO_HITS ); nVal = pInfo->nCol * pInfo->nPhrase * 3; break; } |
︙ | ︙ | |||
1102 1103 1104 1105 1106 1107 1108 | /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. **/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); | | | 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 | /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. **/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, 0, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; } |
︙ | ︙ | |||
1186 1187 1188 1189 1190 1191 1192 | ){ int rc = SQLITE_OK; int i; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; sqlite3_stmt *pSelect = 0; for(i=0; rc==SQLITE_OK && zArg[i]; i++){ | | | 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 | ){ int rc = SQLITE_OK; int i; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; sqlite3_stmt *pSelect = 0; for(i=0; rc==SQLITE_OK && zArg[i]; i++){ pInfo->flag = zArg[i]; switch( zArg[i] ){ case FTS3_MATCHINFO_NPHRASE: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; break; case FTS3_MATCHINFO_NCOL: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
︙ | ︙ | |||
1246 1247 1248 1249 1250 1251 1252 1253 | case FTS3_MATCHINFO_LCS: rc = fts3ExprLoadDoclists(pCsr, 0, 0); if( rc==SQLITE_OK ){ rc = fts3MatchinfoLcs(pCsr, pInfo); } break; case FTS3_MATCHINFO_LHITS: { | > | | | | | 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 | case FTS3_MATCHINFO_LCS: rc = fts3ExprLoadDoclists(pCsr, 0, 0); if( rc==SQLITE_OK ){ rc = fts3MatchinfoLcs(pCsr, pInfo); } break; case FTS3_MATCHINFO_LHITS_BM: case FTS3_MATCHINFO_LHITS: { int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); memset(pInfo->aMatchinfo, 0, nZero); (void)fts3ExprIterate(pCsr->pExpr, 1, fts3ExprLHitsCb, (void*)pInfo); break; } default: { Fts3Expr *pExpr; assert( zArg[i]==FTS3_MATCHINFO_HITS ); pExpr = pCsr->pExpr; rc = fts3ExprLoadDoclists(pCsr, 0, 0); if( rc!=SQLITE_OK ) break; if( bGlobal ){ if( pCsr->pDeferred ){ rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); if( rc!=SQLITE_OK ) break; } rc = fts3ExprIterate(pExpr, 0, fts3ExprGlobalHitsCb,(void*)pInfo); if( rc!=SQLITE_OK ) break; } (void)fts3ExprIterate(pExpr, 0, fts3ExprLocalHitsCb,(void*)pInfo); break; } } pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); } |
︙ | ︙ | |||
1566 1567 1568 1569 1570 1571 1572 | /* Initialize the contents of sCtx.aTerm[] for column iCol. There is ** no way that this operation can fail, so the return code from ** fts3ExprIterate() can be discarded. */ sCtx.iCol = iCol; sCtx.iTerm = 0; | | | 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 | /* Initialize the contents of sCtx.aTerm[] for column iCol. There is ** no way that this operation can fail, so the return code from ** fts3ExprIterate() can be discarded. */ sCtx.iCol = iCol; sCtx.iTerm = 0; (void)fts3ExprIterate(pCsr->pExpr, 0, fts3ExprTermOffsetInit, (void*)&sCtx); /* Retreive the text stored in column iCol. If an SQL NULL is stored ** in column iCol, jump immediately to the next iteration of the loop. ** If an OOM occurs while retrieving the data (this can happen if SQLite ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM ** to the caller. */ |
︙ | ︙ |
Changes to test/fts3matchinfo.test.
︙ | ︙ | |||
503 504 505 506 507 508 509 | 7 "a OR (a AND b)" { 1 {1 2 1 2 0 1} 2 {1 0 1 0 1 0} 3 {0 1 0 1 1 2} 4 {1 0 1 0 0 1} 5 {1 0 1 0 0 1} 6 {1 0 1 0 2 2} 7 {2 1 0 0 0 0} 8 {1 2 1 2 2 1} 9 {1 1 1 1 1 3} 10 {1 3 0 0 0 0} } } { | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 | 7 "a OR (a AND b)" { 1 {1 2 1 2 0 1} 2 {1 0 1 0 1 0} 3 {0 1 0 1 1 2} 4 {1 0 1 0 0 1} 5 {1 0 1 0 0 1} 6 {1 0 1 0 2 2} 7 {2 1 0 0 0 0} 8 {1 2 1 2 2 1} 9 {1 1 1 1 1 3} 10 {1 3 0 0 0 0} } } { do_execsql_test 11.1.$tn.1 { SELECT rowid, mit(matchinfo(tt, 'y')) FROM tt WHERE tt MATCH $expr } $res set r2 [list] foreach {rowid L} $res { lappend r2 $rowid set M [list] foreach {a b} $L { lappend M [expr ($a ? 1 : 0) + ($b ? 2 : 0)] } lappend r2 $M } do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 breakpoint do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } set sqlite_fts3_enable_parentheses 0 #--------------------------------------------------------------------------- # Test the 'b' matchinfo flag # set sqlite_fts3_enable_parentheses 1 reset_db db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } [list [list [expr 1<<4] [expr 1<<(45-32)]]] set sqlite_fts3_enable_parentheses 0 finish_test |
Changes to test/fts3query.test.
︙ | ︙ | |||
169 170 171 172 173 174 175 | } { 1 "SELECT matchinfo(content) FROM t2 WHERE t2 MATCH 'history'" matchinfo 2 "SELECT offsets(content) FROM t2 WHERE t2 MATCH 'history'" offsets 3 "SELECT snippet(content) FROM t2 WHERE t2 MATCH 'history'" snippet 4 "SELECT optimize(content) FROM t2 WHERE t2 MATCH 'history'" optimize } do_catchsql_test 5.5.1 { | | | | 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | } { 1 "SELECT matchinfo(content) FROM t2 WHERE t2 MATCH 'history'" matchinfo 2 "SELECT offsets(content) FROM t2 WHERE t2 MATCH 'history'" offsets 3 "SELECT snippet(content) FROM t2 WHERE t2 MATCH 'history'" snippet 4 "SELECT optimize(content) FROM t2 WHERE t2 MATCH 'history'" optimize } do_catchsql_test 5.5.1 { SELECT matchinfo(t2, 'abcd') FROM t2 WHERE t2 MATCH 'history' } {1 {unrecognized matchinfo request: d}} do_execsql_test 5.5 { DROP TABLE t2 } # Test the snippet() function with 1 to 6 arguments. # do_execsql_test 6.1 { |
︙ | ︙ |