Index: ext/fts3/fts3_expr.c ================================================================== --- ext/fts3/fts3_expr.c +++ ext/fts3/fts3_expr.c @@ -638,12 +638,14 @@ rc = SQLITE_NOMEM; goto exprparse_out; } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; + p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; }else{ int eType = p->eType; @@ -727,10 +729,11 @@ Fts3Expr *pIter = pNotBranch; while( pIter->pLeft ){ pIter = pIter->pLeft; } pIter->pLeft = pRet; + pRet->pParent = pIter; pRet = pNotBranch; } } } *pnConsumed = n - nIn; @@ -742,10 +745,226 @@ pRet = 0; } *ppExpr = pRet; return rc; } + +/* +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. +** +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. +** +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. */ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: +** +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. +*/ +static int fts3ExprParseUnbalanced( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr /* OUT: Parsed query structure */ +){ + int nParsed; + int rc; + ParseContext sParse; + + memset(&sParse, 0, sizeof(ParseContext)); + sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; + sParse.azCol = (const char **)azCol; + sParse.nCol = nCol; + sParse.iDefaultCol = iDefaultCol; + sParse.bFts4 = bFts4; + if( z==0 ){ + *ppExpr = 0; + return SQLITE_OK; + } + if( n<0 ){ + n = (int)strlen(z); + } + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); + + /* Check for mismatched parenthesis */ + if( rc==SQLITE_OK && sParse.nNest ){ + rc = SQLITE_ERROR; + } + + return rc; +} /* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. This function attempts to parse the ** query expression and create a tree of Fts3Expr structures representing the @@ -777,51 +996,66 @@ int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr /* OUT: Parsed query structure */ ){ - int nParsed; - int rc; - ParseContext sParse; - - memset(&sParse, 0, sizeof(ParseContext)); - sParse.pTokenizer = pTokenizer; - sParse.iLangid = iLangid; - sParse.azCol = (const char **)azCol; - sParse.nCol = nCol; - sParse.iDefaultCol = iDefaultCol; - sParse.bFts4 = bFts4; - if( z==0 ){ - *ppExpr = 0; - return SQLITE_OK; - } - if( n<0 ){ - n = (int)strlen(z); - } - rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); - - /* Check for mismatched parenthesis */ - if( rc==SQLITE_OK && sParse.nNest ){ - rc = SQLITE_ERROR; + static const int MAX_EXPR_DEPTH = 12; + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + } + } + if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; } return rc; } + +/* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} /* ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. */ -void sqlite3Fts3ExprFree(Fts3Expr *p){ - if( p ){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3ExprFree(p->pLeft); - sqlite3Fts3ExprFree(p->pRight); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } } } /**************************************************************************** ***************************************************************************** @@ -869,10 +1103,13 @@ ** ** If the second argument is not NULL, then its contents are prepended to ** the returned expression text and then freed using sqlite3_free(). */ static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; int i; zBuf = sqlite3_mprintf( @@ -976,14 +1213,23 @@ } for(ii=0; ii 1} { + set N [list] + if {[llength $L] % 2} { + foreach {a b} [lrange $L 0 end-1] { lappend N [list AND $a $b] } + lappend N [lindex $L end] + } else { + foreach {a b} $L { lappend N [list AND $a $b] } + } + set L $N + } + return [lindex $L 0] +} + +proc balanced_and_tree {nEntry} { + set query [balanced_exprtree_structure $nEntry] + if {$query == "xxx"} { + return "P 1" + } + for {set i 1} {$i <= $nEntry} {incr i} { + regsub xxx $query "{P $i}" query + } + return $query +} + +proc random_tree_structure {nEntry bParen op} { + set query xxx + for {set i 1} {$i < $nEntry} {incr i} { + set x1 [expr int(rand()*4.0)] + set x2 [expr int(rand()*2.0)] + if {$x1==0 && $bParen} { + set query "($query)" + } + if {$x2} { + set query "xxx $op $query" + } else { + set query "$query $op xxx" + } + } + return $query +} + +proc random_and_query {nEntry {bParen 0}} { + set query [random_tree_structure $nEntry $bParen AND] + for {set i 1} {$i <= $nEntry} {incr i} { + regsub xxx $query $i query + } + return $query +} + +proc random_or_query {nEntry} { + set query [random_tree_structure $nEntry 1 OR] + for {set i 1} {$i <= $nEntry} {incr i} { + regsub xxx $query $i query + } + return $query +} + +proc random_andor_query {nEntry} { + set query [random_tree_structure $nEntry 1 AND] + for {set i 1} {$i <= $nEntry} {incr i} { + regsub xxx $query "([random_or_query $nEntry])" query + } + return $query +} + +proc balanced_andor_tree {nEntry} { + set tree [balanced_exprtree_structure $nEntry] + set node "{[balanced_and_tree $nEntry]}" + regsub -all AND $node OR node + regsub -all xxx $tree $node tree + return $tree +} + +# Test that queries like "1 AND 2 AND 3 AND 4..." are transformed to +# balanced trees by FTS. +# +for {set i 1} {$i < 100} {incr i} { + do_test 1.$i { + test_fts3expr2 [random_and_query $i] + } [balanced_and_tree $i] +} + +# Same again, except with parenthesis inserted at arbitrary points. +# +for {set i 1} {$i < 100} {incr i} { + do_test 2.$i { + test_fts3expr2 [random_and_query $i 1] + } [balanced_and_tree $i] +} + +# Now attempt to balance two AND trees joined by an OR. +# +for {set i 1} {$i < 100} {incr i} { + do_test 3.$i { + test_fts3expr2 "[random_and_query $i 1] OR [random_and_query $i 1]" + } [list OR [balanced_and_tree $i] [balanced_and_tree $i]] +} + +# Try trees of AND nodes with leaves that are themselves trees of OR nodes. +# +for {set i 2} {$i < 32} {incr i} { + do_test 3.$i { + test_fts3expr2 [random_andor_query $i] + } [balanced_andor_tree $i] +} + +# These exceed the depth limit. +# +for {set i 33} {$i < 40} {incr i} { + do_test 3.$i { + list [catch {test_fts3expr2 [random_andor_query $i]} msg] $msg + } {1 {Error parsing expression}} +} + +# This also exceeds the depth limit. +# +do_test 4.1 { + set q "1" + for {set i 2} {$i < 5000} {incr i} { + append q " AND $i" + } + list [catch {test_fts3expr2 $q} msg] $msg +} {1 {Error parsing expression}} + +proc create_toggle_tree {nDepth} { + if {$nDepth == 0} { return xxx } + set nNew [expr $nDepth-1] + if {$nDepth % 2} { + return "([create_toggle_tree $nNew]) OR ([create_toggle_tree $nNew])" + } + return "([create_toggle_tree $nNew]) AND ([create_toggle_tree $nNew])" +} + +do_test 4.2 { + list [catch {test_fts3expr2 [create_toggle_tree 17]} msg] $msg +} {1 {Error parsing expression}} + +set query [random_andor_query 12] +set result [balanced_andor_tree 12] +do_faultsim_test fts3expr3-fault-1 -faults oom-* -body { + test_fts3expr2 $::query +} -test { + faultsim_test_result [list 0 $::result] +} + +set sqlite_fts3_enable_parentheses 0 +finish_test + + + + Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -184,10 +184,11 @@ fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test + fts3expr3.test fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test