Index: src/expr.c ================================================================== --- src/expr.c +++ src/expr.c @@ -1813,10 +1813,69 @@ */ int sqlite3ExprIsTableConstant(Expr *p, int iCur){ return exprIsConst(p, 3, iCur); } + +/* +** sqlite3WalkExpr() callback used by sqlite3ExprIsConstantOrGroupBy(). +*/ +static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){ + ExprList *pGroupBy = pWalker->u.pGroupBy; + int i; + + /* Check if pExpr is identical to any GROUP BY term. If so, consider + ** it constant. */ + for(i=0; inExpr; i++){ + Expr *p = pGroupBy->a[i].pExpr; + if( sqlite3ExprCompare(pExpr, p, -1)<2 ){ + CollSeq *pColl = sqlite3ExprCollSeq(pWalker->pParse, p); + if( pColl==0 || sqlite3_stricmp("BINARY", pColl->zName)==0 ){ + return WRC_Prune; + } + } + } + + /* Check if pExpr is a sub-select. If so, consider it variable. */ + if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + pWalker->eCode = 0; + return WRC_Abort; + } + + return exprNodeIsConstant(pWalker, pExpr); +} + +/* +** Walk the expression tree passed as the first argument. Return non-zero +** if the expression consists entirely of constants or copies of terms +** in pGroupBy that sort with the BINARY collation sequence. +** +** This routine is used to determine if a term of the HAVING clause can +** be promoted into the WHERE clause. In order for such a promotion to work, +** the value of the HAVING clause term must be the same for all members of +** a "group". The requirement that the GROUP BY term must be BINARY +** assumes that no other collating sequence will have a finer-grained +** grouping than binary. In other words (A=B COLLATE binary) implies +** A=B in every other collating sequence. The requirement that the +** GROUP BY be BINARY is stricter than necessary. It would also work +** to promote HAVING clauses that use the same alternative collating +** sequence as the GROUP BY term, but that is much harder to check, +** alternative collating sequences are uncommon, and this is only an +** optimization, so we take the easy way out and simply require the +** GROUP BY to use the BINARY collating sequence. +*/ +int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){ + Walker w; + memset(&w, 0, sizeof(w)); + w.eCode = 1; + w.xExprCallback = exprNodeIsConstantOrGroupBy; + w.u.pGroupBy = pGroupBy; + w.pParse = pParse; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + /* ** Walk an expression tree. Return non-zero if the expression is constant ** or a function call with constant arguments. Return and 0 if there ** are any variables. ** Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -4876,10 +4876,81 @@ } } #else # define explainSimpleCount(a,b,c) #endif + +/* +** Context object for havingToWhereExprCb(). +*/ +struct HavingToWhereCtx { + Expr **ppWhere; + ExprList *pGroupBy; +}; + +/* +** sqlite3WalkExpr() callback used by havingToWhere(). +** +** If the node passed to the callback is a TK_AND node, return +** WRC_Continue to tell sqlite3WalkExpr() to iterate through child nodes. +** +** Otherwise, return WRC_Prune. In this case, also check if the +** sub-expression matches the criteria for being moved to the WHERE +** clause. If so, add it to the WHERE clause and replace the sub-expression +** within the HAVING expression with a constant "1". +*/ +static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op!=TK_AND ){ + struct HavingToWhereCtx *p = pWalker->u.pHavingCtx; + if( sqlite3ExprIsConstantOrGroupBy(pWalker->pParse, pExpr, p->pGroupBy) ){ + sqlite3 *db = pWalker->pParse->db; + Expr *pNew = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[1], 0); + if( pNew ){ + Expr *pWhere = *(p->ppWhere); + SWAP(Expr, *pNew, *pExpr); + pNew = sqlite3ExprAnd(db, pWhere, pNew); + *(p->ppWhere) = pNew; + } + } + return WRC_Prune; + } + return WRC_Continue; +} + +/* +** Transfer eligible terms from the HAVING clause of a query, which is +** processed after grouping, to the WHERE clause, which is processed before +** grouping. For example, the query: +** +** SELECT * FROM WHERE a=? GROUP BY b HAVING b=? AND c=? +** +** can be rewritten as: +** +** SELECT * FROM WHERE a=? AND b=? GROUP BY b HAVING c=? +** +** A term of the HAVING expression is eligible for transfer if it consists +** entirely of constants and expressions that are also GROUP BY terms that +** use the "BINARY" collation sequence. +*/ +static void havingToWhere( + Parse *pParse, + ExprList *pGroupBy, + Expr *pHaving, + Expr **ppWhere +){ + struct HavingToWhereCtx sCtx; + Walker sWalker; + + sCtx.ppWhere = ppWhere; + sCtx.pGroupBy = pGroupBy; + + memset(&sWalker, 0, sizeof(sWalker)); + sWalker.pParse = pParse; + sWalker.xExprCallback = havingToWhereExprCb; + sWalker.u.pHavingCtx = &sCtx; + sqlite3WalkExpr(&sWalker, pHaving); +} /* ** Generate code for the SELECT statement given in the p argument. ** ** The results are returned according to the SelectDest structure. @@ -5341,10 +5412,15 @@ sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0; sAggInfo.pGroupBy = pGroupBy; sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); if( pHaving ){ + if( pGroupBy ){ + assert( pWhere==p->pWhere ); + havingToWhere(pParse, pGroupBy, pHaving, &p->pWhere); + pWhere = p->pWhere; + } sqlite3ExprAnalyzeAggregates(&sNC, pHaving); } sAggInfo.nAccumulator = sAggInfo.nColumn; for(i=0; iWHERE optimization. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix having + +do_execsql_test 1.0 { + CREATE TABLE t2(c, d); + + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 2); + INSERT INTO t1 VALUES(1, 3); + INSERT INTO t1 VALUES(2, 4); + INSERT INTO t1 VALUES(1, 5); + INSERT INTO t1 VALUES(2, 6); +} {} + +foreach {tn sql res} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" {2 12} + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2 AND sum(b)>10" {2 12} + 3 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>12" {} +} { + do_execsql_test 1.$tn $sql $res +} + +# Run an EXPLAIN command for both SQL statements. Return true if +# the outputs are identical, or false otherwise. +# +proc compare_vdbe {sql1 sql2} { + set r1 [list] + set r2 [list] + db eval "explain $sql1" { lappend r1 $opcode $p1 $p2 $p3 $p4 $p5} + db eval "explain $sql2" { lappend r2 $opcode $p1 $p2 $p3 $p4 $p5} + return [expr {$r1==$r2}] +} + +proc do_compare_vdbe_test {tn sql1 sql2 res} { + uplevel [list do_test $tn [list compare_vdbe $sql1 $sql2] $res] +} + +#------------------------------------------------------------------------- +# Test that various statements that are eligible for the optimization +# produce the same VDBE code as optimizing by hand does. +# +foreach {tn sql1 sql2} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a" + + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>5 AND a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a HAVING sum(b)>5" + + 3 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE binary" + + 4 { + SELECT x,y FROM ( + SELECT a AS x, sum(b) AS y FROM t1 + GROUP BY a + ) WHERE x BETWEEN 8888 AND 9999 + } { + SELECT x,y FROM ( + SELECT a AS x, sum(b) AS y FROM t1 + WHERE x BETWEEN 8888 AND 9999 + GROUP BY a + ) + } + + 5 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING 0" + "SELECT a, sum(b) FROM t1 WHERE 0 GROUP BY a COLLATE binary" + + 6 "SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d HAVING b=d" + "SELECT count(*) FROM t1,t2 WHERE a=c AND b=d GROUP BY b, d" + + 7 { + SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d + HAVING b=d COLLATE nocase + } { + SELECT count(*) FROM t1,t2 WHERE a=c AND b=d COLLATE nocase + GROUP BY b, d + } + + 8 "SELECT a, sum(b) FROM t1 GROUP BY a||b HAVING substr(a||b, 1, 1)='a'" + "SELECT a, sum(b) FROM t1 WHERE substr(a||b, 1, 1)='a' GROUP BY a||b" +} { + do_compare_vdbe_test 2.$tn $sql1 $sql2 1 +} + +#------------------------------------------------------------------------- +# 1: Test that the optimization is only applied if the GROUP BY term +# uses BINARY collation. +# +# 2: Not applied if there is a non-deterministic function in the HAVING +# term. +# +foreach {tn sql1 sql2} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE nocase HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE nocase" + + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING randomblob(a)