Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch having-where-optimization Excluding Merge-Ins
This is equivalent to a diff from d7bb79ed3a to 8424492eac
2017-05-02
| ||
16:55 | Move terms of the HAVING clause that reference only columns in the GROUP BY clause over to the WHERE clause, resulting in a faster query plan. (check-in: 47cbb471d0 user: drh tags: trunk) | |
16:46 | Additional comments on the sqlite3ExprIsConstantOrGroupBy() routine. No code changes. (Closed-Leaf check-in: 8424492eac user: drh tags: having-where-optimization) | |
2017-05-01
| ||
19:53 | Remove an unnecessary branch. (check-in: a33179596f user: drh tags: having-where-optimization) | |
2017-04-29
| ||
20:53 | Automatically transfer terms from the HAVING clause to the WHERE clause of an aggregate query in cases where the result of evaluating the term depends only one one or more of the GROUP BY expressions (and on no other inputs). (check-in: 5375a3ce56 user: dan tags: having-where-optimization) | |
18:02 | Improvements to opcode documentation in the bytecode engine. No changes to code. (check-in: e54c9f8db5 user: drh tags: trunk) | |
15:27 | Evaluate WHERE clause terms that reference only the index before evaluating terms that require the table, and thereby avoid seeking the table row if index terms are false. This is called the "push-down" optimization in the MySQL world, we are told. Do not confuse with [/info/6df18e949d367629|WHERE-clause push-down]. (check-in: d7bb79ed3a user: drh tags: trunk) | |
14:56 | Minor size and performance improvements to the push-down optimization. (Closed-Leaf check-in: 91dfb61a1a user: drh tags: pushdown-optimization) | |
2017-04-26
| ||
17:21 | Add new test file cachespill.test. (check-in: 2d0b64316d user: dan tags: trunk) | |
Changes to src/expr.c.
︙ | ︙ | |||
1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 | ** expression must not refer to any non-deterministic function nor any ** table other than iCur. */ int sqlite3ExprIsTableConstant(Expr *p, int iCur){ return exprIsConst(p, 3, iCur); } /* ** Walk an expression tree. Return non-zero if the expression is constant ** or a function call with constant arguments. Return and 0 if there ** are any variables. ** ** For the purposes of this function, a double-quoted string (ex: "abc") ** is considered a variable but a single-quoted string (ex: 'abc') is | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 | ** expression must not refer to any non-deterministic function nor any ** table other than iCur. */ int sqlite3ExprIsTableConstant(Expr *p, int iCur){ return exprIsConst(p, 3, iCur); } /* ** sqlite3WalkExpr() callback used by sqlite3ExprIsConstantOrGroupBy(). */ static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){ ExprList *pGroupBy = pWalker->u.pGroupBy; int i; /* Check if pExpr is identical to any GROUP BY term. If so, consider ** it constant. */ for(i=0; i<pGroupBy->nExpr; i++){ Expr *p = pGroupBy->a[i].pExpr; if( sqlite3ExprCompare(pExpr, p, -1)<2 ){ CollSeq *pColl = sqlite3ExprCollSeq(pWalker->pParse, p); if( pColl==0 || sqlite3_stricmp("BINARY", pColl->zName)==0 ){ return WRC_Prune; } } } /* Check if pExpr is a sub-select. If so, consider it variable. */ if( ExprHasProperty(pExpr, EP_xIsSelect) ){ pWalker->eCode = 0; return WRC_Abort; } return exprNodeIsConstant(pWalker, pExpr); } /* ** Walk the expression tree passed as the first argument. Return non-zero ** if the expression consists entirely of constants or copies of terms ** in pGroupBy that sort with the BINARY collation sequence. ** ** This routine is used to determine if a term of the HAVING clause can ** be promoted into the WHERE clause. In order for such a promotion to work, ** the value of the HAVING clause term must be the same for all members of ** a "group". The requirement that the GROUP BY term must be BINARY ** assumes that no other collating sequence will have a finer-grained ** grouping than binary. In other words (A=B COLLATE binary) implies ** A=B in every other collating sequence. The requirement that the ** GROUP BY be BINARY is stricter than necessary. It would also work ** to promote HAVING clauses that use the same alternative collating ** sequence as the GROUP BY term, but that is much harder to check, ** alternative collating sequences are uncommon, and this is only an ** optimization, so we take the easy way out and simply require the ** GROUP BY to use the BINARY collating sequence. */ int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){ Walker w; memset(&w, 0, sizeof(w)); w.eCode = 1; w.xExprCallback = exprNodeIsConstantOrGroupBy; w.u.pGroupBy = pGroupBy; w.pParse = pParse; sqlite3WalkExpr(&w, p); return w.eCode; } /* ** Walk an expression tree. Return non-zero if the expression is constant ** or a function call with constant arguments. Return and 0 if there ** are any variables. ** ** For the purposes of this function, a double-quoted string (ex: "abc") ** is considered a variable but a single-quoted string (ex: 'abc') is |
︙ | ︙ |
Changes to src/select.c.
︙ | ︙ | |||
4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 | pParse->pVdbe, OP_Explain, pParse->iSelectId, 0, 0, zEqp, P4_DYNAMIC ); } } #else # define explainSimpleCount(a,b,c) #endif /* ** Generate code for the SELECT statement given in the p argument. ** ** The results are returned according to the SelectDest structure. ** See comments in sqliteInt.h for further information. ** | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 | pParse->pVdbe, OP_Explain, pParse->iSelectId, 0, 0, zEqp, P4_DYNAMIC ); } } #else # define explainSimpleCount(a,b,c) #endif /* ** Context object for havingToWhereExprCb(). */ struct HavingToWhereCtx { Expr **ppWhere; ExprList *pGroupBy; }; /* ** sqlite3WalkExpr() callback used by havingToWhere(). ** ** If the node passed to the callback is a TK_AND node, return ** WRC_Continue to tell sqlite3WalkExpr() to iterate through child nodes. ** ** Otherwise, return WRC_Prune. In this case, also check if the ** sub-expression matches the criteria for being moved to the WHERE ** clause. If so, add it to the WHERE clause and replace the sub-expression ** within the HAVING expression with a constant "1". */ static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){ if( pExpr->op!=TK_AND ){ struct HavingToWhereCtx *p = pWalker->u.pHavingCtx; if( sqlite3ExprIsConstantOrGroupBy(pWalker->pParse, pExpr, p->pGroupBy) ){ sqlite3 *db = pWalker->pParse->db; Expr *pNew = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[1], 0); if( pNew ){ Expr *pWhere = *(p->ppWhere); SWAP(Expr, *pNew, *pExpr); pNew = sqlite3ExprAnd(db, pWhere, pNew); *(p->ppWhere) = pNew; } } return WRC_Prune; } return WRC_Continue; } /* ** Transfer eligible terms from the HAVING clause of a query, which is ** processed after grouping, to the WHERE clause, which is processed before ** grouping. For example, the query: ** ** SELECT * FROM <tables> WHERE a=? GROUP BY b HAVING b=? AND c=? ** ** can be rewritten as: ** ** SELECT * FROM <tables> WHERE a=? AND b=? GROUP BY b HAVING c=? ** ** A term of the HAVING expression is eligible for transfer if it consists ** entirely of constants and expressions that are also GROUP BY terms that ** use the "BINARY" collation sequence. */ static void havingToWhere( Parse *pParse, ExprList *pGroupBy, Expr *pHaving, Expr **ppWhere ){ struct HavingToWhereCtx sCtx; Walker sWalker; sCtx.ppWhere = ppWhere; sCtx.pGroupBy = pGroupBy; memset(&sWalker, 0, sizeof(sWalker)); sWalker.pParse = pParse; sWalker.xExprCallback = havingToWhereExprCb; sWalker.u.pHavingCtx = &sCtx; sqlite3WalkExpr(&sWalker, pHaving); } /* ** Generate code for the SELECT statement given in the p argument. ** ** The results are returned according to the SelectDest structure. ** See comments in sqliteInt.h for further information. ** |
︙ | ︙ | |||
5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 | sNC.pAggInfo = &sAggInfo; sAggInfo.mnReg = pParse->nMem+1; sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0; sAggInfo.pGroupBy = pGroupBy; sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); if( pHaving ){ sqlite3ExprAnalyzeAggregates(&sNC, pHaving); } sAggInfo.nAccumulator = sAggInfo.nColumn; for(i=0; i<sAggInfo.nFunc; i++){ assert( !ExprHasProperty(sAggInfo.aFunc[i].pExpr, EP_xIsSelect) ); sNC.ncFlags |= NC_InAggFunc; sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList); | > > > > > | 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 | sNC.pAggInfo = &sAggInfo; sAggInfo.mnReg = pParse->nMem+1; sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0; sAggInfo.pGroupBy = pGroupBy; sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); if( pHaving ){ if( pGroupBy ){ assert( pWhere==p->pWhere ); havingToWhere(pParse, pGroupBy, pHaving, &p->pWhere); pWhere = p->pWhere; } sqlite3ExprAnalyzeAggregates(&sNC, pHaving); } sAggInfo.nAccumulator = sAggInfo.nColumn; for(i=0; i<sAggInfo.nFunc; i++){ assert( !ExprHasProperty(sAggInfo.aFunc[i].pExpr, EP_xIsSelect) ); sNC.ncFlags |= NC_InAggFunc; sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList); |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3312 3313 3314 3315 3316 3317 3318 | Parse *pParse; /* Parser context. */ int (*xExprCallback)(Walker*, Expr*); /* Callback for expressions */ int (*xSelectCallback)(Walker*,Select*); /* Callback for SELECTs */ void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */ int walkerDepth; /* Number of subqueries */ u8 eCode; /* A small processing code */ union { /* Extra data for callback */ | | | | | | | | | | > > | 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 | Parse *pParse; /* Parser context. */ int (*xExprCallback)(Walker*, Expr*); /* Callback for expressions */ int (*xSelectCallback)(Walker*,Select*); /* Callback for SELECTs */ void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */ int walkerDepth; /* Number of subqueries */ u8 eCode; /* A small processing code */ union { /* Extra data for callback */ NameContext *pNC; /* Naming context */ int n; /* A counter */ int iCur; /* A cursor number */ SrcList *pSrcList; /* FROM clause */ struct SrcCount *pSrcCount; /* Counting column references */ struct CCurHint *pCCurHint; /* Used by codeCursorHint() */ int *aiCol; /* array of column indexes */ struct IdxCover *pIdxCover; /* Check for index coverage */ struct IdxExprTrans *pIdxTrans; /* Convert indexed expr to column */ ExprList *pGroupBy; /* GROUP BY clause */ struct HavingToWhereCtx *pHavingCtx; /* HAVING to WHERE clause ctx */ } u; }; /* Forward declarations */ int sqlite3WalkExpr(Walker*, Expr*); int sqlite3WalkExprList(Walker*, ExprList*); int sqlite3WalkSelect(Walker*, Select*); |
︙ | ︙ | |||
3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 | void sqlite3RollbackTransaction(Parse*); void sqlite3Savepoint(Parse*, int, Token*); void sqlite3CloseSavepoints(sqlite3 *); void sqlite3LeaveMutexAndCloseZombie(sqlite3*); int sqlite3ExprIsConstant(Expr*); int sqlite3ExprIsConstantNotJoin(Expr*); int sqlite3ExprIsConstantOrFunction(Expr*, u8); int sqlite3ExprIsTableConstant(Expr*,int); #ifdef SQLITE_ENABLE_CURSOR_HINTS int sqlite3ExprContainsSubquery(Expr*); #endif int sqlite3ExprIsInteger(Expr*, int*); int sqlite3ExprCanBeNull(const Expr*); int sqlite3ExprNeedsNoAffinityChange(const Expr*, char); | > | 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 | void sqlite3RollbackTransaction(Parse*); void sqlite3Savepoint(Parse*, int, Token*); void sqlite3CloseSavepoints(sqlite3 *); void sqlite3LeaveMutexAndCloseZombie(sqlite3*); int sqlite3ExprIsConstant(Expr*); int sqlite3ExprIsConstantNotJoin(Expr*); int sqlite3ExprIsConstantOrFunction(Expr*, u8); int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*); int sqlite3ExprIsTableConstant(Expr*,int); #ifdef SQLITE_ENABLE_CURSOR_HINTS int sqlite3ExprContainsSubquery(Expr*); #endif int sqlite3ExprIsInteger(Expr*, int*); int sqlite3ExprCanBeNull(const Expr*); int sqlite3ExprNeedsNoAffinityChange(const Expr*, char); |
︙ | ︙ |
Added test/having.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | # 2017 April 30 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Test the HAVING->WHERE optimization. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix having do_execsql_test 1.0 { CREATE TABLE t2(c, d); CREATE TABLE t1(a, b); INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 VALUES(2, 2); INSERT INTO t1 VALUES(1, 3); INSERT INTO t1 VALUES(2, 4); INSERT INTO t1 VALUES(1, 5); INSERT INTO t1 VALUES(2, 6); } {} foreach {tn sql res} { 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" {2 12} 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2 AND sum(b)>10" {2 12} 3 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>12" {} } { do_execsql_test 1.$tn $sql $res } # Run an EXPLAIN command for both SQL statements. Return true if # the outputs are identical, or false otherwise. # proc compare_vdbe {sql1 sql2} { set r1 [list] set r2 [list] db eval "explain $sql1" { lappend r1 $opcode $p1 $p2 $p3 $p4 $p5} db eval "explain $sql2" { lappend r2 $opcode $p1 $p2 $p3 $p4 $p5} return [expr {$r1==$r2}] } proc do_compare_vdbe_test {tn sql1 sql2 res} { uplevel [list do_test $tn [list compare_vdbe $sql1 $sql2] $res] } #------------------------------------------------------------------------- # Test that various statements that are eligible for the optimization # produce the same VDBE code as optimizing by hand does. # foreach {tn sql1 sql2} { 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a" 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>5 AND a=2" "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a HAVING sum(b)>5" 3 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING a=2" "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE binary" 4 { SELECT x,y FROM ( SELECT a AS x, sum(b) AS y FROM t1 GROUP BY a ) WHERE x BETWEEN 8888 AND 9999 } { SELECT x,y FROM ( SELECT a AS x, sum(b) AS y FROM t1 WHERE x BETWEEN 8888 AND 9999 GROUP BY a ) } 5 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING 0" "SELECT a, sum(b) FROM t1 WHERE 0 GROUP BY a COLLATE binary" 6 "SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d HAVING b=d" "SELECT count(*) FROM t1,t2 WHERE a=c AND b=d GROUP BY b, d" 7 { SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d HAVING b=d COLLATE nocase } { SELECT count(*) FROM t1,t2 WHERE a=c AND b=d COLLATE nocase GROUP BY b, d } 8 "SELECT a, sum(b) FROM t1 GROUP BY a||b HAVING substr(a||b, 1, 1)='a'" "SELECT a, sum(b) FROM t1 WHERE substr(a||b, 1, 1)='a' GROUP BY a||b" } { do_compare_vdbe_test 2.$tn $sql1 $sql2 1 } #------------------------------------------------------------------------- # 1: Test that the optimization is only applied if the GROUP BY term # uses BINARY collation. # # 2: Not applied if there is a non-deterministic function in the HAVING # term. # foreach {tn sql1 sql2} { 1 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE nocase HAVING a=2" "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE nocase" 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING randomblob(a)<X'88'" "SELECT a, sum(b) FROM t1 WHERE randomblob(a)<X'88' GROUP BY a" } { do_compare_vdbe_test 3.$tn $sql1 $sql2 0 } #------------------------------------------------------------------------- # Test that non-deterministic functions disqualify a term from being # moved from the HAVING to WHERE clause. # do_execsql_test 4.1 { CREATE TABLE t3(a, b); INSERT INTO t3 VALUES(1, 1); INSERT INTO t3 VALUES(1, 2); INSERT INTO t3 VALUES(1, 3); INSERT INTO t3 VALUES(2, 1); INSERT INTO t3 VALUES(2, 2); INSERT INTO t3 VALUES(2, 3); } proc nondeter {args} { incr ::nondeter_ret expr {$::nondeter_ret % 2} } db func nondeter nondeter set ::nondeter_ret 0 do_execsql_test 4.2 { SELECT a, sum(b) FROM t3 GROUP BY a HAVING nondeter(a) } {1 6} # If the term where moved, the query above would return the same # result as the following. But it does not. # set ::nondeter_ret 0 do_execsql_test 4.3 { SELECT a, sum(b) FROM t3 WHERE nondeter(a) GROUP BY a } {1 4 2 2} finish_test |