Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Experiments with the optimization of ORDER BY and GROUP BY clauses. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | orderby-planning |
Files: | files | file ages | folders |
SHA1: |
b150902579d708b454efd5f8750e26a8 |
User & Date: | drh 2014-03-18 15:30:27.660 |
Context
2014-03-18
| ||
18:59 | Adjust the query planner to keep track of the number of ORDER BY terms satisfied. Still doesn't do anything with this information. Some tests fail after this check-in, but all failures are believed to be benign. The failures will be addressed at a later stage. (check-in: 59d49b7fc4 user: drh tags: orderby-planning) | |
15:30 | Experiments with the optimization of ORDER BY and GROUP BY clauses. (check-in: b150902579 user: drh tags: orderby-planning) | |
2014-03-17
| ||
15:06 | Clean up some obsolete "register" declarations in printf.c. (check-in: ecd9d3f945 user: drh tags: trunk) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
4502 4503 4504 4505 4506 4507 4508 | if( i>=nConstraint ){ pNew->nLTerm = mxTerm+1; assert( pNew->nLTerm<=pNew->nLSlot ); pNew->u.vtab.idxNum = pIdxInfo->idxNum; pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr; pIdxInfo->needToFreeIdxStr = 0; pNew->u.vtab.idxStr = pIdxInfo->idxStr; | | | | 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 | if( i>=nConstraint ){ pNew->nLTerm = mxTerm+1; assert( pNew->nLTerm<=pNew->nLSlot ); pNew->u.vtab.idxNum = pIdxInfo->idxNum; pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr; pIdxInfo->needToFreeIdxStr = 0; pNew->u.vtab.idxStr = pIdxInfo->idxStr; pNew->u.vtab.isOrdered = (i8)(pIdxInfo->orderByConsumed ? pIdxInfo->nOrderBy : 0); pNew->rSetup = 0; pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost); pNew->nOut = sqlite3LogEst(pIdxInfo->estimatedRows); whereLoopInsert(pBuilder, pNew); if( pNew->u.vtab.needFree ){ sqlite3_free(pNew->u.vtab.idxStr); pNew->u.vtab.needFree = 0; |
︙ | ︙ | |||
4664 4665 4666 4667 4668 4669 4670 | whereLoopClear(db, pNew); return rc; } /* ** Examine a WherePath (with the addition of the extra WhereLoop of the 5th ** parameters) to see if it outputs rows in the requested ORDER BY | | | | | | | 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 | whereLoopClear(db, pNew); return rc; } /* ** Examine a WherePath (with the addition of the extra WhereLoop of the 5th ** parameters) to see if it outputs rows in the requested ORDER BY ** (or GROUP BY) without requiring a separate sort operation. Return N: ** ** N>0: N terms of the ORDER BY clause are satisfied ** N==0: No terms of the ORDER BY clause are satisfied ** N<0: Unknown yet how many terms of ORDER BY might be satisfied. ** ** Note that processing for WHERE_GROUPBY and WHERE_DISTINCTBY is not as ** strict. With GROUP BY and DISTINCT the only requirement is that ** equivalent rows appear immediately adjacent to one another. GROUP BY ** and DISTINT do not require rows to appear in any particular order as long ** as equivelent rows are grouped together. Thus for GROUP BY and DISTINCT ** the pOrderBy terms can be matched in any order. With ORDER BY, the ** pOrderBy terms must be matched in strict left-to-right order. */ static i8 wherePathSatisfiesOrderBy( WhereInfo *pWInfo, /* The WHERE clause */ ExprList *pOrderBy, /* ORDER BY or GROUP BY or DISTINCT clause to check */ WherePath *pPath, /* The WherePath to check */ u16 wctrlFlags, /* Might contain WHERE_GROUPBY or WHERE_DISTINCTBY */ u16 nLoop, /* Number of entries in pPath->aLoop[] */ WhereLoop *pLast, /* Add this WhereLoop to the end of pPath->aLoop[] */ Bitmask *pRevMask /* OUT: Mask of WhereLoops to run in reverse order */ |
︙ | ︙ | |||
4911 4912 4913 4914 4915 4916 4917 | if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue; if( (mTerm&~orderDistinctMask)==0 ){ obSat |= MASKBIT(i); } } } } /* End the loop over all WhereLoops from outer-most down to inner-most */ | | | 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 | if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue; if( (mTerm&~orderDistinctMask)==0 ){ obSat |= MASKBIT(i); } } } } /* End the loop over all WhereLoops from outer-most down to inner-most */ if( obSat==obDone ) return nOrderBy; if( !isOrderDistinct ) return 0; return -1; } #ifdef WHERETRACE_ENABLED /* For debugging use only: */ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){ |
︙ | ︙ | |||
4997 4998 4999 5000 5001 5002 5003 | aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==sqlite3LogEst(25) ); nFrom = 1; /* Precompute the cost of sorting the final result set, if the caller ** to sqlite3WhereBegin() was concerned about sorting */ rSortCost = 0; if( pWInfo->pOrderBy==0 || nRowEst==0 ){ | | > < | | | | < | < < < < < < < < | | | | | | | < | 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 | aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==sqlite3LogEst(25) ); nFrom = 1; /* Precompute the cost of sorting the final result set, if the caller ** to sqlite3WhereBegin() was concerned about sorting */ rSortCost = 0; if( pWInfo->pOrderBy==0 || nRowEst==0 ){ aFrom[0].isOrdered = 0; }else{ /* TUNING: Estimated cost of sorting is 48*N*log2(N) where N is the ** number of output rows. The 48 is the expected size of a row to sort. ** FIXME: compute a better estimate of the 48 multiplier based on the ** result set expressions. */ aFrom[0].isOrdered = -1; rSortCost = nRowEst + estLog(nRowEst); WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost)); } /* Compute successively longer WherePaths using the previous generation ** of WherePaths as the basis for the next. Keep track of the mxChoice ** best paths at each generation */ for(iLoop=0; iLoop<nLoop; iLoop++){ nTo = 0; for(ii=0, pFrom=aFrom; ii<nFrom; ii++, pFrom++){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ Bitmask maskNew; Bitmask revMask = 0; i8 isOrdered = pFrom->isOrdered; if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue; if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue; /* At this point, pWLoop is a candidate to be the next loop. ** Compute its cost */ rCost = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); rCost = sqlite3LogEstAdd(rCost, pFrom->rCost); nOut = pFrom->nRow + pWLoop->nOut; maskNew = pFrom->maskLoop | pWLoop->maskSelf; if( isOrdered<0 ){ isOrdered = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, iLoop, pWLoop, &revMask); if( isOrdered==0 ){ rCost = sqlite3LogEstAdd(rCost, rSortCost); } }else{ revMask = pFrom->revLoop; } /* Check to see if pWLoop should be added to the mxChoice best so far */ for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){ if( pTo->maskLoop==maskNew && ((pTo->isOrdered^isOrdered)&80)==0 && ((pTo->rCost<=rCost && pTo->nRow<=nOut) || (pTo->rCost>=rCost && pTo->nRow>=nOut)) ){ testcase( jj==nTo-1 ); break; } } if( jj>=nTo ){ if( nTo>=mxChoice && rCost>=mxCost ){ #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf("Skip %s cost=%-3d,%3d order=%c\n", wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrdered>=0 ? isOrdered+'0' : '?'); } #endif continue; } /* Add a new Path to the aTo[] set */ if( nTo<mxChoice ){ /* Increase the size of the aTo set by one */ jj = nTo++; }else{ /* New path replaces the prior worst to keep count below mxChoice */ jj = mxI; } pTo = &aTo[jj]; #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf("New %s cost=%-3d,%3d order=%c\n", wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrdered>=0 ? isOrdered+'0' : '?'); } #endif }else{ if( pTo->rCost<=rCost && pTo->nRow<=nOut ){ #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf( "Skip %s cost=%-3d,%3d order=%c", wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrdered>=0 ? isOrdered+'0' : '?'); sqlite3DebugPrintf(" vs %s cost=%-3d,%d order=%c\n", wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?'); } #endif testcase( pTo->rCost==rCost ); continue; } testcase( pTo->rCost==rCost+1 ); /* A new and better score for a previously created equivalent path */ #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf( "Update %s cost=%-3d,%3d order=%c", wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrdered>=0 ? isOrdered+'0' : '?'); sqlite3DebugPrintf(" was %s cost=%-3d,%3d order=%c\n", wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?'); } #endif } /* pWLoop is a winner. Add it to the set of best so far */ pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf; pTo->revLoop = revMask; pTo->nRow = nOut; pTo->rCost = rCost; pTo->isOrdered = isOrdered; memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop); pTo->aLoop[iLoop] = pWLoop; if( nTo>=mxChoice ){ mxI = 0; mxCost = aTo[0].rCost; mxOut = aTo[0].nRow; |
︙ | ︙ | |||
5143 5144 5145 5146 5147 5148 5149 | #ifdef WHERETRACE_ENABLED /* >=2 */ if( sqlite3WhereTrace>=2 ){ sqlite3DebugPrintf("---- after round %d ----\n", iLoop); for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){ sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c", wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, | | | | 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 | #ifdef WHERETRACE_ENABLED /* >=2 */ if( sqlite3WhereTrace>=2 ){ sqlite3DebugPrintf("---- after round %d ----\n", iLoop); for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){ sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c", wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, pTo->isOrdered>=0 ? (pTo->isOrdered+'0') : '?'); if( pTo->isOrdered>0 ){ sqlite3DebugPrintf(" rev=0x%llx\n", pTo->revLoop); }else{ sqlite3DebugPrintf("\n"); } } } #endif |
︙ | ︙ | |||
5187 5188 5189 5190 5191 5192 5193 | && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0 && pWInfo->eDistinct==WHERE_DISTINCT_NOOP && nRowEst ){ Bitmask notUsed; int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom, WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], ¬Used); | > | > | | 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 | && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0 && pWInfo->eDistinct==WHERE_DISTINCT_NOOP && nRowEst ){ Bitmask notUsed; int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom, WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], ¬Used); if( rc==pWInfo->pResultSet->nExpr ){ pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; } } if( pWInfo->pOrderBy && pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){ if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){ pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; }else{ pWInfo->bOBSat = 1; pWInfo->revMask = pFrom->revLoop; } } |
︙ | ︙ | |||
5382 5383 5384 5385 5386 5387 5388 | ** be used to compute the appropriate cursor depending on which index is ** used. */ WhereInfo *sqlite3WhereBegin( Parse *pParse, /* The parser context */ SrcList *pTabList, /* FROM clause: A list of all tables to be scanned */ Expr *pWhere, /* The WHERE clause */ | | | 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 | ** be used to compute the appropriate cursor depending on which index is ** used. */ WhereInfo *sqlite3WhereBegin( Parse *pParse, /* The parser context */ SrcList *pTabList, /* FROM clause: A list of all tables to be scanned */ Expr *pWhere, /* The WHERE clause */ ExprList *pOrderBy, /* An ORDER BY (or GROUP BY) clause, or NULL */ ExprList *pResultSet, /* Result set of the query */ u16 wctrlFlags, /* One of the WHERE_* flags defined in sqliteInt.h */ int iIdxCur /* If WHERE_ONETABLE_ONLY is set, index cursor number */ ){ int nByteWInfo; /* Num. bytes allocated for WhereInfo struct */ int nTabList; /* Number of elements in pTabList */ WhereInfo *pWInfo; /* Will become the return value of this function */ |
︙ | ︙ | |||
5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 | sqlite3 *db; /* Database connection */ int rc; /* Return code */ /* Variable initialization */ db = pParse->db; memset(&sWLB, 0, sizeof(sWLB)); sWLB.pOrderBy = pOrderBy; /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */ if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){ wctrlFlags &= ~WHERE_WANT_DISTINCT; } | > > > > | 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 | sqlite3 *db; /* Database connection */ int rc; /* Return code */ /* Variable initialization */ db = pParse->db; memset(&sWLB, 0, sizeof(sWLB)); /* An ORDER/GROUP BY clause of more than 63 terms cannot be optimized */ testcase( pOrderBy && pOrderBy->nExpr==BMS-1 ); if( pOrderBy && pOrderBy->nExpr>=BMS ) pOrderBy = 0; sWLB.pOrderBy = pOrderBy; /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */ if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){ wctrlFlags &= ~WHERE_WANT_DISTINCT; } |
︙ | ︙ |
Changes to src/whereInt.h.
︙ | ︙ | |||
117 118 119 120 121 122 123 | u16 nEq; /* Number of equality constraints */ u16 nSkip; /* Number of initial index columns to skip */ Index *pIndex; /* Index used, or NULL */ } btree; struct { /* Information for virtual tables */ int idxNum; /* Index number */ u8 needFree; /* True if sqlite3_free(idxStr) is needed */ | | | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | u16 nEq; /* Number of equality constraints */ u16 nSkip; /* Number of initial index columns to skip */ Index *pIndex; /* Index used, or NULL */ } btree; struct { /* Information for virtual tables */ int idxNum; /* Index number */ u8 needFree; /* True if sqlite3_free(idxStr) is needed */ i8 isOrdered; /* True if satisfies ORDER BY */ u16 omitMask; /* Terms that may be omitted */ char *idxStr; /* Index identifier string */ } vtab; } u; u32 wsFlags; /* WHERE_* flags describing the plan */ u16 nLTerm; /* Number of entries in aLTerm[] */ /**** whereLoopXfer() copies fields above ***********************/ |
︙ | ︙ | |||
179 180 181 182 183 184 185 | ** at the end is the choosen query plan. */ struct WherePath { Bitmask maskLoop; /* Bitmask of all WhereLoop objects in this path */ Bitmask revLoop; /* aLoop[]s that should be reversed for ORDER BY */ LogEst nRow; /* Estimated number of rows generated by this path */ LogEst rCost; /* Total cost of this path */ | | < | 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | ** at the end is the choosen query plan. */ struct WherePath { Bitmask maskLoop; /* Bitmask of all WhereLoop objects in this path */ Bitmask revLoop; /* aLoop[]s that should be reversed for ORDER BY */ LogEst nRow; /* Estimated number of rows generated by this path */ LogEst rCost; /* Total cost of this path */ i8 isOrdered; /* No. of ORDER BY terms satisfied. -1 for unknown */ WhereLoop **aLoop; /* Array of WhereLoop objects implementing this path */ }; /* ** The query generator uses an array of instances of this structure to ** help it analyze the subexpressions of the WHERE clause. Each WHERE ** clause subexpression is separated from the others by AND operators, |
︙ | ︙ |