/ Check-in [b150902579]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experiments with the optimization of ORDER BY and GROUP BY clauses.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | orderby-planning
Files: files | file ages | folders
SHA1: b150902579d708b454efd5f8750e26a816f7f1a6
User & Date: drh 2014-03-18 15:30:27
Context
2014-03-18
18:59
Adjust the query planner to keep track of the number of ORDER BY terms satisfied. Still doesn't do anything with this information. Some tests fail after this check-in, but all failures are believed to be benign. The failures will be addressed at a later stage. check-in: 59d49b7fc4 user: drh tags: orderby-planning
15:30
Experiments with the optimization of ORDER BY and GROUP BY clauses. check-in: b150902579 user: drh tags: orderby-planning
2014-03-17
15:06
Clean up some obsolete "register" declarations in printf.c. check-in: ecd9d3f945 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  4502   4502       if( i>=nConstraint ){
  4503   4503         pNew->nLTerm = mxTerm+1;
  4504   4504         assert( pNew->nLTerm<=pNew->nLSlot );
  4505   4505         pNew->u.vtab.idxNum = pIdxInfo->idxNum;
  4506   4506         pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr;
  4507   4507         pIdxInfo->needToFreeIdxStr = 0;
  4508   4508         pNew->u.vtab.idxStr = pIdxInfo->idxStr;
  4509         -      pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0)
  4510         -                                     && pIdxInfo->orderByConsumed);
         4509  +      pNew->u.vtab.isOrdered = (i8)(pIdxInfo->orderByConsumed ?
         4510  +                                      pIdxInfo->nOrderBy : 0);
  4511   4511         pNew->rSetup = 0;
  4512   4512         pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost);
  4513   4513         pNew->nOut = sqlite3LogEst(pIdxInfo->estimatedRows);
  4514   4514         whereLoopInsert(pBuilder, pNew);
  4515   4515         if( pNew->u.vtab.needFree ){
  4516   4516           sqlite3_free(pNew->u.vtab.idxStr);
  4517   4517           pNew->u.vtab.needFree = 0;
................................................................................
  4664   4664     whereLoopClear(db, pNew);
  4665   4665     return rc;
  4666   4666   }
  4667   4667   
  4668   4668   /*
  4669   4669   ** Examine a WherePath (with the addition of the extra WhereLoop of the 5th
  4670   4670   ** parameters) to see if it outputs rows in the requested ORDER BY
  4671         -** (or GROUP BY) without requiring a separate sort operation.  Return:
         4671  +** (or GROUP BY) without requiring a separate sort operation.  Return N:
  4672   4672   ** 
  4673         -**    0:  ORDER BY is not satisfied.  Sorting required
  4674         -**    1:  ORDER BY is satisfied.      Omit sorting
  4675         -**   -1:  Unknown at this time
         4673  +**   N>0:   N terms of the ORDER BY clause are satisfied
         4674  +**   N==0:  No terms of the ORDER BY clause are satisfied
         4675  +**   N<0:   Unknown yet how many terms of ORDER BY might be satisfied.   
  4676   4676   **
  4677   4677   ** Note that processing for WHERE_GROUPBY and WHERE_DISTINCTBY is not as
  4678   4678   ** strict.  With GROUP BY and DISTINCT the only requirement is that
  4679   4679   ** equivalent rows appear immediately adjacent to one another.  GROUP BY
  4680   4680   ** and DISTINT do not require rows to appear in any particular order as long
  4681   4681   ** as equivelent rows are grouped together.  Thus for GROUP BY and DISTINCT
  4682   4682   ** the pOrderBy terms can be matched in any order.  With ORDER BY, the 
  4683   4683   ** pOrderBy terms must be matched in strict left-to-right order.
  4684   4684   */
  4685         -static int wherePathSatisfiesOrderBy(
         4685  +static i8 wherePathSatisfiesOrderBy(
  4686   4686     WhereInfo *pWInfo,    /* The WHERE clause */
  4687   4687     ExprList *pOrderBy,   /* ORDER BY or GROUP BY or DISTINCT clause to check */
  4688   4688     WherePath *pPath,     /* The WherePath to check */
  4689   4689     u16 wctrlFlags,       /* Might contain WHERE_GROUPBY or WHERE_DISTINCTBY */
  4690   4690     u16 nLoop,            /* Number of entries in pPath->aLoop[] */
  4691   4691     WhereLoop *pLast,     /* Add this WhereLoop to the end of pPath->aLoop[] */
  4692   4692     Bitmask *pRevMask     /* OUT: Mask of WhereLoops to run in reverse order */
................................................................................
  4911   4911           if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue;
  4912   4912           if( (mTerm&~orderDistinctMask)==0 ){
  4913   4913             obSat |= MASKBIT(i);
  4914   4914           }
  4915   4915         }
  4916   4916       }
  4917   4917     } /* End the loop over all WhereLoops from outer-most down to inner-most */
  4918         -  if( obSat==obDone ) return 1;
         4918  +  if( obSat==obDone ) return nOrderBy;
  4919   4919     if( !isOrderDistinct ) return 0;
  4920   4920     return -1;
  4921   4921   }
  4922   4922   
  4923   4923   #ifdef WHERETRACE_ENABLED
  4924   4924   /* For debugging use only: */
  4925   4925   static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){
................................................................................
  4997   4997     aFrom[0].nRow = MIN(pParse->nQueryLoop, 46);  assert( 46==sqlite3LogEst(25) );
  4998   4998     nFrom = 1;
  4999   4999   
  5000   5000     /* Precompute the cost of sorting the final result set, if the caller
  5001   5001     ** to sqlite3WhereBegin() was concerned about sorting */
  5002   5002     rSortCost = 0;
  5003   5003     if( pWInfo->pOrderBy==0 || nRowEst==0 ){
  5004         -    aFrom[0].isOrderedValid = 1;
         5004  +    aFrom[0].isOrdered = 0;
  5005   5005     }else{
  5006   5006       /* TUNING: Estimated cost of sorting is 48*N*log2(N) where N is the
  5007   5007       ** number of output rows. The 48 is the expected size of a row to sort. 
  5008   5008       ** FIXME:  compute a better estimate of the 48 multiplier based on the
  5009   5009       ** result set expressions. */
         5010  +    aFrom[0].isOrdered = -1;
  5010   5011       rSortCost = nRowEst + estLog(nRowEst);
  5011   5012       WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost));
  5012   5013     }
  5013   5014   
  5014   5015     /* Compute successively longer WherePaths using the previous generation
  5015   5016     ** of WherePaths as the basis for the next.  Keep track of the mxChoice
  5016   5017     ** best paths at each generation */
  5017   5018     for(iLoop=0; iLoop<nLoop; iLoop++){
  5018   5019       nTo = 0;
  5019   5020       for(ii=0, pFrom=aFrom; ii<nFrom; ii++, pFrom++){
  5020   5021         for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
  5021   5022           Bitmask maskNew;
  5022   5023           Bitmask revMask = 0;
  5023         -        u8 isOrderedValid = pFrom->isOrderedValid;
  5024         -        u8 isOrdered = pFrom->isOrdered;
         5024  +        i8 isOrdered = pFrom->isOrdered;
  5025   5025           if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue;
  5026   5026           if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue;
  5027   5027           /* At this point, pWLoop is a candidate to be the next loop. 
  5028   5028           ** Compute its cost */
  5029   5029           rCost = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow);
  5030   5030           rCost = sqlite3LogEstAdd(rCost, pFrom->rCost);
  5031   5031           nOut = pFrom->nRow + pWLoop->nOut;
  5032   5032           maskNew = pFrom->maskLoop | pWLoop->maskSelf;
  5033         -        if( !isOrderedValid ){
  5034         -          switch( wherePathSatisfiesOrderBy(pWInfo,
         5033  +        if( isOrdered<0 ){
         5034  +          isOrdered = wherePathSatisfiesOrderBy(pWInfo,
  5035   5035                          pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
  5036         -                       iLoop, pWLoop, &revMask) ){
  5037         -            case 1:  /* Yes.  pFrom+pWLoop does satisfy the ORDER BY clause */
  5038         -              isOrdered = 1;
  5039         -              isOrderedValid = 1;
  5040         -              break;
  5041         -            case 0:  /* No.  pFrom+pWLoop will require a separate sort */
  5042         -              isOrdered = 0;
  5043         -              isOrderedValid = 1;
  5044         -              rCost = sqlite3LogEstAdd(rCost, rSortCost);
  5045         -              break;
  5046         -            default: /* Cannot tell yet.  Try again on the next iteration */
  5047         -              break;
         5036  +                       iLoop, pWLoop, &revMask);
         5037  +          if( isOrdered==0 ){
         5038  +            rCost = sqlite3LogEstAdd(rCost, rSortCost);
  5048   5039             }
  5049   5040           }else{
  5050   5041             revMask = pFrom->revLoop;
  5051   5042           }
  5052   5043           /* Check to see if pWLoop should be added to the mxChoice best so far */
  5053   5044           for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){
  5054   5045             if( pTo->maskLoop==maskNew
  5055         -           && pTo->isOrderedValid==isOrderedValid
         5046  +           && ((pTo->isOrdered^isOrdered)&80)==0
  5056   5047              && ((pTo->rCost<=rCost && pTo->nRow<=nOut) ||
  5057   5048                   (pTo->rCost>=rCost && pTo->nRow>=nOut))
  5058   5049             ){
  5059   5050               testcase( jj==nTo-1 );
  5060   5051               break;
  5061   5052             }
  5062   5053           }
  5063   5054           if( jj>=nTo ){
  5064   5055             if( nTo>=mxChoice && rCost>=mxCost ){
  5065   5056   #ifdef WHERETRACE_ENABLED /* 0x4 */
  5066   5057               if( sqlite3WhereTrace&0x4 ){
  5067   5058                 sqlite3DebugPrintf("Skip   %s cost=%-3d,%3d order=%c\n",
  5068   5059                     wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5069         -                  isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
         5060  +                  isOrdered>=0 ? isOrdered+'0' : '?');
  5070   5061               }
  5071   5062   #endif
  5072   5063               continue;
  5073   5064             }
  5074   5065             /* Add a new Path to the aTo[] set */
  5075   5066             if( nTo<mxChoice ){
  5076   5067               /* Increase the size of the aTo set by one */
................................................................................
  5080   5071               jj = mxI;
  5081   5072             }
  5082   5073             pTo = &aTo[jj];
  5083   5074   #ifdef WHERETRACE_ENABLED /* 0x4 */
  5084   5075             if( sqlite3WhereTrace&0x4 ){
  5085   5076               sqlite3DebugPrintf("New    %s cost=%-3d,%3d order=%c\n",
  5086   5077                   wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5087         -                isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
         5078  +                isOrdered>=0 ? isOrdered+'0' : '?');
  5088   5079             }
  5089   5080   #endif
  5090   5081           }else{
  5091   5082             if( pTo->rCost<=rCost && pTo->nRow<=nOut ){
  5092   5083   #ifdef WHERETRACE_ENABLED /* 0x4 */
  5093   5084               if( sqlite3WhereTrace&0x4 ){
  5094   5085                 sqlite3DebugPrintf(
  5095   5086                     "Skip   %s cost=%-3d,%3d order=%c",
  5096   5087                     wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5097         -                  isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
         5088  +                  isOrdered>=0 ? isOrdered+'0' : '?');
  5098   5089                 sqlite3DebugPrintf("   vs %s cost=%-3d,%d order=%c\n",
  5099   5090                     wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
  5100         -                  pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
         5091  +                  pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
  5101   5092               }
  5102   5093   #endif
  5103   5094               testcase( pTo->rCost==rCost );
  5104   5095               continue;
  5105   5096             }
  5106   5097             testcase( pTo->rCost==rCost+1 );
  5107   5098             /* A new and better score for a previously created equivalent path */
  5108   5099   #ifdef WHERETRACE_ENABLED /* 0x4 */
  5109   5100             if( sqlite3WhereTrace&0x4 ){
  5110   5101               sqlite3DebugPrintf(
  5111   5102                   "Update %s cost=%-3d,%3d order=%c",
  5112   5103                   wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5113         -                isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
         5104  +                isOrdered>=0 ? isOrdered+'0' : '?');
  5114   5105               sqlite3DebugPrintf("  was %s cost=%-3d,%3d order=%c\n",
  5115   5106                   wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
  5116         -                pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
         5107  +                pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
  5117   5108             }
  5118   5109   #endif
  5119   5110           }
  5120   5111           /* pWLoop is a winner.  Add it to the set of best so far */
  5121   5112           pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf;
  5122   5113           pTo->revLoop = revMask;
  5123   5114           pTo->nRow = nOut;
  5124   5115           pTo->rCost = rCost;
  5125         -        pTo->isOrderedValid = isOrderedValid;
  5126   5116           pTo->isOrdered = isOrdered;
  5127   5117           memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop);
  5128   5118           pTo->aLoop[iLoop] = pWLoop;
  5129   5119           if( nTo>=mxChoice ){
  5130   5120             mxI = 0;
  5131   5121             mxCost = aTo[0].rCost;
  5132   5122             mxOut = aTo[0].nRow;
................................................................................
  5143   5133   
  5144   5134   #ifdef WHERETRACE_ENABLED  /* >=2 */
  5145   5135       if( sqlite3WhereTrace>=2 ){
  5146   5136         sqlite3DebugPrintf("---- after round %d ----\n", iLoop);
  5147   5137         for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){
  5148   5138           sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c",
  5149   5139              wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
  5150         -           pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
  5151         -        if( pTo->isOrderedValid && pTo->isOrdered ){
         5140  +           pTo->isOrdered>=0 ? (pTo->isOrdered+'0') : '?');
         5141  +        if( pTo->isOrdered>0 ){
  5152   5142             sqlite3DebugPrintf(" rev=0x%llx\n", pTo->revLoop);
  5153   5143           }else{
  5154   5144             sqlite3DebugPrintf("\n");
  5155   5145           }
  5156   5146         }
  5157   5147       }
  5158   5148   #endif
................................................................................
  5187   5177      && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0
  5188   5178      && pWInfo->eDistinct==WHERE_DISTINCT_NOOP
  5189   5179      && nRowEst
  5190   5180     ){
  5191   5181       Bitmask notUsed;
  5192   5182       int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom,
  5193   5183                    WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], &notUsed);
  5194         -    if( rc==1 ) pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
         5184  +    if( rc==pWInfo->pResultSet->nExpr ){
         5185  +      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
         5186  +    }
  5195   5187     }
  5196         -  if( pFrom->isOrdered ){
         5188  +  if( pWInfo->pOrderBy && pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){
  5197   5189       if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){
  5198   5190         pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
  5199   5191       }else{
  5200   5192         pWInfo->bOBSat = 1;
  5201   5193         pWInfo->revMask = pFrom->revLoop;
  5202   5194       }
  5203   5195     }
................................................................................
  5382   5374   ** be used to compute the appropriate cursor depending on which index is
  5383   5375   ** used.
  5384   5376   */
  5385   5377   WhereInfo *sqlite3WhereBegin(
  5386   5378     Parse *pParse,        /* The parser context */
  5387   5379     SrcList *pTabList,    /* FROM clause: A list of all tables to be scanned */
  5388   5380     Expr *pWhere,         /* The WHERE clause */
  5389         -  ExprList *pOrderBy,   /* An ORDER BY clause, or NULL */
         5381  +  ExprList *pOrderBy,   /* An ORDER BY (or GROUP BY) clause, or NULL */
  5390   5382     ExprList *pResultSet, /* Result set of the query */
  5391   5383     u16 wctrlFlags,       /* One of the WHERE_* flags defined in sqliteInt.h */
  5392   5384     int iIdxCur           /* If WHERE_ONETABLE_ONLY is set, index cursor number */
  5393   5385   ){
  5394   5386     int nByteWInfo;            /* Num. bytes allocated for WhereInfo struct */
  5395   5387     int nTabList;              /* Number of elements in pTabList */
  5396   5388     WhereInfo *pWInfo;         /* Will become the return value of this function */
................................................................................
  5404   5396     sqlite3 *db;               /* Database connection */
  5405   5397     int rc;                    /* Return code */
  5406   5398   
  5407   5399   
  5408   5400     /* Variable initialization */
  5409   5401     db = pParse->db;
  5410   5402     memset(&sWLB, 0, sizeof(sWLB));
         5403  +
         5404  +  /* An ORDER/GROUP BY clause of more than 63 terms cannot be optimized */
         5405  +  testcase( pOrderBy && pOrderBy->nExpr==BMS-1 );
         5406  +  if( pOrderBy && pOrderBy->nExpr>=BMS ) pOrderBy = 0;
  5411   5407     sWLB.pOrderBy = pOrderBy;
  5412   5408   
  5413   5409     /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via
  5414   5410     ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */
  5415   5411     if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){
  5416   5412       wctrlFlags &= ~WHERE_WANT_DISTINCT;
  5417   5413     }

Changes to src/whereInt.h.

   117    117         u16 nEq;               /* Number of equality constraints */
   118    118         u16 nSkip;             /* Number of initial index columns to skip */
   119    119         Index *pIndex;         /* Index used, or NULL */
   120    120       } btree;
   121    121       struct {               /* Information for virtual tables */
   122    122         int idxNum;            /* Index number */
   123    123         u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
   124         -      u8 isOrdered;          /* True if satisfies ORDER BY */
          124  +      i8 isOrdered;          /* True if satisfies ORDER BY */
   125    125         u16 omitMask;          /* Terms that may be omitted */
   126    126         char *idxStr;          /* Index identifier string */
   127    127       } vtab;
   128    128     } u;
   129    129     u32 wsFlags;          /* WHERE_* flags describing the plan */
   130    130     u16 nLTerm;           /* Number of entries in aLTerm[] */
   131    131     /**** whereLoopXfer() copies fields above ***********************/
................................................................................
   179    179   ** at the end is the choosen query plan.
   180    180   */
   181    181   struct WherePath {
   182    182     Bitmask maskLoop;     /* Bitmask of all WhereLoop objects in this path */
   183    183     Bitmask revLoop;      /* aLoop[]s that should be reversed for ORDER BY */
   184    184     LogEst nRow;          /* Estimated number of rows generated by this path */
   185    185     LogEst rCost;         /* Total cost of this path */
   186         -  u8 isOrdered;         /* True if this path satisfies ORDER BY */
   187         -  u8 isOrderedValid;    /* True if the isOrdered field is valid */
          186  +  i8 isOrdered;         /* No. of ORDER BY terms satisfied. -1 for unknown */
   188    187     WhereLoop **aLoop;    /* Array of WhereLoop objects implementing this path */
   189    188   };
   190    189   
   191    190   /*
   192    191   ** The query generator uses an array of instances of this structure to
   193    192   ** help it analyze the subexpressions of the WHERE clause.  Each WHERE
   194    193   ** clause subexpression is separated from the others by AND operators,