SQLite

Check-in [b150902579]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experiments with the optimization of ORDER BY and GROUP BY clauses.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | orderby-planning
Files: files | file ages | folders
SHA1: b150902579d708b454efd5f8750e26a816f7f1a6
User & Date: drh 2014-03-18 15:30:27.660
Context
2014-03-18
18:59
Adjust the query planner to keep track of the number of ORDER BY terms satisfied. Still doesn't do anything with this information. Some tests fail after this check-in, but all failures are believed to be benign. The failures will be addressed at a later stage. (check-in: 59d49b7fc4 user: drh tags: orderby-planning)
15:30
Experiments with the optimization of ORDER BY and GROUP BY clauses. (check-in: b150902579 user: drh tags: orderby-planning)
2014-03-17
15:06
Clean up some obsolete "register" declarations in printf.c. (check-in: ecd9d3f945 user: drh tags: trunk)
Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to src/where.c.
4502
4503
4504
4505
4506
4507
4508
4509
4510


4511
4512
4513
4514
4515
4516
4517
4502
4503
4504
4505
4506
4507
4508


4509
4510
4511
4512
4513
4514
4515
4516
4517







-
-
+
+







    if( i>=nConstraint ){
      pNew->nLTerm = mxTerm+1;
      assert( pNew->nLTerm<=pNew->nLSlot );
      pNew->u.vtab.idxNum = pIdxInfo->idxNum;
      pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr;
      pIdxInfo->needToFreeIdxStr = 0;
      pNew->u.vtab.idxStr = pIdxInfo->idxStr;
      pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0)
                                     && pIdxInfo->orderByConsumed);
      pNew->u.vtab.isOrdered = (i8)(pIdxInfo->orderByConsumed ?
                                      pIdxInfo->nOrderBy : 0);
      pNew->rSetup = 0;
      pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost);
      pNew->nOut = sqlite3LogEst(pIdxInfo->estimatedRows);
      whereLoopInsert(pBuilder, pNew);
      if( pNew->u.vtab.needFree ){
        sqlite3_free(pNew->u.vtab.idxStr);
        pNew->u.vtab.needFree = 0;
4664
4665
4666
4667
4668
4669
4670
4671

4672
4673
4674
4675



4676
4677
4678
4679
4680
4681
4682
4683
4684
4685

4686
4687
4688
4689
4690
4691
4692
4664
4665
4666
4667
4668
4669
4670

4671
4672



4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684

4685
4686
4687
4688
4689
4690
4691
4692







-
+

-
-
-
+
+
+









-
+







  whereLoopClear(db, pNew);
  return rc;
}

/*
** Examine a WherePath (with the addition of the extra WhereLoop of the 5th
** parameters) to see if it outputs rows in the requested ORDER BY
** (or GROUP BY) without requiring a separate sort operation.  Return:
** (or GROUP BY) without requiring a separate sort operation.  Return N:
** 
**    0:  ORDER BY is not satisfied.  Sorting required
**    1:  ORDER BY is satisfied.      Omit sorting
**   -1:  Unknown at this time
**   N>0:   N terms of the ORDER BY clause are satisfied
**   N==0:  No terms of the ORDER BY clause are satisfied
**   N<0:   Unknown yet how many terms of ORDER BY might be satisfied.   
**
** Note that processing for WHERE_GROUPBY and WHERE_DISTINCTBY is not as
** strict.  With GROUP BY and DISTINCT the only requirement is that
** equivalent rows appear immediately adjacent to one another.  GROUP BY
** and DISTINT do not require rows to appear in any particular order as long
** as equivelent rows are grouped together.  Thus for GROUP BY and DISTINCT
** the pOrderBy terms can be matched in any order.  With ORDER BY, the 
** pOrderBy terms must be matched in strict left-to-right order.
*/
static int wherePathSatisfiesOrderBy(
static i8 wherePathSatisfiesOrderBy(
  WhereInfo *pWInfo,    /* The WHERE clause */
  ExprList *pOrderBy,   /* ORDER BY or GROUP BY or DISTINCT clause to check */
  WherePath *pPath,     /* The WherePath to check */
  u16 wctrlFlags,       /* Might contain WHERE_GROUPBY or WHERE_DISTINCTBY */
  u16 nLoop,            /* Number of entries in pPath->aLoop[] */
  WhereLoop *pLast,     /* Add this WhereLoop to the end of pPath->aLoop[] */
  Bitmask *pRevMask     /* OUT: Mask of WhereLoops to run in reverse order */
4911
4912
4913
4914
4915
4916
4917
4918

4919
4920
4921
4922
4923
4924
4925
4911
4912
4913
4914
4915
4916
4917

4918
4919
4920
4921
4922
4923
4924
4925







-
+







        if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue;
        if( (mTerm&~orderDistinctMask)==0 ){
          obSat |= MASKBIT(i);
        }
      }
    }
  } /* End the loop over all WhereLoops from outer-most down to inner-most */
  if( obSat==obDone ) return 1;
  if( obSat==obDone ) return nOrderBy;
  if( !isOrderDistinct ) return 0;
  return -1;
}

#ifdef WHERETRACE_ENABLED
/* For debugging use only: */
static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){
4997
4998
4999
5000
5001
5002
5003
5004

5005
5006
5007
5008
5009

5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024

5025
5026
5027
5028
5029
5030
5031
5032
5033
5034


5035
5036

5037
5038

5039
5040
5041
5042
5043
5044

5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055

5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069

5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087

5088
5089
5090
5091
5092
5093
5094
5095
5096
5097

5098
5099
5100

5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113

5114
5115
5116

5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
4997
4998
4999
5000
5001
5002
5003

5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023


5024
5025
5026
5027
5028
5029
5030
5031
5032


5033
5034
5035

5036


5037






5038



5039
5040
5041
5042
5043
5044
5045

5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059

5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077

5078
5079
5080
5081
5082
5083
5084
5085
5086
5087

5088
5089
5090

5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103

5104
5105
5106

5107
5108
5109
5110
5111
5112
5113
5114
5115

5116
5117
5118
5119
5120
5121
5122







-
+





+













-
-
+








-
-
+
+

-
+
-
-
+
-
-
-
-
-
-
+
-
-
-







-
+













-
+

















-
+









-
+


-
+












-
+


-
+








-







  aFrom[0].nRow = MIN(pParse->nQueryLoop, 46);  assert( 46==sqlite3LogEst(25) );
  nFrom = 1;

  /* Precompute the cost of sorting the final result set, if the caller
  ** to sqlite3WhereBegin() was concerned about sorting */
  rSortCost = 0;
  if( pWInfo->pOrderBy==0 || nRowEst==0 ){
    aFrom[0].isOrderedValid = 1;
    aFrom[0].isOrdered = 0;
  }else{
    /* TUNING: Estimated cost of sorting is 48*N*log2(N) where N is the
    ** number of output rows. The 48 is the expected size of a row to sort. 
    ** FIXME:  compute a better estimate of the 48 multiplier based on the
    ** result set expressions. */
    aFrom[0].isOrdered = -1;
    rSortCost = nRowEst + estLog(nRowEst);
    WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost));
  }

  /* Compute successively longer WherePaths using the previous generation
  ** of WherePaths as the basis for the next.  Keep track of the mxChoice
  ** best paths at each generation */
  for(iLoop=0; iLoop<nLoop; iLoop++){
    nTo = 0;
    for(ii=0, pFrom=aFrom; ii<nFrom; ii++, pFrom++){
      for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
        Bitmask maskNew;
        Bitmask revMask = 0;
        u8 isOrderedValid = pFrom->isOrderedValid;
        u8 isOrdered = pFrom->isOrdered;
        i8 isOrdered = pFrom->isOrdered;
        if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue;
        if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue;
        /* At this point, pWLoop is a candidate to be the next loop. 
        ** Compute its cost */
        rCost = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow);
        rCost = sqlite3LogEstAdd(rCost, pFrom->rCost);
        nOut = pFrom->nRow + pWLoop->nOut;
        maskNew = pFrom->maskLoop | pWLoop->maskSelf;
        if( !isOrderedValid ){
          switch( wherePathSatisfiesOrderBy(pWInfo,
        if( isOrdered<0 ){
          isOrdered = wherePathSatisfiesOrderBy(pWInfo,
                       pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
                       iLoop, pWLoop, &revMask) ){
                       iLoop, pWLoop, &revMask);
            case 1:  /* Yes.  pFrom+pWLoop does satisfy the ORDER BY clause */
              isOrdered = 1;
          if( isOrdered==0 ){
              isOrderedValid = 1;
              break;
            case 0:  /* No.  pFrom+pWLoop will require a separate sort */
              isOrdered = 0;
              isOrderedValid = 1;
              rCost = sqlite3LogEstAdd(rCost, rSortCost);
            rCost = sqlite3LogEstAdd(rCost, rSortCost);
              break;
            default: /* Cannot tell yet.  Try again on the next iteration */
              break;
          }
        }else{
          revMask = pFrom->revLoop;
        }
        /* Check to see if pWLoop should be added to the mxChoice best so far */
        for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){
          if( pTo->maskLoop==maskNew
           && pTo->isOrderedValid==isOrderedValid
           && ((pTo->isOrdered^isOrdered)&80)==0
           && ((pTo->rCost<=rCost && pTo->nRow<=nOut) ||
                (pTo->rCost>=rCost && pTo->nRow>=nOut))
          ){
            testcase( jj==nTo-1 );
            break;
          }
        }
        if( jj>=nTo ){
          if( nTo>=mxChoice && rCost>=mxCost ){
#ifdef WHERETRACE_ENABLED /* 0x4 */
            if( sqlite3WhereTrace&0x4 ){
              sqlite3DebugPrintf("Skip   %s cost=%-3d,%3d order=%c\n",
                  wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
                  isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
                  isOrdered>=0 ? isOrdered+'0' : '?');
            }
#endif
            continue;
          }
          /* Add a new Path to the aTo[] set */
          if( nTo<mxChoice ){
            /* Increase the size of the aTo set by one */
            jj = nTo++;
          }else{
            /* New path replaces the prior worst to keep count below mxChoice */
            jj = mxI;
          }
          pTo = &aTo[jj];
#ifdef WHERETRACE_ENABLED /* 0x4 */
          if( sqlite3WhereTrace&0x4 ){
            sqlite3DebugPrintf("New    %s cost=%-3d,%3d order=%c\n",
                wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
                isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
                isOrdered>=0 ? isOrdered+'0' : '?');
          }
#endif
        }else{
          if( pTo->rCost<=rCost && pTo->nRow<=nOut ){
#ifdef WHERETRACE_ENABLED /* 0x4 */
            if( sqlite3WhereTrace&0x4 ){
              sqlite3DebugPrintf(
                  "Skip   %s cost=%-3d,%3d order=%c",
                  wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
                  isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
                  isOrdered>=0 ? isOrdered+'0' : '?');
              sqlite3DebugPrintf("   vs %s cost=%-3d,%d order=%c\n",
                  wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
                  pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
                  pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
            }
#endif
            testcase( pTo->rCost==rCost );
            continue;
          }
          testcase( pTo->rCost==rCost+1 );
          /* A new and better score for a previously created equivalent path */
#ifdef WHERETRACE_ENABLED /* 0x4 */
          if( sqlite3WhereTrace&0x4 ){
            sqlite3DebugPrintf(
                "Update %s cost=%-3d,%3d order=%c",
                wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
                isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?');
                isOrdered>=0 ? isOrdered+'0' : '?');
            sqlite3DebugPrintf("  was %s cost=%-3d,%3d order=%c\n",
                wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
                pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
                pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
          }
#endif
        }
        /* pWLoop is a winner.  Add it to the set of best so far */
        pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf;
        pTo->revLoop = revMask;
        pTo->nRow = nOut;
        pTo->rCost = rCost;
        pTo->isOrderedValid = isOrderedValid;
        pTo->isOrdered = isOrdered;
        memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop);
        pTo->aLoop[iLoop] = pWLoop;
        if( nTo>=mxChoice ){
          mxI = 0;
          mxCost = aTo[0].rCost;
          mxOut = aTo[0].nRow;
5143
5144
5145
5146
5147
5148
5149
5150
5151


5152
5153
5154
5155
5156
5157
5158
5133
5134
5135
5136
5137
5138
5139


5140
5141
5142
5143
5144
5145
5146
5147
5148







-
-
+
+








#ifdef WHERETRACE_ENABLED  /* >=2 */
    if( sqlite3WhereTrace>=2 ){
      sqlite3DebugPrintf("---- after round %d ----\n", iLoop);
      for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){
        sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c",
           wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
           pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?');
        if( pTo->isOrderedValid && pTo->isOrdered ){
           pTo->isOrdered>=0 ? (pTo->isOrdered+'0') : '?');
        if( pTo->isOrdered>0 ){
          sqlite3DebugPrintf(" rev=0x%llx\n", pTo->revLoop);
        }else{
          sqlite3DebugPrintf("\n");
        }
      }
    }
#endif
5187
5188
5189
5190
5191
5192
5193

5194
5195
5196




5197
5198
5199
5200
5201
5202
5203
5177
5178
5179
5180
5181
5182
5183
5184



5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195







+
-
-
-
+
+
+
+







   && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0
   && pWInfo->eDistinct==WHERE_DISTINCT_NOOP
   && nRowEst
  ){
    Bitmask notUsed;
    int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom,
                 WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], &notUsed);
    if( rc==pWInfo->pResultSet->nExpr ){
    if( rc==1 ) pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
  }
  if( pFrom->isOrdered ){
      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
    }
  }
  if( pWInfo->pOrderBy && pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){
    if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){
      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
    }else{
      pWInfo->bOBSat = 1;
      pWInfo->revMask = pFrom->revLoop;
    }
  }
5382
5383
5384
5385
5386
5387
5388
5389

5390
5391
5392
5393
5394
5395
5396
5374
5375
5376
5377
5378
5379
5380

5381
5382
5383
5384
5385
5386
5387
5388







-
+







** be used to compute the appropriate cursor depending on which index is
** used.
*/
WhereInfo *sqlite3WhereBegin(
  Parse *pParse,        /* The parser context */
  SrcList *pTabList,    /* FROM clause: A list of all tables to be scanned */
  Expr *pWhere,         /* The WHERE clause */
  ExprList *pOrderBy,   /* An ORDER BY clause, or NULL */
  ExprList *pOrderBy,   /* An ORDER BY (or GROUP BY) clause, or NULL */
  ExprList *pResultSet, /* Result set of the query */
  u16 wctrlFlags,       /* One of the WHERE_* flags defined in sqliteInt.h */
  int iIdxCur           /* If WHERE_ONETABLE_ONLY is set, index cursor number */
){
  int nByteWInfo;            /* Num. bytes allocated for WhereInfo struct */
  int nTabList;              /* Number of elements in pTabList */
  WhereInfo *pWInfo;         /* Will become the return value of this function */
5404
5405
5406
5407
5408
5409
5410




5411
5412
5413
5414
5415
5416
5417
5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413







+
+
+
+







  sqlite3 *db;               /* Database connection */
  int rc;                    /* Return code */


  /* Variable initialization */
  db = pParse->db;
  memset(&sWLB, 0, sizeof(sWLB));

  /* An ORDER/GROUP BY clause of more than 63 terms cannot be optimized */
  testcase( pOrderBy && pOrderBy->nExpr==BMS-1 );
  if( pOrderBy && pOrderBy->nExpr>=BMS ) pOrderBy = 0;
  sWLB.pOrderBy = pOrderBy;

  /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via
  ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */
  if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){
    wctrlFlags &= ~WHERE_WANT_DISTINCT;
  }
Changes to src/whereInt.h.
117
118
119
120
121
122
123
124

125
126
127
128
129
130
131
117
118
119
120
121
122
123

124
125
126
127
128
129
130
131







-
+







      u16 nEq;               /* Number of equality constraints */
      u16 nSkip;             /* Number of initial index columns to skip */
      Index *pIndex;         /* Index used, or NULL */
    } btree;
    struct {               /* Information for virtual tables */
      int idxNum;            /* Index number */
      u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
      u8 isOrdered;          /* True if satisfies ORDER BY */
      i8 isOrdered;          /* True if satisfies ORDER BY */
      u16 omitMask;          /* Terms that may be omitted */
      char *idxStr;          /* Index identifier string */
    } vtab;
  } u;
  u32 wsFlags;          /* WHERE_* flags describing the plan */
  u16 nLTerm;           /* Number of entries in aLTerm[] */
  /**** whereLoopXfer() copies fields above ***********************/
179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194
179
180
181
182
183
184
185

186

187
188
189
190
191
192
193







-
+
-







** at the end is the choosen query plan.
*/
struct WherePath {
  Bitmask maskLoop;     /* Bitmask of all WhereLoop objects in this path */
  Bitmask revLoop;      /* aLoop[]s that should be reversed for ORDER BY */
  LogEst nRow;          /* Estimated number of rows generated by this path */
  LogEst rCost;         /* Total cost of this path */
  u8 isOrdered;         /* True if this path satisfies ORDER BY */
  i8 isOrdered;         /* No. of ORDER BY terms satisfied. -1 for unknown */
  u8 isOrderedValid;    /* True if the isOrdered field is valid */
  WhereLoop **aLoop;    /* Array of WhereLoop objects implementing this path */
};

/*
** The query generator uses an array of instances of this structure to
** help it analyze the subexpressions of the WHERE clause.  Each WHERE
** clause subexpression is separated from the others by AND operators,