Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -1929,15 +1929,18 @@ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* ** Estimate the location of a particular key among all keys in an ** index. Store the results in aStat as follows: ** -** aStat[0] Est. number of rows less than pVal -** aStat[1] Est. number of rows equal to pVal +** aStat[0] Est. number of rows less than pRec +** aStat[1] Est. number of rows equal to pRec ** ** Return the index of the sample that is the smallest sample that -** is greater than or equal to pRec. +** is greater than or equal to pRec. Note that this index is not an index +** into the aSample[] array - it is an index into a virtual set of samples +** based on the contents of aSample[] and the number of fields in record +** pRec. */ static int whereKeyStats( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ UnpackedRecord *pRec, /* Vector of values to consider */ @@ -1944,71 +1947,162 @@ int roundUp, /* Round up if true. Round down if false */ tRowcnt *aStat /* OUT: stats written here */ ){ IndexSample *aSample = pIdx->aSample; int iCol; /* Index of required stats in anEq[] etc. */ + int i; /* Index of first sample >= pRec */ + int iSample; /* Smallest sample larger than or equal to pRec */ int iMin = 0; /* Smallest sample not yet tested */ - int i = pIdx->nSample; /* Smallest sample larger than or equal to pRec */ int iTest; /* Next sample to test */ int res; /* Result of comparison operation */ + int nField; /* Number of fields in pRec */ + tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ #ifndef SQLITE_DEBUG UNUSED_PARAMETER( pParse ); #endif assert( pRec!=0 ); - iCol = pRec->nField - 1; assert( pIdx->nSample>0 ); - assert( pRec->nField>0 && iColnSampleCol ); + assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); + + /* Do a binary search to find the first sample greater than or equal + ** to pRec. If pRec contains a single field, the set of samples to search + ** is simply the aSample[] array. If the samples in aSample[] contain more + ** than one fields, all fields following the first are ignored. + ** + ** If pRec contains N fields, where N is more than one, then as well as the + ** samples in aSample[] (truncated to N fields), the search also has to + ** consider prefixes of those samples. For example, if the set of samples + ** in aSample is: + ** + ** aSample[0] = (a, 5) + ** aSample[1] = (a, 10) + ** aSample[2] = (b, 5) + ** aSample[3] = (c, 100) + ** aSample[4] = (c, 105) + ** + ** Then the search space should ideally be the samples above and the + ** unique prefixes [a], [b] and [c]. But since that is hard to organize, + ** the code actually searches this set: + ** + ** 0: (a) + ** 1: (a, 5) + ** 2: (a, 10) + ** 3: (a, 10) + ** 4: (b) + ** 5: (b, 5) + ** 6: (c) + ** 7: (c, 100) + ** 8: (c, 105) + ** 9: (c, 105) + ** + ** For each sample in the aSample[] array, N samples are present in the + ** effective sample array. In the above, samples 0 and 1 are based on + ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. + ** + ** Often, sample i of each block of N effective samples has (i+1) fields. + ** Except, each sample may be extended to ensure that it is greater than or + ** equal to the previous sample in the array. For example, in the above, + ** sample 2 is the first sample of a block of N samples, so at first it + ** appears that it should be 1 field in size. However, that would make it + ** smaller than sample 1, so the binary search would not work. As a result, + ** it is extended to two fields. The duplicates that this creates do not + ** cause any problems. + */ + nField = pRec->nField; + iCol = 0; + iSample = pIdx->nSample * nField; do{ - iTest = (iMin+i)/2; - res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec); + int iSamp; /* Index in aSample[] of test sample */ + int n; /* Number of fields in test sample */ + + iTest = (iMin+iSample)/2; + iSamp = iTest / nField; + if( iSamp>0 ){ + /* The proposed effective sample is a prefix of sample aSample[iSamp]. + ** Specifically, the shortest prefix of at least (1 + iTest%nField) + ** fields that is greater than the previous effective sample. */ + for(n=(iTest % nField) + 1; nnField = n; + res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); if( res<0 ){ + iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; + iMin = iTest+1; + }else if( res==0 && nnSample ); - assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) - || pParse->db->mallocFailed ); - }else{ - /* Otherwise, pRec must be smaller than sample $i and larger than - ** sample ($i-1). */ - assert( i==pIdx->nSample - || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 - || pParse->db->mallocFailed ); - assert( i==0 - || sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 - || pParse->db->mallocFailed ); + if( pParse->db->mallocFailed==0 ){ + if( res==0 ){ + /* If (res==0) is true, then pRec must be equal to sample i. */ + assert( inSample ); + assert( iCol==nField-1 ); + pRec->nField = nField; + assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) + || pParse->db->mallocFailed + ); + }else{ + /* Unless i==pIdx->nSample, indicating that pRec is larger than + ** all samples in the aSample[] array, pRec must be smaller than the + ** (iCol+1) field prefix of sample i. */ + assert( i<=pIdx->nSample && i>=0 ); + pRec->nField = iCol+1; + assert( i==pIdx->nSample + || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 + || pParse->db->mallocFailed ); + + /* if i==0 and iCol==0, then record pRec is smaller than all samples + ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must + ** be greater than or equal to the (iCol) field prefix of sample i. + ** If (i>0), then pRec must also be greater than sample (i-1). */ + if( iCol>0 ){ + pRec->nField = iCol; + assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 + || pParse->db->mallocFailed ); + } + if( i>0 ){ + pRec->nField = nField; + assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 + || pParse->db->mallocFailed ); + } + } } #endif /* ifdef SQLITE_DEBUG */ - /* At this point, aSample[i] is the first sample that is greater than - ** or equal to pVal. Or if i==pIdx->nSample, then all samples are less - ** than pVal. If aSample[i]==pVal, then res==0. - */ if( res==0 ){ + /* Record pRec is equal to sample i */ + assert( iCol==nField-1 ); aStat[0] = aSample[i].anLt[iCol]; aStat[1] = aSample[i].anEq[iCol]; }else{ - tRowcnt iLower, iUpper, iGap; - if( i==0 ){ - iLower = 0; - iUpper = aSample[0].anLt[iCol]; + /* At this point, the (iCol+1) field prefix of aSample[i] is the first + ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec + ** is larger than all samples in the array. */ + tRowcnt iUpper, iGap; + if( i>=pIdx->nSample ){ + iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); }else{ - i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); - iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol]; - iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol]; + iUpper = aSample[i].anLt[iCol]; } - aStat[1] = pIdx->aAvgEq[iCol]; + if( iLower>=iUpper ){ iGap = 0; }else{ iGap = iUpper - iLower; } @@ -2016,11 +2110,15 @@ iGap = (iGap*2)/3; }else{ iGap = iGap/3; } aStat[0] = iLower + iGap; + aStat[1] = pIdx->aAvgEq[iCol]; } + + /* Restore the pRec->nField value before returning. */ + pRec->nField = nField; return i; } #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ /* Index: test/analyze9.test ================================================================== --- test/analyze9.test +++ test/analyze9.test @@ -1131,7 +1131,119 @@ SELECT * FROM t6 WHERE a < 20 AND (b BETWEEN ? AND 60) } { 0 0 0 {SEARCH TABLE t6 USING INDEX bb (b>? AND b 25) matches 76 rows +# (70 * 2/3 + 30). Before, due to the problem, the planner was estimating +# that this matched 100 rows. +# +reset_db +do_execsql_test 26.2.1 { + BEGIN; + CREATE TABLE t1(x, y, z); + CREATE INDEX i1 ON t1(x, y); + CREATE INDEX i2 ON t1(z); + + WITH + cnt(y) AS (SELECT 0 UNION ALL SELECT y+1 FROM cnt WHERE y<99), + letters(x) AS ( + SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D' + ) + INSERT INTO t1(x, y) SELECT x, y FROM letters, cnt; + + WITH + letters(x) AS ( + SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D' + ) + INSERT INTO t1(x, y) SELECT x, 70 FROM letters; + + WITH + cnt(i) AS (SELECT 0 UNION ALL SELECT i+1 FROM cnt WHERE i<9999) + INSERT INTO t1(x, y) SELECT i, i FROM cnt; + + UPDATE t1 SET z = (rowid / 95); + ANALYZE; + COMMIT; +} + +do_eqp_test 26.2.2 { + SELECT * FROM t1 WHERE x='B' AND y>25 AND z=?; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x=? AND y>?)} +} + finish_test + + +