Index: src/where.c
==================================================================
--- src/where.c
+++ src/where.c
@@ -1929,15 +1929,18 @@
 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
 /*
 ** Estimate the location of a particular key among all keys in an
 ** index.  Store the results in aStat as follows:
 **
-**    aStat[0]      Est. number of rows less than pVal
-**    aStat[1]      Est. number of rows equal to pVal
+**    aStat[0]      Est. number of rows less than pRec
+**    aStat[1]      Est. number of rows equal to pRec
 **
 ** Return the index of the sample that is the smallest sample that
-** is greater than or equal to pRec.
+** is greater than or equal to pRec. Note that this index is not an index
+** into the aSample[] array - it is an index into a virtual set of samples
+** based on the contents of aSample[] and the number of fields in record 
+** pRec. 
 */
 static int whereKeyStats(
   Parse *pParse,              /* Database connection */
   Index *pIdx,                /* Index to consider domain of */
   UnpackedRecord *pRec,       /* Vector of values to consider */
@@ -1944,71 +1947,162 @@
   int roundUp,                /* Round up if true.  Round down if false */
   tRowcnt *aStat              /* OUT: stats written here */
 ){
   IndexSample *aSample = pIdx->aSample;
   int iCol;                   /* Index of required stats in anEq[] etc. */
+  int i;                      /* Index of first sample >= pRec */
+  int iSample;                /* Smallest sample larger than or equal to pRec */
   int iMin = 0;               /* Smallest sample not yet tested */
-  int i = pIdx->nSample;      /* Smallest sample larger than or equal to pRec */
   int iTest;                  /* Next sample to test */
   int res;                    /* Result of comparison operation */
+  int nField;                 /* Number of fields in pRec */
+  tRowcnt iLower = 0;         /* anLt[] + anEq[] of largest sample pRec is > */
 
 #ifndef SQLITE_DEBUG
   UNUSED_PARAMETER( pParse );
 #endif
   assert( pRec!=0 );
-  iCol = pRec->nField - 1;
   assert( pIdx->nSample>0 );
-  assert( pRec->nField>0 && iCol<pIdx->nSampleCol );
+  assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol );
+
+  /* Do a binary search to find the first sample greater than or equal
+  ** to pRec. If pRec contains a single field, the set of samples to search
+  ** is simply the aSample[] array. If the samples in aSample[] contain more
+  ** than one field, all fields following the first are ignored.
+  **
+  ** If pRec contains N fields, where N is more than one, then as well as the
+  ** samples in aSample[] (truncated to N fields), the search also has to
+  ** consider prefixes of those samples. For example, if the set of samples
+  ** in aSample is:
+  **
+  **     aSample[0] = (a, 5) 
+  **     aSample[1] = (a, 10) 
+  **     aSample[2] = (b, 5) 
+  **     aSample[3] = (c, 100) 
+  **     aSample[4] = (c, 105)
+  **
+  ** Then the search space should ideally be the samples above and the 
+  ** unique prefixes [a], [b] and [c]. But since that is hard to organize, 
+  ** the code actually searches this set:
+  **
+  **     0: (a) 
+  **     1: (a, 5) 
+  **     2: (a, 10) 
+  **     3: (a, 10) 
+  **     4: (b) 
+  **     5: (b, 5) 
+  **     6: (c) 
+  **     7: (c, 100) 
+  **     8: (c, 105)
+  **     9: (c, 105)
+  **
+  ** For each sample in the aSample[] array, N samples are present in the
+  ** effective sample array. In the above, samples 0 and 1 are based on 
+  ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc.
+  **
+  ** Often, sample i of each block of N effective samples has (i+1) fields.
+  ** Except, each sample may be extended to ensure that it is greater than or
+  ** equal to the previous sample in the array. For example, in the above, 
+  ** sample 2 is the first sample of a block of N samples, so at first it 
+  ** appears that it should be 1 field in size. However, that would make it 
+  ** smaller than sample 1, so the binary search would not work. As a result, 
+  ** it is extended to two fields. The duplicates that this creates do not 
+  ** cause any problems.
+  */
+  nField = pRec->nField;
+  iCol = 0;
+  iSample = pIdx->nSample * nField;
   do{
-    iTest = (iMin+i)/2;
-    res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec);
+    int iSamp;                    /* Index in aSample[] of test sample */
+    int n;                        /* Number of fields in test sample */
+
+    iTest = (iMin+iSample)/2;
+    iSamp = iTest / nField;
+    if( iSamp>0 ){
+      /* The proposed effective sample is a prefix of sample aSample[iSamp].
+      ** Specifically, the shortest prefix of at least (1 + iTest%nField) 
+      ** fields that is greater than the previous effective sample.  */
+      for(n=(iTest % nField) + 1; n<nField; n++){
+        if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break;
+      }
+    }else{
+      n = iTest + 1;
+    }
+
+    pRec->nField = n;
+    res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec);
     if( res<0 ){
+      iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1];
+      iMin = iTest+1;
+    }else if( res==0 && n<nField ){
+      iLower = aSample[iSamp].anLt[n-1];
       iMin = iTest+1;
+      res = -1;
     }else{
-      i = iTest;
+      iSample = iTest;
+      iCol = n-1;
     }
-  }while( res && iMin<i );
+  }while( res && iMin<iSample );
+  i = iSample / nField;
 
 #ifdef SQLITE_DEBUG
   /* The following assert statements check that the binary search code
   ** above found the right answer. This block serves no purpose other
   ** than to invoke the asserts.  */
-  if( res==0 ){
-    /* If (res==0) is true, then sample $i must be equal to pRec */
-    assert( i<pIdx->nSample );
-    assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
-         || pParse->db->mallocFailed );
-  }else{
-    /* Otherwise, pRec must be smaller than sample $i and larger than
-    ** sample ($i-1).  */
-    assert( i==pIdx->nSample 
-         || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
-         || pParse->db->mallocFailed );
-    assert( i==0
-         || sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
-         || pParse->db->mallocFailed );
+  if( pParse->db->mallocFailed==0 ){
+    if( res==0 ){
+      /* If (res==0) is true, then pRec must be equal to sample i. */
+      assert( i<pIdx->nSample );
+      assert( iCol==nField-1 );
+      pRec->nField = nField;
+      assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) 
+           || pParse->db->mallocFailed 
+      );
+    }else{
+      /* Unless i==pIdx->nSample, indicating that pRec is larger than
+      ** all samples in the aSample[] array, pRec must be smaller than the
+      ** (iCol+1) field prefix of sample i.  */
+      assert( i<=pIdx->nSample && i>=0 );
+      pRec->nField = iCol+1;
+      assert( i==pIdx->nSample 
+           || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
+           || pParse->db->mallocFailed );
+
+      /* If i==0 and iCol==0, then record pRec is smaller than all samples
+      ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must
+      ** be greater than or equal to the (iCol) field prefix of sample i.
+      ** If (i>0), then pRec must also be greater than sample (i-1).  */
+      if( iCol>0 ){
+        pRec->nField = iCol;
+        assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0
+             || pParse->db->mallocFailed );
+      }
+      if( i>0 ){
+        pRec->nField = nField;
+        assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
+             || pParse->db->mallocFailed );
+      }
+    }
   }
 #endif /* ifdef SQLITE_DEBUG */
 
-  /* At this point, aSample[i] is the first sample that is greater than
-  ** or equal to pVal.  Or if i==pIdx->nSample, then all samples are less
-  ** than pVal.  If aSample[i]==pVal, then res==0.
-  */
   if( res==0 ){
+    /* Record pRec is equal to sample i */
+    assert( iCol==nField-1 );
     aStat[0] = aSample[i].anLt[iCol];
     aStat[1] = aSample[i].anEq[iCol];
   }else{
-    tRowcnt iLower, iUpper, iGap;
-    if( i==0 ){
-      iLower = 0;
-      iUpper = aSample[0].anLt[iCol];
+    /* At this point, the (iCol+1) field prefix of aSample[i] is the first 
+    ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec
+    ** is larger than all samples in the array. */
+    tRowcnt iUpper, iGap;
+    if( i>=pIdx->nSample ){
+      iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
     }else{
-      i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
-      iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
-      iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
+      iUpper = aSample[i].anLt[iCol];
     }
-    aStat[1] = pIdx->aAvgEq[iCol];
+
     if( iLower>=iUpper ){
       iGap = 0;
     }else{
       iGap = iUpper - iLower;
     }
@@ -2016,11 +2110,15 @@
       iGap = (iGap*2)/3;
     }else{
       iGap = iGap/3;
     }
     aStat[0] = iLower + iGap;
+    aStat[1] = pIdx->aAvgEq[iCol];
   }
+
+  /* Restore the pRec->nField value before returning.  */
+  pRec->nField = nField;
   return i;
 }
 #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
 
 /*

Index: test/analyze9.test
==================================================================
--- test/analyze9.test
+++ test/analyze9.test
@@ -1131,7 +1131,119 @@
     SELECT * FROM t6 WHERE a < 20 AND (b BETWEEN ? AND 60)
   } {
     0 0 0 {SEARCH TABLE t6 USING INDEX bb (b>? AND b<?)}
   }
 }
+
+#-------------------------------------------------------------------------
+# Check that a problem in the way stat4 data is used has been
+# resolved (see below).
+#
+reset_db
+do_test 26.1.1 {
+  db transaction {
+    execsql { 
+      CREATE TABLE t1(x, y, z);
+      CREATE INDEX t1xy ON t1(x, y);
+      CREATE INDEX t1z ON t1(z);
+    }
+    for {set i 0} {$i < 10000} {incr i} {
+      execsql { INSERT INTO t1(x, y) VALUES($i, $i) }
+    }
+    for {set i 0} {$i < 10} {incr i} {
+      execsql {
+        WITH cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt WHERE x<100)
+        INSERT INTO t1(x, y) SELECT 10000+$i, x FROM cnt;
+        INSERT INTO t1(x, y) SELECT 10000+$i, 100;
+      }
+    }
+    execsql {
+      UPDATE t1 SET z = rowid / 20;
+      ANALYZE;
+    }
+  }
+} {}
+
+do_execsql_test 26.1.2 {
+  SELECT count(*) FROM t1 WHERE x = 10000 AND y < 50;
+} {49}
+do_execsql_test 26.1.3 {
+  SELECT count(*) FROM t1 WHERE z = 444;
+} {20}
+
+# The analyzer knows that any (z=?) expression matches 20 rows. So it
+# will use index "t1z" if the estimate of hits for (x=10000 AND y<50)
+# is greater than 20 rows.
+#
+# And it should be. The analyzer has a stat4 sample as follows:
+#
+#   sample=(x=10000, y=100) nLt=(10000 10099)
+#
+# There should be no other samples that start with (x=10000). So it knows 
+# that (x=10000 AND y<50) must match somewhere between 0 and 99 rows, but
+# no more than that. Guessing less than 20 is therefore unreasonable.
+#
+# At one point though, due to a problem in whereKeyStats(), the planner was
+# estimating that (x=10000 AND y<50) would match only 2 rows.
+#
+do_eqp_test 26.1.4 {
+  SELECT * FROM t1 WHERE x = 10000 AND y < 50 AND z = 444;
+} {
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?)}
+}
+
+
+# This test - 26.2.* - tests that another manifestation of the same problem
+# is no longer present in the library. Assuming:
+# 
+#   CREATE INDEX t1xy ON t1(x, y)
+#
+# and that there are samples for index t1xy as follows:
+#
+#
+#   sample=('A', 70)        nEq=(100, 2)        nLt=(900, 970)
+#   sample=('B', 70)        nEq=(100, 2)        nLt=(1000, 1070)    
+#
+# the planner should estimate that (x = 'B' AND y > 25) matches 76 rows
+# (70 * 2/3 + 30). Before, due to the problem, the planner was estimating 
+# that this matched 100 rows.
+# 
+reset_db
+do_execsql_test 26.2.1 {
+  BEGIN;
+    CREATE TABLE t1(x, y, z);
+    CREATE INDEX i1 ON t1(x, y);
+    CREATE INDEX i2 ON t1(z);
+  
+    WITH 
+    cnt(y) AS (SELECT 0 UNION ALL SELECT y+1 FROM cnt WHERE y<99),
+    letters(x) AS (
+      SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D'
+    )
+    INSERT INTO t1(x, y) SELECT x, y FROM letters, cnt;
+  
+    WITH
+    letters(x) AS (
+      SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D'
+    )
+    INSERT INTO t1(x, y) SELECT x, 70 FROM letters;
+  
+    WITH
+    cnt(i) AS (SELECT 0 UNION ALL SELECT i+1 FROM cnt WHERE i<9999)
+    INSERT INTO t1(x, y) SELECT i, i FROM cnt;
+  
+    UPDATE t1 SET z = (rowid / 95);
+    ANALYZE;
+  COMMIT;
+}
+
+do_eqp_test 26.2.2 {
+  SELECT * FROM t1 WHERE x='B' AND y>25 AND z=?;
+} {
+  0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x=? AND y>?)}
+}
+
 
 finish_test
+
+
+