Index: src/vdbemem.c
==================================================================
--- src/vdbemem.c
+++ src/vdbemem.c
@@ -1080,10 +1080,12 @@
       pVal->u.i = -1 * pVal->u.i;
       /* (double)-1 In case of SQLITE_OMIT_FLOATING_POINT... */
       pVal->r = (double)-1 * pVal->r;
       sqlite3ValueApplyAffinity(pVal, affinity, enc);
     }
+  }else if( op==TK_NULL ){
+    pVal = sqlite3ValueNew(db);
   }
 #ifndef SQLITE_OMIT_BLOB_LITERAL
   else if( op==TK_BLOB ){
     int nVal;
     assert( pExpr->u.zToken[0]=='x' || pExpr->u.zToken[0]=='X' );

Index: src/where.c
==================================================================
--- src/where.c
+++ src/where.c
@@ -115,10 +115,11 @@
 #define TERM_CODED      0x04   /* This term is already coded */
 #define TERM_COPIED     0x08   /* Has a child */
 #define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
 #define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
 #define TERM_OR_OK      0x40   /* Used during OR-clause processing */
+#define TERM_VNULL      0x80   /* Manufactured x>NULL or x<=NULL term */
 
 /*
 ** An instance of the following structure holds all information about a
 ** WHERE clause.  Mostly this is a container for one or more WhereTerms.
 */
@@ -208,10 +209,11 @@
 #define WO_GE     (WO_EQ<<(TK_GE-TK_EQ))
 #define WO_MATCH  0x040
 #define WO_ISNULL 0x080
 #define WO_OR     0x100       /* Two or more OR-connected terms */
 #define WO_AND    0x200       /* Two or more AND-connected terms */
+#define WO_NOOP   0x800       /* This term does not restrict search space */
 
 #define WO_ALL    0xfff       /* Mask of all possible WO_* values */
 #define WO_SINGLE 0x0ff       /* Mask of all non-compound WO_* values */
 
 /*
@@ -1058,11 +1060,11 @@
         pWC->a[idxNew].iParent = idxTerm;
         pTerm->nChild = 1;
       }else{
         sqlite3ExprListDelete(db, pList);
       }
-      pTerm->eOperator = 0;  /* case 1 trumps case 2 */
+      pTerm->eOperator = WO_NOOP;  /* case 1 trumps case 2 */
     }
   }
 }
 #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */
 
@@ -1322,10 +1324,46 @@
       pNewTerm->prereqAll = pTerm->prereqAll;
     }
   }
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
 
+#ifdef SQLITE_ENABLE_STAT2
+  /* When sqlite_stat2 histogram data is available an operator of the
+  ** form "x IS NOT NULL" can sometimes be evaluated more efficiently
+  ** as "x>NULL" if x is not an INTEGER PRIMARY KEY.  So construct a
+  ** virtual term of that form.
+  **
+  ** Note that the virtual term must be tagged with TERM_VNULL.  This
+  ** TERM_VNULL tag will suppress the not-null check at the beginning
+  ** of the loop.  Without the TERM_VNULL flag, the not-null check at
+  ** the start of the loop will prevent any results from being returned.
+  */
+  if( pExpr->op==TK_NOTNULL && pExpr->pLeft->iColumn>=0 ){
+    Expr *pNewExpr;
+    Expr *pLeft = pExpr->pLeft;
+    int idxNew;
+    WhereTerm *pNewTerm;
+
+    pNewExpr = sqlite3PExpr(pParse, TK_GT,
+                            sqlite3ExprDup(db, pLeft, 0),
+                            sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0);
+
+    idxNew = whereClauseInsert(pWC, pNewExpr,
+                              TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL);
+    testcase( idxNew==0 );
+    pNewTerm = &pWC->a[idxNew];
+    pNewTerm->leftCursor = pLeft->iTable;
+    pNewTerm->u.leftColumn = pLeft->iColumn;
+    pNewTerm->eOperator = WO_GT;
+    pNewTerm->iParent = idxTerm;
+    pTerm = &pWC->a[idxTerm];
+    pTerm->nChild = 1;
+    pTerm->wtFlags |= TERM_COPIED;
+    pNewTerm->prereqAll = pTerm->prereqAll;
+  }
+#endif /* SQLITE_ENABLE_STAT2 */
+
   /* Prevent ON clause terms of a LEFT JOIN from being used to drive
   ** an index for tables to the left of the join.
   */
   pTerm->prereqRight |= extraRight;
 }
@@ -2199,15 +2237,22 @@
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
 
 /*
 ** Argument pIdx is a pointer to an index structure that has an array of
 ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
-** stored in Index.aSample. The domain of values stored in said column
-** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions.
-** Region 0 contains all values smaller than the first sample value. Region
-** 1 contains values larger than or equal to the value of the first sample,
-** but smaller than the value of the second. And so on.
+** stored in Index.aSample. These samples divide the domain of values stored
+** in the index into (SQLITE_INDEX_SAMPLES+1) regions.
+** Region 0 contains all values less than the first sample value. Region
+** 1 contains values between the first and second samples.  Region 2 contains
+** values between samples 2 and 3.  And so on.  Region SQLITE_INDEX_SAMPLES
+** contains values larger than the last sample.
+**
+** If the index contains many duplicates of a single value, then it is
+** possible that two or more adjacent samples can hold the same value.
+** When that is the case, the smallest possible region code is returned
+** when roundUp is false and the largest possible region code is returned
+** when roundUp is true.
 **
 ** If successful, this function determines which of the regions value 
 ** pVal lies in, sets *piRegion to the region index (a value between 0
 ** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
 ** Or, if an OOM occurs while converting text values between encodings,
@@ -2216,22 +2261,34 @@
 #ifdef SQLITE_ENABLE_STAT2
 static int whereRangeRegion(
   Parse *pParse,              /* Database connection */
   Index *pIdx,                /* Index to consider domain of */
   sqlite3_value *pVal,        /* Value to consider */
+  int roundUp,                /* Return largest valid region if true */
   int *piRegion               /* OUT: Region of domain in which value lies */
 ){
+  assert( roundUp==0 || roundUp==1 );
   if( ALWAYS(pVal) ){
     IndexSample *aSample = pIdx->aSample;
     int i = 0;
     int eType = sqlite3_value_type(pVal);
 
     if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
       double r = sqlite3_value_double(pVal);
       for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
         if( aSample[i].eType==SQLITE_NULL ) continue;
-        if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;
+        if( aSample[i].eType>=SQLITE_TEXT ) break;
+        if( roundUp ){
+          if( aSample[i].u.r>r ) break;
+        }else{
+          if( aSample[i].u.r>=r ) break;
+        }
+      }
+    }else if( eType==SQLITE_NULL ){
+      i = 0;
+      if( roundUp ){
+        while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++;
       }
     }else{ 
       sqlite3 *db = pParse->db;
       CollSeq *pColl;
       const u8 *z;
@@ -2258,11 +2315,11 @@
         assert( z && pColl && pColl->xCmp );
       }
       n = sqlite3ValueBytes(pVal, pColl->enc);
 
       for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
-        int r;
+        int c;
         int eSampletype = aSample[i].eType;
         if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
         if( (eSampletype!=eType) ) break;
 #ifndef SQLITE_OMIT_UTF16
         if( pColl->enc!=SQLITE_UTF8 ){
@@ -2272,18 +2329,18 @@
           );
           if( !zSample ){
             assert( db->mallocFailed );
             return SQLITE_NOMEM;
           }
-          r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
+          c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
           sqlite3DbFree(db, zSample);
         }else
 #endif
         {
-          r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
+          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
         }
-        if( r>0 ) break;
+        if( c-roundUp>=0 ) break;
       }
     }
 
     assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
     *piRegion = i;
@@ -2362,13 +2419,13 @@
 ** constraints (if any). A return value of 100 indicates that it is expected
 ** that the range scan will visit every row (100%) selected by the equality
 ** constraints.
 **
 ** In the absence of sqlite_stat2 ANALYZE data, each range inequality
-** reduces the search space by 2/3rds.  Hence a single constraint (x>?)
-** results in a return of 33 and a range constraint (x>? AND x<?) results
-** in a return of 11.
+** reduces the search space by 3/4ths.  Hence a single constraint (x>?)
+** results in a return of 25 and a range constraint (x>? AND x<?) results
+** in a return of 6.
 */
 static int whereRangeScanEst(
   Parse *pParse,       /* Parsing & code generating context */
   Index *p,            /* The index containing the range-compared column; "x" */
   int nEq,             /* index into p->aCol[] of the range-compared column */
@@ -2384,73 +2441,209 @@
     sqlite3_value *pLowerVal = 0;
     sqlite3_value *pUpperVal = 0;
     int iEst;
     int iLower = 0;
     int iUpper = SQLITE_INDEX_SAMPLES;
+    int roundUpUpper;
+    int roundUpLower;
     u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
 
     if( pLower ){
       Expr *pExpr = pLower->pExpr->pRight;
       rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
+      assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
+      roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
     }
     if( rc==SQLITE_OK && pUpper ){
       Expr *pExpr = pUpper->pExpr->pRight;
       rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);
+      assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
+      roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0;
     }
 
     if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
       sqlite3ValueFree(pLowerVal);
       sqlite3ValueFree(pUpperVal);
       goto range_est_fallback;
     }else if( pLowerVal==0 ){
-      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
+      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
       if( pLower ) iLower = iUpper/2;
     }else if( pUpperVal==0 ){
-      rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
+      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
       if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
     }else{
-      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
+      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
       if( rc==SQLITE_OK ){
-        rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
+        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
       }
     }
+    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
 
     iEst = iUpper - iLower;
     testcase( iEst==SQLITE_INDEX_SAMPLES );
     assert( iEst<=SQLITE_INDEX_SAMPLES );
     if( iEst<1 ){
-      iEst = 1;
+      *piEst = 50/SQLITE_INDEX_SAMPLES;
+    }else{
+      *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
     }
-
     sqlite3ValueFree(pLowerVal);
     sqlite3ValueFree(pUpperVal);
-    *piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES;
     return rc;
   }
 range_est_fallback:
 #else
   UNUSED_PARAMETER(pParse);
   UNUSED_PARAMETER(p);
   UNUSED_PARAMETER(nEq);
 #endif
   assert( pLower || pUpper );
-  if( pLower && pUpper ){
-    *piEst = 11;
+  *piEst = 100;
+  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4;
+  if( pUpper ) *piEst /= 4;
+  return rc;
+}
+
+#ifdef SQLITE_ENABLE_STAT2
+/*
+** Estimate the number of rows that will be returned based on
+** an equality constraint x=VALUE and where that VALUE occurs in
+** the histogram data.  This only works when x is the left-most
+** column of an index and sqlite_stat2 histogram data is available
+** for that index.
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+int whereEqualScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  Index *p,            /* The index whose left-most column is x */
+  Expr *pExpr,         /* Expression for VALUE in the x=VALUE constraint */
+  double *pnRow        /* Write the revised row estimate here */
+){
+  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
+  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
+  u8 aff;                   /* Column affinity */
+  int rc;                   /* Subfunction return code */
+  double nRowEst;           /* New estimate of the number of rows */
+
+  assert( p->aSample!=0 );
+  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
+  rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
+  if( rc ) goto whereEqualScanEst_cancel;
+  if( pRhs==0 ) return SQLITE_NOTFOUND;
+  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
+  if( rc ) goto whereEqualScanEst_cancel;
+  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
+  if( rc ) goto whereEqualScanEst_cancel;
+  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
+  if( iLower>=iUpper ){
+    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
+    if( nRowEst<*pnRow ) *pnRow = nRowEst;
   }else{
-    *piEst = 33;
+    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
+    *pnRow = nRowEst;
+  }
+
+whereEqualScanEst_cancel:
+  sqlite3ValueFree(pRhs);
+  return rc;
+}
+#endif /* defined(SQLITE_ENABLE_STAT2) */
+
+#ifdef SQLITE_ENABLE_STAT2
+/*
+** Estimate the number of rows that will be returned based on
+** an IN constraint where the right-hand side of the IN operator
+** is a list of values.  Example:
+**
+**        WHERE x IN (1,2,3,4)
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+int whereInScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  Index *p,            /* The index whose left-most column is x */
+  ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
+  double *pnRow        /* Write the revised row estimate here */
+){
+  sqlite3_value *pVal = 0;  /* One value from list */
+  int iLower, iUpper;       /* Range of histogram regions containing pVal */
+  u8 aff;                   /* Column affinity */
+  int rc = SQLITE_OK;       /* Subfunction return code */
+  double nRowEst;           /* New estimate of the number of rows */
+  int nSpan = 0;            /* Number of histogram regions spanned */
+  int nSingle = 0;          /* Histogram regions hit by a single value */
+  int nNotFound = 0;        /* Count of NULL or non-constant values */
+  int i;                               /* Loop counter */
+  u8 aSpan[SQLITE_INDEX_SAMPLES+1];    /* Histogram regions that are spanned */
+  u8 aSingle[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions hit once */
+
+  assert( p->aSample!=0 );
+  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
+  memset(aSpan, 0, sizeof(aSpan));
+  memset(aSingle, 0, sizeof(aSingle));
+  for(i=0; i<pList->nExpr; i++){
+    sqlite3ValueFree(pVal);
+    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
+    if( rc ) break;
+    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
+      nNotFound++;
+      continue;
+    }
+    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
+    if( rc ) break;
+    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
+    if( rc ) break;
+    if( iLower>=iUpper ){
+      aSingle[iLower] = 1;
+    }else{
+      assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
+      while( iLower<iUpper ) aSpan[iLower++] = 1;
+    }
+  }
+  if( rc==SQLITE_OK ){
+    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
+      if( aSpan[i] ){
+        nSpan++;
+      }else if( aSingle[i] ){
+        nSingle++;
+      }
+    }
+    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
+               + nNotFound*p->aiRowEst[1];
+    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
+    *pnRow = nRowEst;
+    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
+                 nSpan, nSingle, nNotFound, nRowEst));
   }
+  sqlite3ValueFree(pVal);
   return rc;
 }
+#endif /* defined(SQLITE_ENABLE_STAT2) */
 
 
 /*
-** Find the query plan for accessing a particular table.  Write the
+** Find the best query plan for accessing a particular table.  Write the
 ** best query plan and its cost into the WhereCost object supplied as the
 ** last parameter.
 **
 ** The lowest cost plan wins.  The cost is an estimate of the amount of
-** CPU and disk I/O need to process the request using the selected plan.
+** CPU and disk I/O needed to process the requested result.
 ** Factors that influence cost include:
 **
 **    *  The estimated number of rows that will be retrieved.  (The
 **       fewer the better.)
 **
@@ -2465,11 +2658,11 @@
 ** SQLITE_BIG_DBL. If a plan is found that uses the named index, 
 ** then the cost is calculated in the usual way.
 **
 ** If a NOT INDEXED clause (pSrc->notIndexed!=0) was attached to the table 
 ** in the SELECT statement, then no indexes are considered. However, the 
-** selected plan may still take advantage of the tables built-in rowid
+** selected plan may still take advantage of the built-in rowid primary key
 ** index.
 */
 static void bestBtreeIndex(
   Parse *pParse,              /* The parsing context */
   WhereClause *pWC,           /* The WHERE clause */
@@ -2508,13 +2701,15 @@
     /* An INDEXED BY clause specifies a particular index to use */
     pIdx = pProbe = pSrc->pIndex;
     wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
     eqTermMask = idxEqTermMask;
   }else{
-    /* There is no INDEXED BY clause.  Create a fake Index object to
-    ** represent the primary key */
-    Index *pFirst;                /* Any other index on the table */
+    /* There is no INDEXED BY clause.  Create a fake Index object in local
+    ** variable sPk to represent the rowid primary key index.  Make this
+    ** fake index the first in a chain of Index objects with all of the real
+    ** indices to follow */
+    Index *pFirst;                  /* First of real indices on the table */
     memset(&sPk, 0, sizeof(Index));
     sPk.nColumn = 1;
     sPk.aiColumn = &aiColumnPk;
     sPk.aiRowEst = aiRowEstPk;
     sPk.onError = OE_Replace;
@@ -2521,10 +2716,12 @@
     sPk.pTable = pSrc->pTab;
     aiRowEstPk[0] = pSrc->pTab->nRowEst;
     aiRowEstPk[1] = 1;
     pFirst = pSrc->pTab->pIndex;
     if( pSrc->notIndexed==0 ){
+      /* The real indices of the table are only considered if the
+      ** NOT INDEXED qualifier is omitted from the FROM clause */
       sPk.pNext = pFirst;
     }
     pProbe = &sPk;
     wsFlagMask = ~(
         WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
@@ -2538,19 +2735,22 @@
   for(; pProbe; pIdx=pProbe=pProbe->pNext){
     const unsigned int * const aiRowEst = pProbe->aiRowEst;
     double cost;                /* Cost of using pProbe */
     double nRow;                /* Estimated number of rows in result set */
     int rev;                    /* True to scan in reverse order */
+    double nSearch;             /* Estimated number of binary searches */
     int wsFlags = 0;
     Bitmask used = 0;
 
     /* The following variables are populated based on the properties of
-    ** scan being evaluated. They are then used to determine the expected
+    ** the index being evaluated. They are then used to determine the expected
     ** cost and number of rows returned.
     **
     **  nEq: 
     **    Number of equality terms that can be implemented using the index.
+    **    In other words, the number of initial fields in the index that
+    **    are used in == or IN or IS NULL constraints of the WHERE clause.
     **
     **  nInMul:  
     **    The "in-multiplier". This is an estimate of how many seek operations 
     **    SQLite must perform on the index in question. For example, if the 
     **    WHERE clause is:
@@ -2570,46 +2770,54 @@
     **    the sub-select is assumed to return 25 rows for the purposes of 
     **    determining nInMul.
     **
     **  bInEst:  
     **    Set to true if there was at least one "x IN (SELECT ...)" term used 
-    **    in determining the value of nInMul.
+    **    in determining the value of nInMul.  Note that the RHS of the
+    **    IN operator must be a SELECT, not a value list, for this variable
+    **    to be true.
     **
     **  estBound:
     **    An estimate on the amount of the table that must be searched.  A
     **    value of 100 means the entire table is searched.  Range constraints
     **    might reduce this to a value less than 100 to indicate that only
     **    a fraction of the table needs searching.  In the absence of
     **    sqlite_stat2 ANALYZE data, a single inequality reduces the search
-    **    space to 1/3rd its original size.  So an x>? constraint reduces
-    **    estBound to 33.  Two constraints (x>? AND x<?) reduce estBound to 11.
+    **    space to 1/4th its original size.  So an x>? constraint reduces
+    **    estBound to 25.  Two constraints (x>? AND x<?) reduce estBound to 6.
     **
     **  bSort:   
     **    Boolean. True if there is an ORDER BY clause that will require an 
     **    external sort (i.e. scanning the index being evaluated will not 
     **    correctly order records).
     **
     **  bLookup: 
-    **    Boolean. True if for each index entry visited a lookup on the 
-    **    corresponding table b-tree is required. This is always false 
-    **    for the rowid index. For other indexes, it is true unless all the 
-    **    columns of the table used by the SELECT statement are present in 
-    **    the index (such an index is sometimes described as a covering index).
+    **    Boolean. True if a table lookup is required for each index entry
+    **    visited.  In other words, true if this is not a covering index.
+    **    This is always false for the rowid primary key index of a table.
+    **    For other indexes, it is true unless all the columns of the table
+    **    used by the SELECT statement are present in the index (such an
+    **    index is sometimes described as a covering index).
     **    For example, given the index on (a, b), the second of the following 
-    **    two queries requires table b-tree lookups, but the first does not.
+    **    two queries requires table b-tree lookups in order to find the value
+    **    of column c, but the first does not because columns a and b are
+    **    both available in the index.
     **
     **             SELECT a, b    FROM tbl WHERE a = 1;
     **             SELECT a, b, c FROM tbl WHERE a = 1;
     */
-    int nEq;
-    int bInEst = 0;
-    int nInMul = 1;
-    int estBound = 100;
-    int nBound = 0;             /* Number of range constraints seen */
-    int bSort = 0;
-    int bLookup = 0;
-    WhereTerm *pTerm;           /* A single term of the WHERE clause */
+    int nEq;                      /* Number of == or IN terms matching index */
+    int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
+    int nInMul = 1;               /* Number of distinct equalities to lookup */
+    int estBound = 100;           /* Estimated reduction in search space */
+    int nBound = 0;               /* Number of range constraints seen */
+    int bSort = 0;                /* True if external sort required */
+    int bLookup = 0;              /* True if not a covering index */
+    WhereTerm *pTerm;             /* A single term of the WHERE clause */
+#ifdef SQLITE_ENABLE_STAT2
+    WhereTerm *pFirstTerm = 0;    /* First term matching the index */
+#endif
 
     /* Determine the values of nEq and nInMul */
     for(nEq=0; nEq<pProbe->nColumn; nEq++){
       int j = pProbe->aiColumn[nEq];
       pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
@@ -2617,18 +2825,23 @@
       wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
       if( pTerm->eOperator & WO_IN ){
         Expr *pExpr = pTerm->pExpr;
         wsFlags |= WHERE_COLUMN_IN;
         if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+          /* "x IN (SELECT ...)":  Assume the SELECT returns 25 rows */
           nInMul *= 25;
           bInEst = 1;
-        }else if( ALWAYS(pExpr->x.pList) ){
-          nInMul *= pExpr->x.pList->nExpr + 1;
+        }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
+          /* "x IN (value, value, ...)" */
+          nInMul *= pExpr->x.pList->nExpr;
         }
       }else if( pTerm->eOperator & WO_ISNULL ){
         wsFlags |= WHERE_COLUMN_NULL;
       }
+#ifdef SQLITE_ENABLE_STAT2
+      if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
+#endif
       used |= pTerm->prereqRight;
     }
 
     /* Determine the value of estBound. */
     if( nEq<pProbe->nColumn ){
@@ -2692,44 +2905,82 @@
         bLookup = 1;
       }
     }
 
     /*
-    ** Estimate the number of rows of output.  For an IN operator,
-    ** do not let the estimate exceed half the rows in the table.
+    ** Estimate the number of rows of output.  For an "x IN (SELECT...)"
+    ** constraint, do not let the estimate exceed half the rows in the table.
     */
     nRow = (double)(aiRowEst[nEq] * nInMul);
     if( bInEst && nRow*2>aiRowEst[0] ){
       nRow = aiRowEst[0]/2;
       nInMul = (int)(nRow / aiRowEst[nEq]);
     }
 
-    /* Assume constant cost to access a row and logarithmic cost to
-    ** do a binary search.  Hence, the initial cost is the number of output
-    ** rows plus log2(table-size) times the number of binary searches.
+#ifdef SQLITE_ENABLE_STAT2
+    /* If the constraint is of the form x=VALUE and histogram
+    ** data is available for column x, then it might be possible
+    ** to get a better estimate on the number of rows based on
+    ** VALUE and how common that value is according to the histogram.
     */
-    cost = nRow + nInMul*estLog(aiRowEst[0]);
+    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
+      if( pFirstTerm->eOperator==WO_EQ ){
+        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
+      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
+        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
+      }
+    }
+#endif /* SQLITE_ENABLE_STAT2 */
 
     /* Adjust the number of rows and the cost downward to reflect rows
     ** that are excluded by range constraints.
     */
     nRow = (nRow * (double)estBound) / (double)100;
-    cost = (cost * (double)estBound) / (double)100;
+    if( nRow<1 ) nRow = 1;
 
-    /* Add in the estimated cost of sorting the result
+    /* Assume constant cost to advance from one row to the next and
+    ** logarithmic cost to do a binary search.  Hence, the initial cost
+    ** is the number of output rows plus log2(table-size) times the
+    ** number of binary searches.
+    **
+    ** Because fan-out on tables is so much higher than the fan-out on
+    ** indices (because table btrees contain only integer keys in non-leaf
+    ** nodes) we weight the cost of a table binary search as 1/10th the
+    ** cost of an index binary search.
+    */
+    if( pIdx ){
+      if( bLookup ){
+        /* For an index lookup followed by a table lookup:
+        **    nInMul index searches to find the start of each index range
+        **  + nRow steps through the index
+        **  + nRow table searches to lookup the table entry using the rowid
+        */
+        nSearch = nInMul + nRow/10;
+      }else{
+        /* For a covering index:
+        **     nInMul binary searches to find the initial entry 
+        **   + nRow steps through the index
+        */
+        nSearch = nInMul;
+      }
+    }else{
+      /* For a rowid primary key lookup:
+      **    nInMul binary searches to find the initial entry scaled by 1/10th
+      **  + nRow steps through the table
+      */
+      nSearch = nInMul/10;
+    }
+    cost = nRow + nSearch*estLog(aiRowEst[0]);
+
+    /* Add in the estimated cost of sorting the result.  This cost is expanded
+    ** by a fudge factor of 3.0 to account for the fact that a sorting step 
+    ** involves a write and is thus more expensive than a lookup step.
     */
     if( bSort ){
-      cost += cost*estLog(cost);
+      cost += nRow*estLog(nRow)*(double)3;
     }
 
-    /* If all information can be taken directly from the index, we avoid
-    ** doing table lookups.  This reduces the cost by half.  (Not really -
-    ** this needs to be fixed.)
-    */
-    if( pIdx && bLookup==0 ){
-      cost /= (double)2;
-    }
     /**** Cost of using this index has now been computed ****/
 
     /* If there are additional constraints on this table that cannot
     ** be used with the current index, but which might lower the number
     ** of output rows, adjust the nRow value accordingly.  This only 
@@ -2766,19 +3017,23 @@
             ** set size by a factor of 10 */
             nRow /= 10;
           }
         }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
           if( nSkipRange ){
-            /* Ignore the first nBound range constraints since the index
+            /* Ignore the first nSkipRange range constraints since the index
             ** has already accounted for these */
             nSkipRange--;
           }else{
             /* Assume each additional range constraint reduces the result
-            ** set size by a factor of 3 */
+            ** set size by a factor of 3.  Indexed range constraints reduce
+            ** the search space by a larger factor: 4.  We make indexed range
+            ** more selective intentionally because of the subjective 
+            ** observation that indexed range constraints really are more
+            ** selective in practice, on average. */
             nRow /= 3;
           }
-        }else{
+        }else if( pTerm->eOperator!=WO_NOOP ){
           /* Any other expression lowers the output row count by half */
           nRow /= 2;
         }
       }
       if( nRow<2 ) nRow = 2;
@@ -3612,11 +3867,13 @@
     /* Seek the index cursor to the start of the range. */
     nConstraint = nEq;
     if( pRangeStart ){
       Expr *pRight = pRangeStart->pExpr->pRight;
       sqlite3ExprCode(pParse, pRight, regBase+nEq);
-      sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      if( (pRangeStart->wtFlags & TERM_VNULL)==0 ){
+        sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      }
       if( zStartAff ){
         if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){
           /* Since the comparison is to be performed with no conversions
           ** applied to the operands, set the affinity to apply to pRight to 
           ** SQLITE_AFF_NONE.  */
@@ -3651,11 +3908,13 @@
     nConstraint = nEq;
     if( pRangeEnd ){
       Expr *pRight = pRangeEnd->pExpr->pRight;
       sqlite3ExprCacheRemove(pParse, regBase+nEq, 1);
       sqlite3ExprCode(pParse, pRight, regBase+nEq);
-      sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      if( (pRangeEnd->wtFlags & TERM_VNULL)==0 ){
+        sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
+      }
       if( zEndAff ){
         if( sqlite3CompareAffinity(pRight, zEndAff[nEq])==SQLITE_AFF_NONE){
           /* Since the comparison is to be performed with no conversions
           ** applied to the operands, set the affinity to apply to pRight to 
           ** SQLITE_AFF_NONE.  */

Index: test/analyze2.test
==================================================================
--- test/analyze2.test
+++ test/analyze2.test
@@ -152,26 +152,26 @@
   0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~75 rows)}
 }
 do_eqp_test 2.7 {
   SELECT * FROM t1 WHERE x BETWEEN -400 AND -300 AND y BETWEEN 100 AND 300
 } {
-  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~25 rows)}
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~12 rows)}
 }
 do_eqp_test 2.8 {
   SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN -400 AND -300
 } {
-  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~25 rows)}
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~12 rows)}
 }
 do_eqp_test 2.9 {
   SELECT * FROM t1 WHERE x BETWEEN 500 AND 100 AND y BETWEEN 100 AND 300
 } {
-  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~25 rows)}
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~12 rows)}
 }
 do_eqp_test 2.10 {
   SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN 500 AND 100
 } {
-  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~25 rows)}
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~12 rows)}
 }
 
 do_test analyze2-3.1 {
   set alphabet [list a b c d e f g h i j]
   execsql BEGIN
@@ -205,11 +205,11 @@
   0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~50 rows)}
 }
 do_eqp_test 3.4 {
   SELECT * FROM t1 WHERE x BETWEEN 100 AND 400 AND y BETWEEN 'a' AND 'h'
 } {
-  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~50 rows)}
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~100 rows)}
 }
 do_eqp_test 3.5 {
   SELECT * FROM t1 WHERE x<'a' AND y>'h'
 } {
   0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~66 rows)}
@@ -240,14 +240,16 @@
   execsql COMMIT
   execsql ANALYZE
 } {}
 do_test analyze2-4.2 {
   execsql { 
+    PRAGMA automatic_index=OFF;
     SELECT tbl,idx,group_concat(sample,' ') 
     FROM sqlite_stat2 
     WHERE idx = 't3a' 
-    GROUP BY tbl,idx
+    GROUP BY tbl,idx;
+    PRAGMA automatic_index=ON;
   }
 } {t3 t3a {AfA bEj CEj dEj EEj fEj GEj hEj IEj jEj}}
 do_test analyze2-4.3 {
   execsql { 
     SELECT tbl,idx,group_concat(sample,' ') 
@@ -406,26 +408,26 @@
   sqlite3 db test.db
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~110000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~60000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 do_test analyze2-6.2.2 {
   db cache flush
   execsql ANALYZE
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 do_test analyze2-6.2.3 {
   sqlite3 db test.db
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 do_test analyze2-6.2.4 {
   execsql { 
     PRAGMA writable_schema = 1;
     DELETE FROM sqlite_master WHERE tbl_name = 'sqlite_stat1';
   }
@@ -432,11 +434,11 @@
   sqlite3 db test.db
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~110000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~60000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 do_test analyze2-6.2.5 {
   execsql { 
     PRAGMA writable_schema = 1;
     DELETE FROM sqlite_master WHERE tbl_name = 'sqlite_stat2';
   }
@@ -443,11 +445,11 @@
   sqlite3 db test.db
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~110000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~60000 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 do_test analyze2-6.2.6 {
   execsql { 
     PRAGMA writable_schema = 1;
     INSERT INTO sqlite_master SELECT * FROM master;
   }
@@ -455,11 +457,11 @@
   execsql ANALYZE
   eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
         t5.a>1 AND t5.a<15 AND
         t6.a>1
   }
-} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 
 #--------------------------------------------------------------------
 # These tests, analyze2-7.*, test that the sqlite_stat2 functionality
 # works in shared-cache mode. Note that these tests reuse the database
 # created for the analyze2-6.* tests.
@@ -499,54 +501,54 @@
   do_test analyze2-7.5 {
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db1
-  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
   do_test analyze2-7.6 {
     incr_schema_cookie test.db
     execsql { SELECT * FROM sqlite_master } db2
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db2
-  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
   do_test analyze2-7.7 {
     incr_schema_cookie test.db
     execsql { SELECT * FROM sqlite_master } db1
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db1
-  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 
   do_test analyze2-7.8 {
     execsql { DELETE FROM sqlite_stat2 } db2
     execsql { SELECT * FROM sqlite_master } db1
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db1
-  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
   do_test analyze2-7.9 {
     execsql { SELECT * FROM sqlite_master } db2
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db2
-  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 
   do_test analyze2-7.10 {
     incr_schema_cookie test.db
     execsql { SELECT * FROM sqlite_master } db1
     eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
           t5.a>1 AND t5.a<15 AND
           t6.a>1
     } db1
-  } {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~2 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
+  } {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a<?) (~1 rows)} 0 1 1 {SEARCH TABLE t6 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
 
   db1 close
   db2 close
   sqlite3_enable_shared_cache $::enable_shared_cache
 }
 
 finish_test

Index: test/analyze3.test
==================================================================
--- test/analyze3.test
+++ test/analyze3.test
@@ -246,11 +246,11 @@
   }
   execsql COMMIT
 } {}
 do_eqp_test analyze3-2.2 {
   SELECT count(a) FROM t1 WHERE b LIKE 'a%'
-} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (b>? AND b<?) (~55000 rows)}}
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (b>? AND b<?) (~30000 rows)}}
 do_eqp_test analyze3-2.3 {
   SELECT count(a) FROM t1 WHERE b LIKE '%a'
 } {0 0 0 {SCAN TABLE t1 (~500000 rows)}}
 
 do_test analyze3-2.4 {

ADDED   test/analyze5.test
Index: test/analyze5.test
==================================================================
--- /dev/null
+++ test/analyze5.test
@@ -0,0 +1,194 @@
+# 2011 January 19
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# This file implements tests for SQLite library.  The focus of the tests
+# in this file is the use of the sqlite_stat2 histogram data on tables
+# with many repeated values and only a few distinct values.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+ifcapable !stat2 {
+  finish_test
+  return
+}
+
+set testprefix analyze5
+
+proc eqp {sql {db db}} {
+  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
+}
+
+unset -nocomplain i t u v w x y z
+do_test analyze5-1.0 {
+  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
+  for {set i 0} {$i < 1000} {incr i} {
+    set y [expr {$i>=25 && $i<=50}]
+    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
+    set x $z
+    set w $z
+    set t [expr {$z+0.5}]
+    switch $z {
+      0 {set u "alpha"; unset x}
+      1 {set u "bravo"}
+      2 {set u "charlie"}
+      3 {set u "delta"; unset w}
+    }
+    if {$i%2} {set v $u} {set v [string toupper $u]}
+    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
+  }
+  db eval { 
+    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
+    CREATE INDEX t1u ON t1(u);  -- text
+    CREATE INDEX t1v ON t1(v);  -- mixed case text
+    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
+    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
+    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
+    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
+    ANALYZE;
+    SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno;
+  }
+} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
+do_test analyze5-1.1 {
+  string tolower \
+   [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}]
+} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
+do_test analyze5-1.2 {
+  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno}
+} {{} 0 0 0 0 1 1 1 2 2}
+do_test analyze5-1.3 {
+  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno}
+} {{} {} {} {} 1 1 1 2 2 3}
+do_test analyze5-1.4 {
+  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno}
+} {0 0 0 0 0 0 0 0 0 0}
+do_test analyze5-1.5 {
+  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno}
+} {0 0 0 0 1 1 1 2 2 3}
+do_test analyze5-1.6 {
+  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno}
+} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5}
+
+
+# Verify that range queries generate the correct row count estimates
+#
+foreach {testid where index rows} {
+    1  {z>=0 AND z<=0}       t1z  400
+    2  {z>=1 AND z<=1}       t1z  300
+    3  {z>=2 AND z<=2}       t1z  200
+    4  {z>=3 AND z<=3}       t1z  100
+    5  {z>=4 AND z<=4}       t1z   50
+    6  {z>=-1 AND z<=-1}     t1z   50
+    7  {z>1 AND z<3}         t1z  200
+    8  {z>0 AND z<100}       t1z  600
+    9  {z>=1 AND z<100}      t1z  600
+   10  {z>1 AND z<100}       t1z  300
+   11  {z>=2 AND z<100}      t1z  300
+   12  {z>2 AND z<100}       t1z  100
+   13  {z>=3 AND z<100}      t1z  100
+   14  {z>3 AND z<100}       t1z   50
+   15  {z>=4 AND z<100}      t1z   50
+   16  {z>=-100 AND z<=-1}   t1z   50
+   17  {z>=-100 AND z<=0}    t1z  400
+   18  {z>=-100 AND z<0}     t1z   50
+   19  {z>=-100 AND z<=1}    t1z  700
+   20  {z>=-100 AND z<2}     t1z  700
+   21  {z>=-100 AND z<=2}    {}   111
+   22  {z>=-100 AND z<3}     {}   111
+  
+   31  {z>=0.0 AND z<=0.0}   t1z  400
+   32  {z>=1.0 AND z<=1.0}   t1z  300
+   33  {z>=2.0 AND z<=2.0}   t1z  200
+   34  {z>=3.0 AND z<=3.0}   t1z  100
+   35  {z>=4.0 AND z<=4.0}   t1z   50
+   36  {z>=-1.0 AND z<=-1.0} t1z   50
+   37  {z>1.5 AND z<3.0}     t1z  200
+   38  {z>0.5 AND z<100}     t1z  600
+   39  {z>=1.0 AND z<100}    t1z  600
+   40  {z>1.5 AND z<100}     t1z  300
+   41  {z>=2.0 AND z<100}    t1z  300
+   42  {z>2.1 AND z<100}     t1z  100
+   43  {z>=3.0 AND z<100}    t1z  100
+   44  {z>3.2 AND z<100}     t1z   50
+   45  {z>=4.0 AND z<100}    t1z   50
+   46  {z>=-100 AND z<=-1.0} t1z   50
+   47  {z>=-100 AND z<=0.0}  t1z  400
+   48  {z>=-100 AND z<0.0}   t1z   50
+   49  {z>=-100 AND z<=1.0}  t1z  700
+   50  {z>=-100 AND z<2.0}   t1z  700
+   51  {z>=-100 AND z<=2.0}  {}   111
+   52  {z>=-100 AND z<3.0}   {}   111
+  
+  101  {z=-1}                t1z   50
+  102  {z=0}                 t1z  400
+  103  {z=1}                 t1z  300
+  104  {z=2}                 t1z  200
+  105  {z=3}                 t1z  100
+  106  {z=4}                 t1z   50
+  107  {z=-10.0}             t1z   50
+  108  {z=0.0}               t1z  400
+  109  {z=1.0}               t1z  300
+  110  {z=2.0}               t1z  200
+  111  {z=3.0}               t1z  100
+  112  {z=4.0}               t1z   50
+  113  {z=1.5}               t1z   50
+  114  {z=2.5}               t1z   50
+  
+  201  {z IN (-1)}           t1z   50
+  202  {z IN (0)}            t1z  400
+  203  {z IN (1)}            t1z  300
+  204  {z IN (2)}            t1z  200
+  205  {z IN (3)}            t1z  100
+  206  {z IN (4)}            t1z   50
+  207  {z IN (0.5)}          t1z   50
+  208  {z IN (0,1)}          t1z  700
+  209  {z IN (0,1,2)}        {}   100
+  210  {z IN (0,1,2,3)}      {}   100
+  211  {z IN (0,1,2,3,4,5)}  {}   100
+  212  {z IN (1,2)}          t1z  500
+  213  {z IN (2,3)}          t1z  300
+  214  {z=3 OR z=2}          t1z  300
+  215  {z IN (-1,3)}         t1z  150
+  216  {z=-1 OR z=3}         t1z  150
+
+  300  {y=0}                 {}   100
+  301  {y=1}                 t1y   50
+  302  {y=0.1}               t1y   50
+
+} {
+  # Verify that the expected index is used with the expected row count
+  do_test analyze5-1.${testid}a {
+    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
+    set idx {}
+    regexp {INDEX (t1.) } $x all idx
+    regexp {~([0-9]+) rows} $x all nrow
+    list $idx $nrow
+  } [list $index $rows]
+
+  # Verify that the same result is achieved regardless of whether or not
+  # the index is used
+  do_test analyze5-1.${testid}b {
+    set w2 [string map {y +y z +z} $where]
+    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
+                     ORDER BY +rowid"]
+    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
+    if {$a1==$a2} {
+      set res ok
+    } else {
+      set res "a1=\[$a1\] a2=\[$a2\]"
+    }
+    set res
+  } {ok}
+}
+
+
+finish_test

Index: test/e_createtable.test
==================================================================
--- test/e_createtable.test
+++ test/e_createtable.test
@@ -1377,11 +1377,11 @@
 
   2    "EXPLAIN QUERY PLAN SELECT * FROM t2 ORDER BY b, c"
        {0 0 0 {SCAN TABLE t2 USING INDEX sqlite_autoindex_t2_1 (~1000000 rows)}}
 
   3    "EXPLAIN QUERY PLAN SELECT * FROM t2 WHERE b=10 AND c>10"
-       {0 0 0 {SEARCH TABLE t2 USING INDEX sqlite_autoindex_t2_1 (b=? AND c>?) (~3 rows)}}
+       {0 0 0 {SEARCH TABLE t2 USING INDEX sqlite_autoindex_t2_1 (b=? AND c>?) (~2 rows)}}
 }
 
 # EVIDENCE-OF: R-45493-35653 A CHECK constraint may be attached to a
 # column definition or specified as a table constraint. In practice it
 # makes no difference.

Index: test/eqp.test
==================================================================
--- test/eqp.test
+++ test/eqp.test
@@ -390,20 +390,20 @@
 # t2.* FROM t1, t2 WHERE t1.a=1 AND t1.b>2; 0|0|0|SEARCH TABLE t1
 # USING COVERING INDEX i2 (a=? AND b>?) (~3 rows) 0|1|1|SCAN TABLE t2
 # (~1000000 rows)
 do_execsql_test 5.4.0 {CREATE TABLE t2(c, d)}
 det 5.4.1 "SELECT t1.*, t2.* FROM t1, t2 WHERE t1.a=1 AND t1.b>2" {
-  0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~3 rows)}
+  0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~2 rows)}
   0 1 1 {SCAN TABLE t2 (~1000000 rows)}
 }
 
 # EVIDENCE-OF: R-21040-07025 sqlite> EXPLAIN QUERY PLAN SELECT t1.*,
 # t2.* FROM t2, t1 WHERE t1.a=1 AND t1.b>2; 0|0|1|SEARCH TABLE t1
 # USING COVERING INDEX i2 (a=? AND b>?) (~3 rows) 0|1|0|SCAN TABLE t2
 # (~1000000 rows)
 det 5.5 "SELECT t1.*, t2.* FROM t2, t1 WHERE t1.a=1 AND t1.b>2" {
-  0 0 1 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~3 rows)}
+  0 0 1 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~2 rows)}
   0 1 0 {SCAN TABLE t2 (~1000000 rows)}
 }
 
 # EVIDENCE-OF: R-39007-61103 sqlite> CREATE INDEX i3 ON t1(b);
 # sqlite> EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=1 OR b=2;

Index: test/indexedby.test
==================================================================
--- test/indexedby.test
+++ test/indexedby.test
@@ -152,14 +152,14 @@
 # a CREATE VIEW statement is dropped and recreated.
 #
 do_execsql_test indexedby-5.1 {
   CREATE VIEW v2 AS SELECT * FROM t1 INDEXED BY i1 WHERE a > 5;
   EXPLAIN QUERY PLAN SELECT * FROM v2 
-} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~330000 rows)}}
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~250000 rows)}}
 do_execsql_test indexedby-5.2 {
   EXPLAIN QUERY PLAN SELECT * FROM v2 WHERE b = 10 
-} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~33000 rows)}}
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~25000 rows)}}
 do_test indexedby-5.3 {
   execsql { DROP INDEX i1 }
   catchsql { SELECT * FROM v2 }
 } {1 {no such index: i1}}
 do_test indexedby-5.4 {

Index: test/like.test
==================================================================
--- test/like.test
+++ test/like.test
@@ -705,36 +705,36 @@
       INSERT INTO t10 VALUES(234,234,234,234,234,234);
       INSERT INTO t10 VALUES(345,345,345,345,345,345);
       INSERT INTO t10 VALUES(45,45,45,45,45,45);
     }
     count {
-      SELECT a FROM t10 WHERE b LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE b LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.2 {
     count {
-      SELECT a FROM t10 WHERE c LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE c LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.3 {
     count {
-      SELECT a FROM t10 WHERE d LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE d LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.4 {
     count {
-      SELECT a FROM t10 WHERE e LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE e LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.5 {
     count {
-      SELECT a FROM t10 WHERE f LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE f LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 3 like 0}
   do_test like-10.6 {
     count {
-      SELECT a FROM t10 WHERE a LIKE '12%' ORDER BY a;
+      SELECT a FROM t10 WHERE a LIKE '12%' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.10 {
     execsql {
       CREATE TABLE t10b(
@@ -746,36 +746,36 @@
         f TEXT UNIQUE
       );
       INSERT INTO t10b SELECT * FROM t10;
     }
     count {
-      SELECT a FROM t10b WHERE b GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE b GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.11 {
     count {
-      SELECT a FROM t10b WHERE c GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE c GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.12 {
     count {
-      SELECT a FROM t10b WHERE d GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE d GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.13 {
     count {
-      SELECT a FROM t10b WHERE e GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE e GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
   do_test like-10.14 {
     count {
-      SELECT a FROM t10b WHERE f GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE f GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 3 like 0}
   do_test like-10.15 {
     count {
-      SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY a;
+      SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY +a;
     }
   } {12 123 scan 5 like 6}
 }
 
 # LIKE and GLOB where the default collating sequence is not appropriate
@@ -817,11 +817,11 @@
 } {abc abcd nosort t11 *}
 do_test like-11.3 {
   queryplan {
     PRAGMA case_sensitive_like=OFF;
     CREATE INDEX t11b ON t11(b);
-    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
+    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a;
   }
 } {abc abcd ABC ABCD sort {} t11b}
 do_test like-11.4 {
   queryplan {
     PRAGMA case_sensitive_like=ON;
@@ -831,41 +831,41 @@
 do_test like-11.5 {
   queryplan {
     PRAGMA case_sensitive_like=OFF;
     DROP INDEX t11b;
     CREATE INDEX t11bnc ON t11(b COLLATE nocase);
-    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
+    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a;
   }
 } {abc abcd ABC ABCD sort {} t11bnc}
 do_test like-11.6 {
   queryplan {
     CREATE INDEX t11bb ON t11(b COLLATE binary);
-    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
+    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a;
   }
 } {abc abcd ABC ABCD sort {} t11bnc}
 do_test like-11.7 {
   queryplan {
     PRAGMA case_sensitive_like=ON;
-    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
+    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a;
   }
 } {abc abcd sort {} t11bb}
 do_test like-11.8 {
   queryplan {
     PRAGMA case_sensitive_like=OFF;
-    SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY a;
+    SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY +a;
   }
 } {abc abcd sort {} t11bb}
 do_test like-11.9 {
   queryplan {
     CREATE INDEX t11cnc ON t11(c COLLATE nocase);
     CREATE INDEX t11cb ON t11(c COLLATE binary);
-    SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY a;
+    SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY +a;
   }
 } {abc abcd ABC ABCD sort {} t11cnc}
 do_test like-11.10 {
   queryplan {
-    SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY a;
+    SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY +a;
   }
 } {abc abcd sort {} t11cb}
 
 
 finish_test

Index: test/minmax3.test
==================================================================
--- test/minmax3.test
+++ test/minmax3.test
@@ -50,10 +50,11 @@
     INSERT INTO t1 VALUES('2', NULL,  'three');
     INSERT INTO t1 VALUES('2', 'II',  'two');
     INSERT INTO t1 VALUES('2', 'V',   'five');
     INSERT INTO t1 VALUES('3', 'VI',  'six');
     COMMIT;
+    PRAGMA automatic_index=OFF;
   }
 } {}
 do_test minmax3-1.1.1 {
   # Linear scan.
   count { SELECT max(y) FROM t1 WHERE x = '2'; }

Index: test/where3.test
==================================================================
--- test/where3.test
+++ test/where3.test
@@ -223,18 +223,18 @@
   INSERT INTO t301 VALUES(1,2,3);
   CREATE TABLE t302(x, y);
   ANALYZE;
   explain query plan SELECT * FROM t302, t301 WHERE t302.x=5 AND t301.a=t302.y;
 } {
-  0 0 0 {SCAN TABLE t302 (~0 rows)} 
+  0 0 0 {SCAN TABLE t302 (~1 rows)} 
   0 1 1 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}
 }
 do_execsql_test where3-3.1 {
   explain query plan
   SELECT * FROM t301, t302 WHERE t302.x=5 AND t301.a=t302.y;
 } {
-  0 0 1 {SCAN TABLE t302 (~0 rows)} 
+  0 0 1 {SCAN TABLE t302 (~1 rows)} 
   0 1 0 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}
 }
 
 # Verify that when there are multiple tables in a join which must be
 # full table scans that the query planner attempts put the table with
Index: test/where9.test
==================================================================
--- test/where9.test
+++ test/where9.test
@@ -470,11 +470,11 @@
   # an OR.
   #
   do_execsql_test where9-5.3 {
     EXPLAIN QUERY PLAN SELECT a FROM t1 WHERE b>1000 AND (c>=31031 OR d IS NULL)
   } {
-    0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>?) (~165000 rows)}
+    0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>?) (~125000 rows)}
   }
 }
 
 ############################################################################
 # Make sure OR-clauses work correctly on UPDATE and DELETE statements.