Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -8,10 +8,103 @@ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains code associated with the ANALYZE command. +** +** The ANALYZE command gather statistics about the content of tables +** and indices. These statistics are made available to the query planner +** to help it make better decisions about the best way to implement a +** query. +** +** Two system tables are created as follows: +** +** CREATE TABLE sqlite_stat1(tbl, idx, stat); +** CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample, cnt); +** +** Additional tables might be added in future releases of SQLite. +** The sqlite_stat2 table is only created and used if SQLite is +** compiled with SQLITE_ENABLE_STAT2. Older versions of SQLite +** omit the sqlite_stat2.cnt column. Newer versions of SQLite are +** able to use older versions of the stat2 table that lack the cnt +** column. +** +** Format of sqlite_stat1: +** +** There is normally one row per index, with the index identified by the +** name in the idx column. The tbl column is the name of the table to +** which the index belongs. In each such row, the stat column will be +** a string consisting of a list of integers. The first integer in this +** list is the number of rows in the index and in the table. The second +** integer is the average number of rows in the index that have the same +** value in the first column of the index. The third integer is the average +** number of rows in the index that have the same value for the first two +** columns. The N-th integer (for N>1) is the average number of rows in +** the index which have the same value for the first N-1 columns. For +** a K-column index, there will be K+1 integers in the stat column. If +** the index is unique, then the last integer will be 1. +** +** The list of integers in the stat column can optionally be followed +** by the keyword "unordered". The "unordered" keyword, if it is present, +** must be separated from the last integer by a single space. If the +** "unordered" keyword is present, then the query planner assumes that +** the index is unordered and will not use the index for a range query. +** +** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat +** column contains a single integer which is the (estimated) number of +** rows in the table identified by sqlite_stat1.tbl. +** +** Format of sqlite_stat2: +** +** The sqlite_stat2 is only created and is only used if SQLite is compiled +** with SQLITE_ENABLE_STAT2. The "stat2" table contains additional information +** about the key distribution within an index. The index is identified by +** the "idx" column and the "tbl" column is the name of the table to which +** the index belongs. There are usually multiple rows in the sqlite_stat2 +** table for each index. +** +** The sqlite_stat2 entires for an index that have sampleno>=0 are +** sampled key values for the first column of the index taken at +** intervals along the index. The sqlite_stat2.sample column holds +** the value of the key in the left-most column of the index. +** +** The samples are numbered from 0 to S-1 +** where S is 10 by default. The number of samples created by the +** ANALYZE command can be adjusted at compile-time using the +** SQLITE_INDEX_SAMPLES macro. The maximum number of samples is +** SQLITE_MAX_SAMPLES, currently set to 100. There are places in the +** code that use an unsigned character to count samples, so an upper +** bound on SQLITE_MAX_SAMPLES is 255. +** +** Suppose the index contains C rows. And let the number +** of samples be S. SQLite assumes that the samples are taken from the +** following rows for i between 0 and S-1: +** +** rownumber = (i*C*2 + C)/(S*2) +** +** Conceptually, the index is divided into S bins and the sample is +** taken from the middle of each bin. The ANALYZE will not attempt +** to populate sqlite_stat2 for an index that holds fewer than S*2 +** entries. +** +** If the key value for a sample (the sqlite_stat2.sample column) is a +** large string or blob, SQLite will only use the first 255 bytes of +** that string or blob. +** +** The sqlite_stat2.cnt column contains the number of entries in the +** index for which sqlite_stat2.sample matches the left-most column +** of the index. In other words, sqlite_stat2.cnt holds the number of +** times the sqlite_stat2.sample value appears in the index.. Many +** older versions of SQLite omit the sqlite_stat2.cnt column. +** +** If the sqlite_stat2.sampleno value is -1, then that row holds a first- +** column key that is a frequently used key in the index. The +** sqlite_stat2.cnt column will hold the number of occurrances of that key. +** This information is useful to the query planner in cases where a +** large percentage of the rows in indexed field have one of a small +** handful of value but the balance of the rows in the index have +** distinct or nearly distinct keys. */ #ifndef SQLITE_OMIT_ANALYZE #include "sqliteInt.h" /* @@ -41,11 +134,11 @@ const char *zName; const char *zCols; } aTable[] = { { "sqlite_stat1", "tbl,idx,stat" }, #ifdef SQLITE_ENABLE_STAT2 - { "sqlite_stat2", "tbl,idx,sampleno,sample" }, + { "sqlite_stat2", "tbl,idx,sampleno,sample,cnt" }, #endif }; int aRoot[] = {0, 0}; u8 aCreateTbl[] = {0, 0}; @@ -84,18 +177,28 @@ ); }else{ /* The sqlite_stat[12] table already exists. Delete all rows. */ sqlite3VdbeAddOp2(v, OP_Clear, aRoot[i], iDb); } +#ifdef SQLITE_ENABLE_STAT2 + if( i==1 && iDb!=1 && pStat->nCol==4 ){ + sqlite3NestedParse(pParse, + "UPDATE %Q.sqlite_master SET sql='CREATE TABLE sqlite_stat2(%s)'" + " WHERE name='sqlite_stat2'", pDb->zName, aTable[i].zCols + ); + sqlite3ChangeCookie(pParse, iDb); + } +#endif } } /* Open the sqlite_stat[12] tables for writing. */ for(i=0; inTab++; sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0); for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ int nCol; KeyInfo *pKey; + int addrIfNot; /* address of OP_IfNot */ + int *aChngAddr; /* Array of jump instruction addresses */ if( pOnlyIdx && pOnlyIdx!=pIdx ) continue; + VdbeNoopComment((v, "Begin analysis of %s", pIdx->zName)); nCol = pIdx->nColumn; pKey = sqlite3IndexKeyinfo(pParse, pIdx); if( iMem+1+(nCol*2)>pParse->nMem ){ pParse->nMem = iMem+1+(nCol*2); } + aChngAddr = sqlite3DbMallocRaw(db, sizeof(int)*pIdx->nColumn); + if( aChngAddr==0 ) continue; /* Open a cursor to the index to be analyzed. */ assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) ); sqlite3VdbeAddOp4(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb, (char *)pKey, P4_KEYINFO_HANDOFF); @@ -185,30 +299,43 @@ #ifdef SQLITE_ENABLE_STAT2 /* If this iteration of the loop is generating code to analyze the ** first index in the pTab->pIndex list, then register regLast has ** not been populated. In this case populate it now. */ - if( pTab->pIndex==pIdx ){ - sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES, regSamplerecno); - sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES*2-1, regTemp); - sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES*2, regTemp2); - - sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regLast); - sqlite3VdbeAddOp2(v, OP_Null, 0, regFirst); - addr = sqlite3VdbeAddOp3(v, OP_Lt, regSamplerecno, 0, regLast); - sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regLast, regFirst); - sqlite3VdbeAddOp3(v, OP_Multiply, regLast, regTemp, regLast); - sqlite3VdbeAddOp2(v, OP_AddImm, regLast, SQLITE_INDEX_SAMPLES*2-2); - sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regLast, regLast); - sqlite3VdbeJumpHere(v, addr); - } - - /* Zero the regSampleno and regRecno registers. */ + if( once ){ + once = 0; + sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES*2, regSample2); + + sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regCount); + sqlite3VdbeAddOp3(v, OP_Add, regCount, regCount, regCount2); + + + /* Generate code for a subroutine that store the most recent sample + ** in the sqlite_stat2 table + */ + shortJump = sqlite3VdbeAddOp0(v, OP_Goto); + sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 5, regRec, "aaaba", 0); + VdbeComment((v, "begin stat2 write subroutine")); + sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur+1, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, iStatCur+1, regRec, regRowid); + sqlite3VdbeAddOp2(v, OP_AddImm, regSampleno, 1); + sqlite3VdbeAddOp2(v, OP_AddImm, regReady, -1); + addrStoreStat2 = sqlite3VdbeAddOp2(v, OP_IfPos, regReady, shortJump+1); + sqlite3VdbeAddOp1(v, OP_Return, regGosub); + VdbeComment((v, "end stat2 write subroutine")); + sqlite3VdbeJumpHere(v, shortJump); + } + /* Reset state registers */ + sqlite3VdbeAddOp2(v, OP_Copy, regCount2, regNext); + shortJump = sqlite3VdbeAddOp3(v, OP_Lt, regSample2, 0, regCount); + sqlite3VdbeAddOp3(v, OP_Divide, regSample2, regCount, regNext); + sqlite3VdbeJumpHere(v, shortJump); sqlite3VdbeAddOp2(v, OP_Integer, 0, regSampleno); - sqlite3VdbeAddOp2(v, OP_Integer, 0, regRecno); - sqlite3VdbeAddOp2(v, OP_Copy, regFirst, regSamplerecno); -#endif + sqlite3VdbeAddOp2(v, OP_Integer, 0, regSampleIdx); + sqlite3VdbeAddOp2(v, OP_Integer, 0, regReady); + +#endif /* SQLITE_ENABLE_STAT2 */ /* The block of memory cells initialized here is used as follows. ** ** iMem: ** The total number of rows in the table. @@ -234,79 +361,80 @@ /* Start the analysis loop. This loop runs through all the entries in ** the index b-tree. */ endOfLoop = sqlite3VdbeMakeLabel(v); sqlite3VdbeAddOp2(v, OP_Rewind, iIdxCur, endOfLoop); topOfLoop = sqlite3VdbeCurrentAddr(v); - sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1); + sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1); /* Increment row counter */ for(i=0; iazColl!=0 ); assert( pIdx->azColl[i]!=0 ); pColl = sqlite3LocateCollSeq(pParse, pIdx->azColl[i]); - sqlite3VdbeAddOp4(v, OP_Ne, regCol, 0, iMem+nCol+i+1, - (char*)pColl, P4_COLLSEQ); + aChngAddr[i] = sqlite3VdbeAddOp4(v, OP_Ne, regCol, 0, iMem+nCol+i+1, + (char*)pColl, P4_COLLSEQ); sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); - } - if( db->mallocFailed ){ - /* If a malloc failure has occurred, then the result of the expression - ** passed as the second argument to the call to sqlite3VdbeJumpHere() - ** below may be negative. Which causes an assert() to fail (or an - ** out-of-bounds write if SQLITE_DEBUG is not defined). */ - return; + VdbeComment((v, "jump if column %d changed", i)); +#ifdef SQLITE_ENABLE_STAT2 + if( i==0 && addrStoreStat2 ){ + sqlite3VdbeAddOp2(v, OP_AddImm, regSampleCnt, 1); + VdbeComment((v, "incr repeat count")); + } +#endif } sqlite3VdbeAddOp2(v, OP_Goto, 0, endOfLoop); for(i=0; ia==0 ) return; + for(j=0; jn; j++){ + IndexSample *p = &pArray->a[j]; + if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){ + sqlite3_free(p->u.z); + } + } + sqlite3_free(pArray->a); + memset(pArray, 0, sizeof(*pArray)); +} +#endif + /* -** If the Index.aSample variable is not NULL, delete the aSample[] array -** and its contents. +** Delete the sample and common-key arrays from the index. */ void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ #ifdef SQLITE_ENABLE_STAT2 - if( pIdx->aSample ){ - int j; - for(j=0; jaSample[j]; - if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){ - sqlite3DbFree(db, p->u.z); - } - } - sqlite3DbFree(db, pIdx->aSample); - } + deleteIndexSampleArray(db, &pIdx->sample); + deleteIndexSampleArray(db, &pIdx->comkey); #else UNUSED_PARAMETER(db); UNUSED_PARAMETER(pIdx); #endif } +#ifdef SQLITE_ENABLE_STAT2 +/* +** Enlarge an array of IndexSample objects. +*/ +static IndexSample *allocIndexSample( + sqlite3 *db, /* Database connection to malloc against */ + IndexSampleArray *pArray, /* The array to enlarge */ + int i /* Return this element */ +){ + IndexSample *p; + if( i>=pArray->nAlloc ){ + int szNew = i+1; + p = (IndexSample*)sqlite3_realloc(pArray->a, szNew*sizeof(IndexSample)); + if( p==0 ) return 0; + pArray->a = p; + memset(&pArray->a[pArray->n], 0, (szNew-(pArray->n))*sizeof(IndexSample)); + pArray->nAlloc = szNew; + } + if( i>=pArray->n ) pArray->n = i+1; + return &pArray->a[i]; +} +#endif + /* ** Load the content of the sqlite_stat1 and sqlite_stat2 tables. The ** contents of sqlite_stat1 are used to populate the Index.aiRowEst[] ** arrays. The contents of sqlite_stat2 are used to populate the -** Index.aSample[] arrays. +** Index.sample and Index.comkey arrays. ** ** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR ** is returned. In this case, even if SQLITE_ENABLE_STAT2 was defined ** during compilation and the sqlite_stat2 table is present, no data is ** read from it. @@ -601,10 +764,11 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ analysisInfo sInfo; HashElem *i; char *zSql; int rc; + Table *pTab; /* Stat1 or Stat2 table */ assert( iDb>=0 && iDbnDb ); assert( db->aDb[iDb].pBt!=0 ); /* Clear any prior statistics */ @@ -611,17 +775,16 @@ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){ Index *pIdx = sqliteHashData(i); sqlite3DefaultRowEst(pIdx); sqlite3DeleteIndexSamples(db, pIdx); - pIdx->aSample = 0; } /* Check to make sure the sqlite_stat1 table exists */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zName; - if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){ + if( (pTab=sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase))==0 ){ return SQLITE_ERROR; } /* Load new statistics out of the sqlite_stat1 table */ zSql = sqlite3MPrintf(db, @@ -634,18 +797,21 @@ } /* Load the statistics from the sqlite_stat2 table. */ #ifdef SQLITE_ENABLE_STAT2 - if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){ + if( rc==SQLITE_OK + && (pTab=sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase))==0 ){ rc = SQLITE_ERROR; } if( rc==SQLITE_OK ){ sqlite3_stmt *pStmt = 0; zSql = sqlite3MPrintf(db, - "SELECT idx,sampleno,sample FROM %Q.sqlite_stat2", sInfo.zDatabase); + "SELECT idx, sampleno, sample, %s FROM %Q.sqlite_stat2" + " ORDER BY rowid DESC", + pTab->nCol>=5 ? "cnt" : "0", sInfo.zDatabase); if( !zSql ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); sqlite3DbFree(db, zSql); @@ -653,55 +819,49 @@ if( rc==SQLITE_OK ){ while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ + int iSample; + int eType; + IndexSample *pSample; zIndex = (char *)sqlite3_column_text(pStmt, 0); - pIdx = zIndex ? sqlite3FindIndex(db, zIndex, sInfo.zDatabase) : 0; - if( pIdx ){ - int iSample = sqlite3_column_int(pStmt, 1); - if( iSample=0 ){ - int eType = sqlite3_column_type(pStmt, 2); - - if( pIdx->aSample==0 ){ - static const int sz = sizeof(IndexSample)*SQLITE_INDEX_SAMPLES; - pIdx->aSample = (IndexSample *)sqlite3DbMallocRaw(0, sz); - if( pIdx->aSample==0 ){ - db->mallocFailed = 1; - break; - } - memset(pIdx->aSample, 0, sz); - } - - assert( pIdx->aSample ); - { - IndexSample *pSample = &pIdx->aSample[iSample]; - pSample->eType = (u8)eType; - if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ - pSample->u.r = sqlite3_column_double(pStmt, 2); - }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ - const char *z = (const char *)( - (eType==SQLITE_BLOB) ? - sqlite3_column_blob(pStmt, 2): - sqlite3_column_text(pStmt, 2) - ); - int n = sqlite3_column_bytes(pStmt, 2); - if( n>24 ){ - n = 24; - } - pSample->nByte = (u8)n; - if( n < 1){ - pSample->u.z = 0; - }else{ - pSample->u.z = sqlite3DbStrNDup(0, z, n); - if( pSample->u.z==0 ){ - db->mallocFailed = 1; - break; - } - } - } + if( zIndex==0 ) continue; + pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase); + if( pIdx==0 ) continue; + iSample = sqlite3_column_int(pStmt, 1); + if( iSample>=SQLITE_MAX_SAMPLES ) continue; + if( iSample<0 ){ + pSample = allocIndexSample(db, &pIdx->comkey, pIdx->comkey.n); + }else{ + pSample = allocIndexSample(db, &pIdx->sample, iSample); + } + if( pSample==0 ) break; + eType = sqlite3_column_type(pStmt, 2); + pSample->eType = (u8)eType; + pSample->nCopy = sqlite3_column_int(pStmt, 3); + if( eType==SQLITE_INTEGER ){ + pSample->u.i = sqlite3_column_int64(pStmt, 2); + }else if( eType==SQLITE_FLOAT ){ + pSample->u.r = sqlite3_column_double(pStmt, 2); + }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ + const char *z = (const char *)( + (eType==SQLITE_BLOB) ? + sqlite3_column_blob(pStmt, 2): + sqlite3_column_text(pStmt, 2) + ); + int n = sqlite3_column_bytes(pStmt, 2); + if( n>255 ) n = 255; + pSample->nByte = (u8)n; + if( n < 1){ + pSample->u.z = 0; + }else{ + pSample->u.z = sqlite3DbStrNDup(0, z, n); + if( pSample->u.z==0 ){ + db->mallocFailed = 1; + break; } } } } rc = sqlite3_finalize(pStmt); Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -79,11 +79,14 @@ /* ** The number of samples of an index that SQLite takes in order to ** construct a histogram of the table content when running ANALYZE ** and with SQLITE_ENABLE_STAT2 */ -#define SQLITE_INDEX_SAMPLES 10 +#ifndef SQLITE_INDEX_SAMPLES +# define SQLITE_INDEX_SAMPLES 10 +#endif +#define SQLITE_MAX_SAMPLES 100 /* ** The following macros are used to cast pointers to integers and ** integers to pointers. The way you do this varies from one compiler ** to the next, so we have developed the following set of #if statements @@ -610,10 +613,11 @@ typedef struct FuncDef FuncDef; typedef struct FuncDefHash FuncDefHash; typedef struct IdList IdList; typedef struct Index Index; typedef struct IndexSample IndexSample; +typedef struct IndexSampleArray IndexSampleArray; typedef struct KeyClass KeyClass; typedef struct KeyInfo KeyInfo; typedef struct Lookaside Lookaside; typedef struct LookasideSlot LookasideSlot; typedef struct Module Module; @@ -1445,10 +1449,34 @@ #define UNPACKED_IGNORE_ROWID 0x0004 /* Ignore trailing rowid on key1 */ #define UNPACKED_INCRKEY 0x0008 /* Make this key an epsilon larger */ #define UNPACKED_PREFIX_MATCH 0x0010 /* A prefix match is considered OK */ #define UNPACKED_PREFIX_SEARCH 0x0020 /* A prefix match is considered OK */ +/* +** Each sample stored in the sqlite_stat2 table is represented in memory +** using a structure of this type. +*/ +struct IndexSample { + union { + char *z; /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */ + double r; /* Value if eType is SQLITE_FLOAT */ + i64 i; /* Value if eType is SQLITE_INTEGER */ + } u; + u8 eType; /* SQLITE_NULL, SQLITE_INTEGER ... etc. */ + u8 nByte; /* Size in byte of text or blob. */ + u32 nCopy; /* How many copies of this sample are in the database */ +}; + +/* +** An array of IndexSample elements is as follows: +*/ +struct IndexSampleArray { + u16 n; /* Number of elements in the array */ + u16 nAlloc; /* Space allocated to a[] */ + IndexSample *a; /* The samples */ +}; + /* ** Each SQL index is represented in memory by an ** instance of the following structure. ** ** The columns of the table that are to be indexed are described @@ -1486,24 +1514,14 @@ char *zColAff; /* String defining the affinity of each column */ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* Array of size Index.nColumn. True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ - IndexSample *aSample; /* Array of SQLITE_INDEX_SAMPLES samples */ -}; - -/* -** Each sample stored in the sqlite_stat2 table is represented in memory -** using a structure of this type. -*/ -struct IndexSample { - union { - char *z; /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */ - double r; /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */ - } u; - u8 eType; /* SQLITE_NULL, SQLITE_INTEGER ... etc. */ - u8 nByte; /* Size in byte of text or blob. */ +#ifdef SQLITE_ENABLE_STAT2 + IndexSampleArray sample; /* Sampled histogram for the first column */ + IndexSampleArray comkey; /* The most common keys */ +#endif }; /* ** Each token coming out of the lexer is an instance of ** this structure. Tokens are also used as part of an expression. Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -1051,11 +1051,10 @@ pIn1 = &aMem[pOp->p1]; pOut = &aMem[pOp->p2]; assert( pOut!=pIn1 ); sqlite3VdbeMemShallowCopy(pOut, pIn1, MEM_Ephem); Deephemeralize(pOut); - REGISTER_TRACE(pOp->p2, pOut); break; } /* Opcode: SCopy P1 P2 * * * ** @@ -1538,10 +1537,11 @@ case OP_AddImm: { /* in1 */ pIn1 = &aMem[pOp->p1]; memAboutToChange(p, pIn1); sqlite3VdbeMemIntegerify(pIn1); pIn1->u.i += pOp->p2; + REGISTER_TRACE(pOp->p1, pIn1); break; } /* Opcode: MustBeInt P1 P2 * * * ** @@ -2466,10 +2466,11 @@ /* Loop through the elements that will make up the record to figure ** out how much space is required for the new record. */ for(pRec=pData0; pRec<=pLast; pRec++){ assert( memIsValid(pRec) ); + REGISTER_TRACE((pRec-pData0)+pOp->p1, pRec); if( zAffinity ){ applyAffinity(pRec, zAffinity[pRec-pData0], encoding); } if( pRec->flags&MEM_Zero && pRec->n>0 ){ sqlite3VdbeMemExpandBlob(pRec); Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -2420,69 +2420,88 @@ } #endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Argument pIdx is a pointer to an index structure that has an array of -** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column -** stored in Index.aSample. These samples divide the domain of values stored -** the index into (SQLITE_INDEX_SAMPLES+1) regions. -** Region 0 contains all values less than the first sample value. Region -** 1 contains values between the first and second samples. Region 2 contains -** values between samples 2 and 3. And so on. Region SQLITE_INDEX_SAMPLES -** contains values larger than the last sample. +** pIdx->sample.n (hereafter "S") evenly spaced samples of the first indexed +** column stored in Index.sample. These samples divide the domain of values +** stored the index into S+1 regions. Region 0 contains all values less than +** the first sample value. Region 1 contains values between the first and +** second samples. Region 2 contains values between samples 2 and 3. And so +** on. Region S contains values larger than the last sample. +** +** Note that samples are computed as being centered on S buckets where each +** bucket contains the nearly same number of rows. This routine takes samples +** to be dividers between regions, though. Hence, region 0 and region S +** contain half as many rows as the interior regions. ** ** If the index contains many duplicates of a single value, then it is ** possible that two or more adjacent samples can hold the same value. ** When that is the case, the smallest possible region code is returned ** when roundUp is false and the largest possible region code is returned ** when roundUp is true. ** ** If successful, this function determines which of the regions value ** pVal lies in, sets *piRegion to the region index (a value between 0 -** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK. +** and S, inclusive) and returns SQLITE_OK. ** Or, if an OOM occurs while converting text values between encodings, ** SQLITE_NOMEM is returned and *piRegion is undefined. */ #ifdef SQLITE_ENABLE_STAT2 static int whereRangeRegion( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ sqlite3_value *pVal, /* Value to consider */ int roundUp, /* Return largest valid region if true */ - int *piRegion /* OUT: Region of domain in which value lies */ + int *piRegion, /* OUT: Region of domain in which value lies */ + u32 *pnCopy /* OUT: Number of rows with pVal, or -1 if unk */ ){ assert( roundUp==0 || roundUp==1 ); if( ALWAYS(pVal) ){ - IndexSample *aSample = pIdx->aSample; + IndexSample *aSample = pIdx->sample.a; + int nSample = pIdx->sample.n; int i = 0; int eType = sqlite3_value_type(pVal); - if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ + assert( nSample>0 ); + if( eType==SQLITE_INTEGER ){ + i64 x = sqlite3_value_int64(pVal); + for(i=0; i=SQLITE_TEXT ) break; + if( aSample[i].u.i==x ) *pnCopy = aSample[i].nCopy; + if( roundUp ){ + if( aSample[i].u.i>x ) break; + }else{ + if( aSample[i].u.i>=x ) break; + } + } + }else if( eType==SQLITE_FLOAT ){ double r = sqlite3_value_double(pVal); - for(i=0; i=SQLITE_TEXT ) break; + if( aSample[i].u.r==r ) *pnCopy = aSample[i].nCopy; if( roundUp ){ if( aSample[i].u.r>r ) break; }else{ if( aSample[i].u.r>=r ) break; } } }else if( eType==SQLITE_NULL ){ i = 0; + if( aSample[0].eType==SQLITE_NULL ) *pnCopy = aSample[0].nCopy; if( roundUp ){ - while( idb; CollSeq *pColl; const u8 *z; int n; - /* pVal comes from sqlite3ValueFromExpr() so the type cannot be NULL */ assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); - if( eType==SQLITE_BLOB ){ z = (const u8 *)sqlite3_value_blob(pVal); pColl = db->pDfltColl; assert( pColl->enc==SQLITE_UTF8 ); }else{ @@ -2498,11 +2517,11 @@ } assert( z && pColl && pColl->xCmp ); } n = sqlite3ValueBytes(pVal, pColl->enc); - for(i=0; ixCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); } + if( c==0 ) *pnCopy = aSample[i].nCopy; if( c-roundUp>=0 ) break; } } - assert( i>=0 && i<=SQLITE_INDEX_SAMPLES ); + assert( i>=0 && i<=pIdx->sample.n ); *piRegion = i; } return SQLITE_OK; } #endif /* #ifdef SQLITE_ENABLE_STAT2 */ @@ -2619,18 +2639,20 @@ ){ int rc = SQLITE_OK; #ifdef SQLITE_ENABLE_STAT2 - if( nEq==0 && p->aSample ){ + if( nEq==0 && p->sample.a ){ sqlite3_value *pLowerVal = 0; sqlite3_value *pUpperVal = 0; int iEst; int iLower = 0; - int iUpper = SQLITE_INDEX_SAMPLES; + int nSample = p->sample.n; + int iUpper = p->sample.n; int roundUpUpper = 0; int roundUpLower = 0; + u32 nC = 0; u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pLower ){ Expr *pExpr = pLower->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal); @@ -2647,30 +2669,30 @@ if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){ sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); goto range_est_fallback; }else if( pLowerVal==0 ){ - rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper, &nC); if( pLower ) iLower = iUpper/2; }else if( pUpperVal==0 ){ - rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); - if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2; + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower, &nC); + if( pUpper ) iUpper = (iLower + p->sample.n + 1)/2; }else{ - rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper, &nC); if( rc==SQLITE_OK ){ - rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower, &nC); } } WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper)); - iEst = iUpper - iLower; - testcase( iEst==SQLITE_INDEX_SAMPLES ); - assert( iEst<=SQLITE_INDEX_SAMPLES ); + testcase( iEst==nSample ); + assert( iEst<=nSample ); + assert( nSample>0 ); if( iEst<1 ){ - *piEst = 50/SQLITE_INDEX_SAMPLES; + *piEst = 50/nSample; }else{ - *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES; + *piEst = (iEst*100)/nSample; } sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); return rc; } @@ -2714,31 +2736,40 @@ sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ + u32 nC = 0; /* Key copy count */ - assert( p->aSample!=0 ); + assert( p->sample.a!=0 ); + assert( p->sample.n>0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pExpr ){ rc = valueFromExpr(pParse, pExpr, aff, &pRhs); if( rc ) goto whereEqualScanEst_cancel; }else{ pRhs = sqlite3ValueNew(pParse->db); } if( pRhs==0 ) return SQLITE_NOTFOUND; - rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); - if( rc ) goto whereEqualScanEst_cancel; - rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); - if( rc ) goto whereEqualScanEst_cancel; - WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); - if( iLower>=iUpper ){ - nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); - if( nRowEst<*pnRow ) *pnRow = nRowEst; - }else{ - nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; - *pnRow = nRowEst; + rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower, &nC); + if( rc ) goto whereEqualScanEst_cancel; + if( nC==0 ){ + rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper, &nC); + if( rc ) goto whereEqualScanEst_cancel; + } + if( nC ){ + WHERETRACE(("equality scan count: %u\n", nC)); + *pnRow = nC; + }else{ + WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); + if( iLower>=iUpper ){ + nRowEst = p->aiRowEst[0]/(p->sample.n*3); + if( nRowEst<*pnRow ) *pnRow = nRowEst; + }else{ + nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->sample.n; + *pnRow = nRowEst; + } } whereEqualScanEst_cancel: sqlite3ValueFree(pRhs); return rc; @@ -2774,46 +2805,49 @@ int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ int nSpan = 0; /* Number of histogram regions spanned */ int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ - int i; /* Loop counter */ - u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ - u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */ + int i; /* Loop counter */ + u32 nC; /* Exact count of rows for a key */ + int nSample = p->sample.n; /* Number of samples */ + u8 aSpan[SQLITE_MAX_SAMPLES+1]; /* Histogram regions that are spanned */ + u8 aSingle[SQLITE_MAX_SAMPLES+1]; /* Histogram regions hit once */ - assert( p->aSample!=0 ); + assert( p->sample.a!=0 ); + assert( nSample>0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; - memset(aSpan, 0, sizeof(aSpan)); - memset(aSingle, 0, sizeof(aSingle)); + memset(aSpan, 0, nSample+1); + memset(aSingle, 0, nSample+1); for(i=0; inExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ nNotFound++; continue; } - rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); + rc = whereRangeRegion(pParse, p, pVal, 0, &iLower, &nC); if( rc ) break; - rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); + rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper, &nC); if( rc ) break; if( iLower>=iUpper ){ aSingle[iLower] = 1; }else{ - assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + assert( iLower>=0 && iUpper<=nSample ); while( iLoweraiRowEst[0]/(2*SQLITE_INDEX_SAMPLES) + nRowEst = (nSpan*3+nSingle)*p->aiRowEst[0]/(3*nSample) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n", nSpan, nSingle, nNotFound, nRowEst)); @@ -3027,11 +3061,11 @@ } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 - if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm; + if( nEq==0 && pProbe->sample.a ) pFirstTerm = pTerm; #endif used |= pTerm->prereqRight; } /* Determine the value of estBound. */ Index: test/analyze2.test ================================================================== --- test/analyze2.test +++ test/analyze2.test @@ -69,31 +69,31 @@ for {set i 0} {$i < 1000} {incr i} { execsql { INSERT INTO t1 VALUES($i) } } execsql { ANALYZE; - SELECT * FROM sqlite_stat2; + SELECT tbl, idx, sampleno, sample FROM sqlite_stat2; } } [list t1 sqlite_autoindex_t1_1 0 50 \ - t1 sqlite_autoindex_t1_1 1 149 \ - t1 sqlite_autoindex_t1_1 2 249 \ - t1 sqlite_autoindex_t1_1 3 349 \ - t1 sqlite_autoindex_t1_1 4 449 \ - t1 sqlite_autoindex_t1_1 5 549 \ - t1 sqlite_autoindex_t1_1 6 649 \ - t1 sqlite_autoindex_t1_1 7 749 \ - t1 sqlite_autoindex_t1_1 8 849 \ - t1 sqlite_autoindex_t1_1 9 949 \ + t1 sqlite_autoindex_t1_1 1 150 \ + t1 sqlite_autoindex_t1_1 2 250 \ + t1 sqlite_autoindex_t1_1 3 350 \ + t1 sqlite_autoindex_t1_1 4 450 \ + t1 sqlite_autoindex_t1_1 5 550 \ + t1 sqlite_autoindex_t1_1 6 650 \ + t1 sqlite_autoindex_t1_1 7 750 \ + t1 sqlite_autoindex_t1_1 8 850 \ + t1 sqlite_autoindex_t1_1 9 950 \ ] do_test analyze2-1.2 { execsql { - DELETE FROM t1 WHERe x>9; + DELETE FROM t1 WHERE x>20; ANALYZE; SELECT tbl, idx, group_concat(sample, ' ') FROM sqlite_stat2; } -} {t1 sqlite_autoindex_t1_1 {0 1 2 3 4 5 6 7 8 9}} +} {t1 sqlite_autoindex_t1_1 {1 3 5 7 9 11 13 15 17 19}} do_test analyze2-1.3 { execsql { DELETE FROM t1 WHERE x>8; ANALYZE; SELECT * FROM sqlite_stat2; @@ -187,39 +187,39 @@ SELECT tbl,idx,group_concat(sample,' ') FROM sqlite_stat2 WHERE idx = 't1_x' GROUP BY tbl,idx } -} {t1 t1_x {100 299 499 699 899 ajj cjj ejj gjj ijj}} +} {t1 t1_x {100 300 500 700 900 baa daa faa haa jaa}} do_test analyze2-3.2 { execsql { SELECT tbl,idx,group_concat(sample,' ') FROM sqlite_stat2 WHERE idx = 't1_y' GROUP BY tbl,idx } -} {t1 t1_y {100 299 499 699 899 ajj cjj ejj gjj ijj}} +} {t1 t1_y {100 300 500 700 900 baa daa faa haa jaa}} do_eqp_test 3.3 { SELECT * FROM t1 WHERE x BETWEEN 100 AND 500 AND y BETWEEN 'a' AND 'b' } { - 0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y? AND y? AND x'h' } { - 0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~66 rows)} + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~133 rows)} } do_eqp_test 3.6 { SELECT * FROM t1 WHERE x<444 AND y>'h' } { - 0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~66 rows)} + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~133 rows)} } do_eqp_test 3.7 { SELECT * FROM t1 WHERE x<221 AND y>'g' } { 0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x 'A' AND a < 'C' AND b > 'A' AND b < 'C' } { 0 0 0 {SEARCH TABLE t3 USING INDEX t3b (b>? AND b'ccc' } {0 0 0 {SEARCH TABLE t4 USING COVERING INDEX t4x (x>?) (~800 rows)}} do_eqp_test 5.4 { SELECT * FROM t4 AS t41, t4 AS t42 WHERE t41.x>'ccc' AND t42.x>'ggg' Index: test/analyze5.test ================================================================== --- test/analyze5.test +++ test/analyze5.test @@ -126,45 +126,45 @@ 49 {z>=-100 AND z<=1.0} t1z 700 50 {z>=-100 AND z<2.0} t1z 700 51 {z>=-100 AND z<=2.0} t1z 900 52 {z>=-100 AND z<3.0} t1z 900 - 101 {z=-1} t1z 50 + 101 {z=-1} t1z 33 102 {z=0} t1z 400 103 {z=1} t1z 300 104 {z=2} t1z 200 105 {z=3} t1z 100 - 106 {z=4} t1z 50 - 107 {z=-10.0} t1z 50 + 106 {z=4} t1z 33 + 107 {z=-10.0} t1z 33 108 {z=0.0} t1z 400 109 {z=1.0} t1z 300 110 {z=2.0} t1z 200 111 {z=3.0} t1z 100 - 112 {z=4.0} t1z 50 - 113 {z=1.5} t1z 50 - 114 {z=2.5} t1z 50 + 112 {z=4.0} t1z 33 + 113 {z=1.5} t1z 33 + 114 {z=2.5} t1z 33 - 201 {z IN (-1)} t1z 50 + 201 {z IN (-1)} t1z 33 202 {z IN (0)} t1z 400 203 {z IN (1)} t1z 300 204 {z IN (2)} t1z 200 205 {z IN (3)} t1z 100 - 206 {z IN (4)} t1z 50 - 207 {z IN (0.5)} t1z 50 + 206 {z IN (4)} t1z 33 + 207 {z IN (0.5)} t1z 33 208 {z IN (0,1)} t1z 700 209 {z IN (0,1,2)} t1z 900 210 {z IN (0,1,2,3)} {} 100 211 {z IN (0,1,2,3,4,5)} {} 100 212 {z IN (1,2)} t1z 500 213 {z IN (2,3)} t1z 300 214 {z=3 OR z=2} t1z 300 - 215 {z IN (-1,3)} t1z 150 - 216 {z=-1 OR z=3} t1z 150 + 215 {z IN (-1,3)} t1z 133 + 216 {z=-1 OR z=3} t1z 133 300 {y=0} {} 100 - 301 {y=1} t1y 50 - 302 {y=0.1} t1y 50 + 301 {y=1} t1y 33 + 302 {y=0.1} t1y 33 400 {x IS NULL} t1x 400 } { # Verify that the expected index is used with the expected row count @@ -203,13 +203,13 @@ # Verify that range queries generate the correct row count estimates # foreach {testid where index rows} { 500 {x IS NULL AND u='charlie'} t1u 20 - 501 {x=1 AND u='charlie'} t1x 5 + 501 {x=1 AND u='charlie'} t1x 3 502 {x IS NULL} {} 100 - 503 {x=1} t1x 50 + 503 {x=1} t1x 33 504 {x IS NOT NULL} t1x 25 505 {+x IS NOT NULL} {} 500 506 {upper(x) IS NOT NULL} {} 500 } { Index: test/analyze7.test ================================================================== --- test/analyze7.test +++ test/analyze7.test @@ -97,12 +97,14 @@ } do_test analyze7-3.3 { execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123} } {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}} do_test analyze7-3.4 { - execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123} -} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~2 rows)}} + set x [execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}] + regsub -all {[bcd]+} $x {x} x + set x +} {0 0 0 {SEARCH TABLE t1 USING INDEX t1x (x=?) (~2 rows)}} do_test analyze7-3.5 { execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123} } {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}} do_test analyze7-3.6 { execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123} ADDED test/analyze8.test Index: test/analyze8.test ================================================================== --- /dev/null +++ test/analyze8.test @@ -0,0 +1,201 @@ +# 2011 August 5 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# This file implements tests for the ANALYZE command under STAT2. +# Testing the logic that computes the number of copies of each sample. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# There is nothing to test if ANALYZE is disable for this build. +# +ifcapable {!analyze||!vtab||!stat2} { + finish_test + return +} + +# Generate some test data +# +do_test analyze8-1.0 { + set x 100 + set y 1 + set ycnt 0 + set yinc 10 + execsql { + CREATE TABLE t1(x,y); + ANALYZE; + BEGIN; + CREATE INDEX t1x ON t1(x); + CREATE INDEX t1y ON t1(y); + } + for {set i 0} {$i<20} {incr i} { + for {set j 0} {$j<300} {incr j} { + execsql {INSERT INTO t1 VALUES($x,$y)} + incr ycnt + if {$ycnt>=$yinc} {set ycnt 0; incr y} + } + for {set j 0} {$j<100} {incr j} { + incr x + execsql {INSERT INTO t1 VALUES($x,$y)} + incr ycnt + if {$ycnt>=$yinc} {set ycnt 0; incr y} + } + } + execsql { + COMMIT; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno; + } +} {200 301 400 301 600 301 800 301 1000 301 1200 301 1400 301 1600 301 1800 301 2000 301} +do_test analyze8-1.1 { + execsql { + SELECT count(*) FROM t1 WHERE x=200; + } +} {301} + +do_test analyze8-2.0 { + execsql { + BEGIN; + DELETE FROM t1; + } + for {set x 1} {$x<200} {incr x} { + execsql {INSERT INTO t1 VALUES($x,$x)} + } + for {set i 0} {$i<200} {incr i} { + execsql {INSERT INTO t1 VALUES(999,999)} + } + execsql { + COMMIT; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno; + } +} {20 1 60 1 100 1 140 1 180 1 999 200 999 200 999 200 999 200 999 200} +do_test analyze8-2.1 { + for {set i 0} {$i<200} {incr i} { + execsql {INSERT INTO t1 VALUES(0,999)} + } + execsql { + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno; + } +} {0 200 0 200 0 200 10 1 70 1 130 1 190 1 999 200 999 200 999 200} + +do_test analyze8-3.0 { + execsql { + BEGIN; + DROP TABLE t1; + CREATE TABLE t1(a,b); + CREATE INDEX t1all ON t1(a,b); + INSERT INTO t1 VALUES(0,1); + INSERT INTO t1 VALUES(0,2); + INSERT INTO t1 VALUES(0,3); + INSERT INTO t1 VALUES(1,4); + INSERT INTO t1 SELECT a+2, b+4 FROM t1; + INSERT INTO t1 SELECT a+4, b+8 FROM t1; + INSERT INTO t1 SELECT a+8, b+16 FROM t1; + COMMIT; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {0 3 2 3 4 3 5 1 6 3 8 3 10 3 12 3 13 1 14 3} +do_test analyze8-3.1 { + execsql { + DELETE FROM t1; + INSERT INTO t1 VALUES(1,1); + INSERT INTO t1 VALUES(2,2); + INSERT INTO t1 SELECT a+2, b+2 FROM t1; + INSERT INTO t1 SELECT a+4, b+4 FROM t1; + INSERT INTO t1 SELECT a+8, b+8 FROM t1; + INSERT INTO t1 SELECT a+16, b+16 FROM t1; + DELETE FROM t1 WHERE a>21; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {2 1 4 1 6 1 8 1 10 1 12 1 14 1 16 1 18 1 20 1} +do_test analyze8-3.2 { + execsql { + UPDATE t1 SET a=123; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {123 21 123 21 123 21 123 21 123 21 123 21 123 21 123 21 123 21 123 21} +do_test analyze8-3.3 { + execsql { + DELETE FROM t1 WHERE b=1 OR b=2; + ANALYZE; + SELECT count(*) FROM t1; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {19} +do_test analyze8-3.4 { + execsql { + UPDATE t1 SET a=b; + INSERT INTO t1 VALUES(1,1); + INSERT INTO t1 VALUES(2,2); + INSERT INTO t1 SELECT a, b FROM t1; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {2 2 4 2 6 2 8 2 10 2 12 2 14 2 16 2 18 2 20 2} +do_test analyze8-3.5 { + execsql { + UPDATE t1 SET a=1 WHERE b<20; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {1 38 1 38 1 38 1 38 1 38 1 38 1 38 1 38 1 38 20 2} +do_test analyze8-3.6 { + execsql { + UPDATE t1 SET a=b; + UPDATE t1 SET a=20 WHERE b>2; + ANALYZE; + SELECT sample, cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {2 2 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38 20 38} + + + +# Verify that the 5th "cnt" column is added to the sqlite_stat2 table +# on a full ANALYZE if the column is not already present. +# +do_test analyze8-4.0 { + execsql { + UPDATE t1 SET a=b; + ANALYZE; + PRAGMA writable_schema=ON; + UPDATE sqlite_master + SET sql='CREATE TABLE sqlite_stat2(tbl,idx,sampleno,sample)' + WHERE name='sqlite_stat2'; + } + db close + sqlite3 db test.db + execsql { + SELECT sample FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno + } +} {2 4 6 8 10 12 14 16 18 20} +do_test analyze8-4.1 { + catchsql {SELECT sample, cnt FROM sqlite_stat2} +} {1 {no such column: cnt}} +do_test analyze8-4.2 { + execsql { + ANALYZE; + } + db close; + sqlite3 db test.db + execsql { + SELECT sample, +cnt FROM sqlite_stat2 WHERE idx='t1all' ORDER BY sampleno; + } +} {2 2 4 2 6 2 8 2 10 2 12 2 14 2 16 2 18 2 20 2} + + + +finish_test Index: test/tkt-cbd054fa6b.test ================================================================== --- test/tkt-cbd054fa6b.test +++ test/tkt-cbd054fa6b.test @@ -33,13 +33,23 @@ INSERT INTO t1 VALUES (NULL, 'E'); INSERT INTO t1 VALUES (NULL, 'F'); INSERT INTO t1 VALUES (NULL, 'G'); INSERT INTO t1 VALUES (NULL, 'H'); INSERT INTO t1 VALUES (NULL, 'I'); + INSERT INTO t1 VALUES (NULL, 'J'); + INSERT INTO t1 VALUES (NULL, 'K'); + INSERT INTO t1 VALUES (NULL, 'L'); + INSERT INTO t1 VALUES (NULL, 'M'); + INSERT INTO t1 VALUES (NULL, 'N'); + INSERT INTO t1 VALUES (NULL, 'O'); + INSERT INTO t1 VALUES (NULL, 'P'); + INSERT INTO t1 VALUES (NULL, 'Q'); + INSERT INTO t1 VALUES (NULL, 'R'); + INSERT INTO t1 VALUES (NULL, 'S'); SELECT count(*) FROM t1; } -} {10} +} {20} do_test tkt-cbd05-1.2 { db eval { ANALYZE; } } {} @@ -48,11 +58,11 @@ SELECT tbl,idx,group_concat(sample,' ') FROM sqlite_stat2 WHERE idx = 't1_x' GROUP BY tbl,idx } -} {t1 t1_x { A B C D E F G H I}} +} {t1 t1_x {A C E G I K M O Q S}} do_test tkt-cbd05-2.1 { db eval { DROP TABLE t1; CREATE TABLE t1(a INTEGER PRIMARY KEY, b BLOB UNIQUE NOT NULL); @@ -65,13 +75,23 @@ INSERT INTO t1 VALUES(NULL, X'45'); INSERT INTO t1 VALUES(NULL, X'46'); INSERT INTO t1 VALUES(NULL, X'47'); INSERT INTO t1 VALUES(NULL, X'48'); INSERT INTO t1 VALUES(NULL, X'49'); + INSERT INTO t1 VALUES(NULL, X'4A'); + INSERT INTO t1 VALUES(NULL, X'4B'); + INSERT INTO t1 VALUES(NULL, X'4C'); + INSERT INTO t1 VALUES(NULL, X'4D'); + INSERT INTO t1 VALUES(NULL, X'4E'); + INSERT INTO t1 VALUES(NULL, X'4F'); + INSERT INTO t1 VALUES(NULL, X'50'); + INSERT INTO t1 VALUES(NULL, X'51'); + INSERT INTO t1 VALUES(NULL, X'52'); + INSERT INTO t1 VALUES(NULL, X'53'); SELECT count(*) FROM t1; } -} {10} +} {20} do_test tkt-cbd05-2.2 { db eval { ANALYZE; } } {} @@ -80,8 +100,8 @@ SELECT tbl,idx,group_concat(sample,' ') FROM sqlite_stat2 WHERE idx = 't1_x' GROUP BY tbl,idx } -} {t1 t1_x { A B C D E F G H I}} +} {t1 t1_x {A C E G I K M O Q S}} finish_test