Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -741,16 +741,16 @@ if( zRet==0 ){ sqlite3_result_error_nomem(context); return; } - sqlite3_snprintf(24, zRet, "%lld", p->nRow); + sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow); z = zRet + sqlite3Strlen30(zRet); for(i=0; i<(p->nCol-1); i++){ - i64 nDistinct = p->current.anDLt[i] + 1; - i64 iVal = (p->nRow + nDistinct - 1) / nDistinct; - sqlite3_snprintf(24, z, " %lld", iVal); + u64 nDistinct = p->current.anDLt[i] + 1; + u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; + sqlite3_snprintf(24, z, " %llu", iVal); z += sqlite3Strlen30(z); assert( p->current.anEq[i] ); } assert( z[0]=='\0' && z>zRet ); @@ -787,11 +787,11 @@ sqlite3_result_error_nomem(context); }else{ int i; char *z = zRet; for(i=0; inCol; i++){ - sqlite3_snprintf(24, z, "%lld ", aCnt[i]); + sqlite3_snprintf(24, z, "%llu ", (u64)aCnt[i]); z += sqlite3Strlen30(z); } assert( z[0]=='\0' && z>zRet ); z[-1] = '\0'; sqlite3_result_text(context, zRet, -1, sqlite3_free); @@ -1272,24 +1272,22 @@ z++; } aOut[i] = v; if( *z==' ' ) z++; } - if( pIndex ){ +#ifndef SQLITE_ENABLE_STAT3_OR_STAT4 + assert( pIndex!=0 ); +#else + if( pIndex ) +#endif + { if( strcmp(z, "unordered")==0 ){ pIndex->bUnordered = 1; - }else if( sqlite3_strglob("r=[0-9]*", z)==0 ){ + }else if( sqlite3_strglob("sz=[0-9]*", z)==0 ){ int v32 = 0; - sqlite3GetInt32(z+2, &v32); - if( v32>=200 ){ - v32 = 255; - }else if( v32<=0 ){ - v32 = 1; - }else{ - v32 = (128*v32)/100; - } - pIndex->iScanRatio = (u8)v32; + sqlite3GetInt32(z+3, &v32); + pIndex->szIdxRow = sqlite3LogEst(v32); } } } /* @@ -1328,11 +1326,14 @@ if( pIndex ){ decodeIntArray((char*)z, pIndex->nColumn+1, pIndex->aiRowEst, pIndex); if( pIndex->pPartIdxWhere==0 ) pTable->nRowEst = pIndex->aiRowEst[0]; }else{ - decodeIntArray((char*)z, 1, &pTable->nRowEst, 0); + Index fakeIdx; + fakeIdx.szIdxRow = pTable->szTabRow; + decodeIntArray((char*)z, 1, &pTable->nRowEst, &fakeIdx); + pTable->szTabRow = fakeIdx.szIdxRow; } return 0; } Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -2506,11 +2506,10 @@ pBt->max1bytePayload = (u8)pBt->maxLocal; } assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); pBt->pPage1 = pPage1; pBt->nPage = nPage; -assert( pPage1->leaf==0 || pPage1->leaf==1 ); return SQLITE_OK; page1_init_failed: releasePage(pPage1); pBt->pPage1 = 0; Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -877,11 +877,11 @@ } pTable->zName = zName; pTable->iPKey = -1; pTable->pSchema = db->aDb[iDb].pSchema; pTable->nRef = 1; - pTable->nRowEst = 1000000; + pTable->nRowEst = 1048576; assert( pParse->pNewTable==0 ); pParse->pNewTable = pTable; /* If this is the magic sqlite_sequence table used by autoincrement, ** then record a pointer to this table in the main database structure @@ -1506,38 +1506,33 @@ } /* ** Estimate the total row width for a table. */ -static unsigned estimatedTableWidth(const Table *pTab){ +static void estimateTableWidth(Table *pTab){ unsigned wTable = 0; const Column *pTabCol; int i; for(i=pTab->nCol, pTabCol=pTab->aCol; i>0; i--, pTabCol++){ wTable += pTabCol->szEst; } if( pTab->iPKey<0 ) wTable++; - return wTable; + pTab->szTabRow = sqlite3LogEst(wTable*4); } /* -** Set the iScanRatio for an index based on estimates of the average -** table row width and average index row width. Estimates are derived -** from the declared datatypes of the various columns. +** Estimate the average size of a row for an index. */ -static void setIndexScanRatio(Index *pIdx, unsigned wTable){ +static void estimateIndexWidth(Index *pIdx){ unsigned wIndex = 1; int i; const Column *aCol = pIdx->pTable->aCol; for(i=0; inColumn; i++){ assert( pIdx->aiColumn[i]>=0 && pIdx->aiColumn[i]pTable->nCol ); wIndex += aCol[pIdx->aiColumn[i]].szEst; } - assert( 100*wIndex/wTable <= 255 ); - pIdx->iScanRatio = (u8)(128*wIndex/wTable); - /* printf("%s: wIndex=%d wTable=%d ratio=%d\n", - ** pIdx->zName, wIndex, wTable, (100*pIdx->iScanRatio)/128); */ + pIdx->szIdxRow = sqlite3LogEst(wIndex*4); } /* ** This routine is called to report the final ")" that terminates ** a CREATE TABLE statement. @@ -1566,11 +1561,10 @@ ){ Table *p; /* The new table */ sqlite3 *db = pParse->db; /* The database connection */ int iDb; /* Database in which the table lives */ Index *pIdx; /* An implied index of the table */ - unsigned wTable; /* Estimated average width of a row in the table */ if( (pEnd==0 && pSelect==0) || db->mallocFailed ){ return; } p = pParse->pNewTable; @@ -1586,14 +1580,14 @@ if( p->pCheck ){ sqlite3ResolveSelfReference(pParse, p, NC_IsCheck, 0, p->pCheck); } #endif /* !defined(SQLITE_OMIT_CHECK) */ - /* Compute the iScanRatio of implied indices */ - wTable = estimatedTableWidth(p); + /* Estimate the average row size for the table and for all implied indices */ + estimateTableWidth(p); for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){ - setIndexScanRatio(pIdx, wTable); + estimateIndexWidth(pIdx); } /* If the db->init.busy is 1 it means we are reading the SQL off the ** "sqlite_master" or "sqlite_temp_master" table on the disk. ** So do not write to the disk again. Extract the root page number @@ -2816,13 +2810,11 @@ requestedSortOrder = pListItem->sortOrder & sortOrderMask; pIndex->aSortOrder[i] = (u8)requestedSortOrder; if( pTab->aCol[j].notNull==0 ) pIndex->uniqNotNull = 0; } sqlite3DefaultRowEst(pIndex); - if( pParse->pNewTable==0 ){ - setIndexScanRatio(pIndex, estimatedTableWidth(pTab)); - } + if( pParse->pNewTable==0 ) estimateIndexWidth(pIndex); if( pTab==pParse->pNewTable ){ /* This routine has been called to create an automatic index as a ** result of a PRIMARY KEY or UNIQUE clause on a column definition, or ** a PRIMARY KEY or UNIQUE clause following the column definitions. Index: src/pragma.c ================================================================== --- src/pragma.c +++ src/pragma.c @@ -206,10 +206,14 @@ #if defined(SQLITE_HAS_CODEC) { /* zName: */ "hexkey", /* ePragTyp: */ PragTyp_HEXKEY, /* ePragFlag: */ 0, /* iArg: */ 0 }, + { /* zName: */ "hexrekey", + /* ePragTyp: */ PragTyp_HEXKEY, + /* ePragFlag: */ 0, + /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_CHECK) { /* zName: */ "ignore_check_constraints", /* ePragTyp: */ PragTyp_FLAG, /* ePragFlag: */ 0, @@ -414,11 +418,11 @@ { /* zName: */ "writable_schema", /* ePragTyp: */ PragTyp_FLAG, /* ePragFlag: */ 0, /* iArg: */ SQLITE_WriteSchema|SQLITE_RecoveryMode }, }; -/* Number of pragmas: 55 on by default, 66 total. */ +/* Number of pragmas: 55 on by default, 67 total. */ /* End of the automatically generated pragma table. ***************************************************************************/ /* ** Interpret the given string as a safety level. Return 0 for OFF, @@ -1447,32 +1451,34 @@ break; case PragTyp_INDEX_LIST: if( zRight ){ Index *pIdx; Table *pTab; + int i; pTab = sqlite3FindTable(db, zRight, zDb); if( pTab ){ v = sqlite3GetVdbe(pParse); - pIdx = pTab->pIndex; - if( pIdx ){ - int i = 0; - sqlite3VdbeSetNumCols(v, 4); - pParse->nMem = 4; - sqlite3CodeVerifySchema(pParse, iDb); - sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seq", SQLITE_STATIC); - sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC); - sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "unique", SQLITE_STATIC); - sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "r", SQLITE_STATIC); - while(pIdx){ - sqlite3VdbeAddOp2(v, OP_Integer, i, 1); - sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0); - sqlite3VdbeAddOp2(v, OP_Integer, pIdx->onError!=OE_None, 3); - sqlite3VdbeAddOp2(v, OP_Integer, (pIdx->iScanRatio*100+127)/128, 4); - sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); - ++i; - pIdx = pIdx->pNext; - } + sqlite3VdbeSetNumCols(v, 4); + pParse->nMem = 4; + sqlite3CodeVerifySchema(pParse, iDb); + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seq", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "unique", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "avgrowsize", SQLITE_STATIC); + sqlite3VdbeAddOp2(v, OP_Integer, 0, 1); + sqlite3VdbeAddOp2(v, OP_Null, 0, 2); + sqlite3VdbeAddOp2(v, OP_Integer, 1, 3); + sqlite3VdbeAddOp2(v, OP_Integer, + (int)sqlite3LogEstToInt(pTab->szTabRow), 4); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); + for(pIdx=pTab->pIndex, i=1; pIdx; pIdx=pIdx->pNext, i++){ + sqlite3VdbeAddOp2(v, OP_Integer, i, 1); + sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0); + sqlite3VdbeAddOp2(v, OP_Integer, pIdx->onError!=OE_None, 3); + sqlite3VdbeAddOp2(v, OP_Integer, + (int)sqlite3LogEstToInt(pIdx->szIdxRow), 4); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); } } } break; @@ -2177,16 +2183,16 @@ if( zRight ) sqlite3_rekey_v2(db, zDb, zRight, sqlite3Strlen30(zRight)); break; } case PragTyp_HEXKEY: { if( zRight ){ - int i, h1, h2; + u8 iByte; + int i; char zKey[40]; - for(i=0; (h1 = zRight[i])!=0 && (h2 = zRight[i+1])!=0; i+=2){ - h1 += 9*(1&(h1>>6)); - h2 += 9*(1&(h2>>6)); - zKey[i/2] = (h2 & 0x0f) | ((h1 & 0xf)<<4); + for(i=0, iByte=0; ipSrcList==0 ) return 0; - switch( pExpr->op ){ case TK_AGG_COLUMN: case TK_COLUMN: { /* The expression is a column. Locate the table the column is being ** extracted from in NameContext.pSrcList. This table may be real @@ -1147,29 +1165,39 @@ NameContext sNC; Expr *p = pS->pEList->a[iCol].pExpr; sNC.pSrcList = pS->pSrc; sNC.pNext = pNC; sNC.pParse = pNC->pParse; - zType = columnType(&sNC, p, &zOriginDb, &zOriginTab, &zOriginCol); + zType = columnType(&sNC, p,&zOrigDb,&zOrigTab,&zOrigCol, &estWidth); } }else if( ALWAYS(pTab->pSchema) ){ /* A real table */ assert( !pS ); if( iCol<0 ) iCol = pTab->iPKey; assert( iCol==-1 || (iCol>=0 && iColnCol) ); +#ifdef SQLITE_ENABLE_COLUMN_METADATA if( iCol<0 ){ zType = "INTEGER"; - zOriginCol = "rowid"; + zOrigCol = "rowid"; }else{ zType = pTab->aCol[iCol].zType; - zOriginCol = pTab->aCol[iCol].zName; + zOrigCol = pTab->aCol[iCol].zName; + estWidth = pTab->aCol[iCol].szEst; } - zOriginTab = pTab->zName; + zOrigTab = pTab->zName; if( pNC->pParse ){ int iDb = sqlite3SchemaToIndex(pNC->pParse->db, pTab->pSchema); - zOriginDb = pNC->pParse->db->aDb[iDb].zName; + zOrigDb = pNC->pParse->db->aDb[iDb].zName; + } +#else + if( iCol<0 ){ + zType = "INTEGER"; + }else{ + zType = pTab->aCol[iCol].zType; + estWidth = pTab->aCol[iCol].szEst; } +#endif } break; } #ifndef SQLITE_OMIT_SUBQUERY case TK_SELECT: { @@ -1182,22 +1210,25 @@ Expr *p = pS->pEList->a[0].pExpr; assert( ExprHasProperty(pExpr, EP_xIsSelect) ); sNC.pSrcList = pS->pSrc; sNC.pNext = pNC; sNC.pParse = pNC->pParse; - zType = columnType(&sNC, p, &zOriginDb, &zOriginTab, &zOriginCol); + zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol, &estWidth); break; } #endif } - - if( pzOriginDb ){ - assert( pzOriginTab && pzOriginCol ); - *pzOriginDb = zOriginDb; - *pzOriginTab = zOriginTab; - *pzOriginCol = zOriginCol; + +#ifdef SQLITE_ENABLE_COLUMN_METADATA + if( pzOrigDb ){ + assert( pzOrigTab && pzOrigCol ); + *pzOrigDb = zOrigDb; + *pzOrigTab = zOrigTab; + *pzOrigCol = zOrigCol; } +#endif + if( pEstWidth ) *pEstWidth = estWidth; return zType; } /* ** Generate code that will tell the VDBE the declaration types of columns @@ -1219,25 +1250,25 @@ const char *zType; #ifdef SQLITE_ENABLE_COLUMN_METADATA const char *zOrigDb = 0; const char *zOrigTab = 0; const char *zOrigCol = 0; - zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol); + zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol, 0); /* The vdbe must make its own copy of the column-type and other ** column specific strings, in case the schema is reset before this ** virtual machine is deleted. */ sqlite3VdbeSetColName(v, i, COLNAME_DATABASE, zOrigDb, SQLITE_TRANSIENT); sqlite3VdbeSetColName(v, i, COLNAME_TABLE, zOrigTab, SQLITE_TRANSIENT); sqlite3VdbeSetColName(v, i, COLNAME_COLUMN, zOrigCol, SQLITE_TRANSIENT); #else - zType = columnType(&sNC, p, 0, 0, 0); + zType = columnType(&sNC, p, 0, 0, 0, 0); #endif sqlite3VdbeSetColName(v, i, COLNAME_DECLTYPE, zType, SQLITE_TRANSIENT); } -#endif /* SQLITE_OMIT_DECLTYPE */ +#endif /* !defined(SQLITE_OMIT_DECLTYPE) */ } /* ** Generate code that will tell the VDBE the names of columns ** in the result set. This information is used to provide the @@ -1422,39 +1453,41 @@ ** This routine requires that all identifiers in the SELECT ** statement be resolved. */ static void selectAddColumnTypeAndCollation( Parse *pParse, /* Parsing contexts */ - int nCol, /* Number of columns */ - Column *aCol, /* List of columns */ + Table *pTab, /* Add column type information to this table */ Select *pSelect /* SELECT used to determine types and collations */ ){ sqlite3 *db = pParse->db; NameContext sNC; Column *pCol; CollSeq *pColl; int i; Expr *p; struct ExprList_item *a; + u64 szAll = 0; assert( pSelect!=0 ); assert( (pSelect->selFlags & SF_Resolved)!=0 ); - assert( nCol==pSelect->pEList->nExpr || db->mallocFailed ); + assert( pTab->nCol==pSelect->pEList->nExpr || db->mallocFailed ); if( db->mallocFailed ) return; memset(&sNC, 0, sizeof(sNC)); sNC.pSrcList = pSelect->pSrc; a = pSelect->pEList->a; - for(i=0, pCol=aCol; iaCol; inCol; i++, pCol++){ p = a[i].pExpr; - pCol->zType = sqlite3DbStrDup(db, columnType(&sNC, p, 0, 0, 0)); + pCol->zType = sqlite3DbStrDup(db, columnType(&sNC, p,0,0,0, &pCol->szEst)); + szAll += pCol->szEst; pCol->affinity = sqlite3ExprAffinity(p); if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_NONE; pColl = sqlite3ExprCollSeq(pParse, p); if( pColl ){ pCol->zColl = sqlite3DbStrDup(db, pColl->zName); } } + pTab->szTabRow = sqlite3LogEst(szAll*4); } /* ** Given a SELECT statement, generate a Table structure that describes ** the result set of that SELECT. @@ -1478,13 +1511,13 @@ /* The sqlite3ResultSetOfSelect() is only used n contexts where lookaside ** is disabled */ assert( db->lookaside.bEnabled==0 ); pTab->nRef = 1; pTab->zName = 0; - pTab->nRowEst = 1000000; + pTab->nRowEst = 1048576; selectColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol); - selectAddColumnTypeAndCollation(pParse, pTab->nCol, pTab->aCol, pSelect); + selectAddColumnTypeAndCollation(pParse, pTab, pSelect); pTab->iPKey = -1; if( db->mallocFailed ){ sqlite3DeleteTable(db, pTab); return 0; } @@ -3392,15 +3425,15 @@ assert( pFrom->pTab==0 ); sqlite3WalkSelect(pWalker, pSel); pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table)); if( pTab==0 ) return WRC_Abort; pTab->nRef = 1; - pTab->zName = sqlite3MPrintf(db, "sqlite_subquery_%p_", (void*)pTab); + pTab->zName = sqlite3MPrintf(db, "sqlite_sq_%p", (void*)pTab); while( pSel->pPrior ){ pSel = pSel->pPrior; } selectColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol); pTab->iPKey = -1; - pTab->nRowEst = 1000000; + pTab->nRowEst = 1048576; pTab->tabFlags |= TF_Ephemeral; #endif }else{ /* An ordinary table or view name in the FROM clause */ assert( pFrom->pTab==0 ); @@ -3680,11 +3713,11 @@ if( ALWAYS(pTab!=0) && (pTab->tabFlags & TF_Ephemeral)!=0 ){ /* A sub-query in the FROM clause of a SELECT */ Select *pSel = pFrom->pSelect; assert( pSel ); while( pSel->pPrior ) pSel = pSel->pPrior; - selectAddColumnTypeAndCollation(pParse, pTab->nCol, pTab->aCol, pSel); + selectAddColumnTypeAndCollation(pParse, pTab, pSel); } } } return WRC_Continue; } @@ -4606,13 +4639,13 @@ ** In practice the KeyInfo structure will not be used. It is only ** passed to keep OP_OpenRead happy. */ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ if( pIdx->bUnordered==0 - && pIdx->iScanRatio<128 + && pIdx->szIdxRowszTabRow && pIdx->pPartIdxWhere==0 - && (!pBest || pIdx->iScanRatioiScanRatio) + && (!pBest || pIdx->szIdxRowszIdxRow) ){ pBest = pIdx; } } if( pBest ){ Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -468,10 +468,35 @@ typedef u64 tRowcnt; /* 64-bit only if requested at compile-time */ #else typedef u32 tRowcnt; /* 32-bit is the default */ #endif +/* +** Estimated quantities used for query planning are stored as 16-bit +** logarithms. For quantity X, the value stored is 10*log2(X). This +** gives a possible range of values of approximately 1.0e986 to 1e-986. +** But the allowed values are "grainy". Not every value is representable. +** For example, quantities 16 and 17 are both represented by a LogEst +** of 40. However, since LogEst quantatites are suppose to be estimates, +** not exact values, this imprecision is not a problem. +** +** "LogEst" is short for "Logarithimic Estimate". +** +** Examples: +** 1 -> 0 20 -> 43 10000 -> 132 +** 2 -> 10 25 -> 46 25000 -> 146 +** 3 -> 16 100 -> 66 1000000 -> 199 +** 4 -> 20 1000 -> 99 1048576 -> 200 +** 10 -> 33 1024 -> 100 4294967296 -> 320 +** +** The LogEst can be negative to indicate fractional values. +** Examples: +** +** 0.5 -> -10 0.1 -> -33 0.0625 -> -40 +*/ +typedef INT16_TYPE LogEst; + /* ** Macros to determine whether the machine is big or little endian, ** evaluated at runtime. */ #ifdef SQLITE_AMALGAMATION @@ -1360,10 +1385,11 @@ tRowcnt nRowEst; /* Estimated rows in table - from sqlite_stat1 table */ int tnum; /* Root BTree node for this table (see note above) */ i16 iPKey; /* If not negative, use aCol[iPKey] as the primary key */ i16 nCol; /* Number of columns in this table */ u16 nRef; /* Number of pointers to this Table */ + LogEst szTabRow; /* Estimated size of each table row in bytes */ u8 tabFlags; /* Mask of TF_* values */ u8 keyConf; /* What to do in case of uniqueness conflict on iPKey */ #ifndef SQLITE_OMIT_ALTERTABLE int addColOffset; /* Offset in CREATE TABLE stmt to add a new column */ #endif @@ -1558,13 +1584,13 @@ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ Expr *pPartIdxWhere; /* WHERE clause for partial indices */ int tnum; /* DB Page containing root of this index */ + LogEst szIdxRow; /* Estimated average row size in bytes */ u16 nColumn; /* Number of columns in table used by this index */ - u8 iScanRatio; /* Scan rate relative to the main table */ - unsigned onError:4; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ + u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ unsigned autoIndex:2; /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */ unsigned bUnordered:1; /* Use this index for == or IN queries only */ unsigned uniqNotNull:1; /* True if UNIQUE and NOT NULL for all columns */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nSample; /* Number of elements in aSample[] */ @@ -2965,10 +2991,16 @@ int sqlite3GetInt32(const char *, int*); int sqlite3Atoi(const char*); int sqlite3Utf16ByteLen(const void *pData, int nChar); int sqlite3Utf8CharLen(const char *pData, int nByte); u32 sqlite3Utf8Read(const u8**); +LogEst sqlite3LogEst(u64); +LogEst sqlite3LogEstAdd(LogEst,LogEst); +#ifndef SQLITE_OMIT_VIRTUALTABLE +LogEst sqlite3LogEstFromDouble(double); +#endif +u64 sqlite3LogEstToInt(LogEst); /* ** Routines to read and write variable-length integers. These used to ** be defined locally, but now we use the varint routines in the util.c ** file. Code should use the MACRO forms below, as the Varint32 versions Index: src/test8.c ================================================================== --- src/test8.c +++ src/test8.c @@ -263,10 +263,11 @@ ** corresponding entry in aIndex[] to 1. */ while( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ const char *zIdx = (const char *)sqlite3_column_text(pStmt, 1); sqlite3_stmt *pStmt2 = 0; + if( zIdx==0 ) continue; zSql = sqlite3_mprintf("PRAGMA index_info(%s)", zIdx); if( !zSql ){ rc = SQLITE_NOMEM; goto get_index_array_out; } Index: src/util.c ================================================================== --- src/util.c +++ src/util.c @@ -1206,5 +1206,82 @@ for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); } } #endif + +/* +** Find (an approximate) sum of two LogEst values. This computation is +** not a simple "+" operator because LogEst is stored as a logarithmic +** value. +** +*/ +LogEst sqlite3LogEstAdd(LogEst a, LogEst b){ + static const unsigned char x[] = { + 10, 10, /* 0,1 */ + 9, 9, /* 2,3 */ + 8, 8, /* 4,5 */ + 7, 7, 7, /* 6,7,8 */ + 6, 6, 6, /* 9,10,11 */ + 5, 5, 5, /* 12-14 */ + 4, 4, 4, 4, /* 15-18 */ + 3, 3, 3, 3, 3, 3, /* 19-24 */ + 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ + }; + if( a>=b ){ + if( a>b+49 ) return a; + if( a>b+31 ) return a+1; + return a+x[a-b]; + }else{ + if( b>a+49 ) return b; + if( b>a+31 ) return b+1; + return b+x[b-a]; + } +} + +/* +** Convert an integer into a LogEst. In other words, compute a +** good approximatation for 10*log2(x). +*/ +LogEst sqlite3LogEst(u64 x){ + static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; + LogEst y = 40; + if( x<8 ){ + if( x<2 ) return 0; + while( x<8 ){ y -= 10; x <<= 1; } + }else{ + while( x>255 ){ y += 40; x >>= 4; } + while( x>15 ){ y += 10; x >>= 1; } + } + return a[x&7] + y - 10; +} + +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* +** Convert a double into a LogEst +** In other words, compute an approximation for 10*log2(x). +*/ +LogEst sqlite3LogEstFromDouble(double x){ + u64 a; + LogEst e; + assert( sizeof(x)==8 && sizeof(a)==8 ); + if( x<=1 ) return 0; + if( x<=2000000000 ) return sqlite3LogEst((u64)x); + memcpy(&a, &x, 8); + e = (a>>52) - 1022; + return e*10; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Convert a LogEst into an integer. +*/ +u64 sqlite3LogEstToInt(LogEst x){ + u64 n; + if( x<10 ) return 1; + n = x%10; + x /= 10; + if( n>=5 ) n -= 2; + else if( n>=1 ) n -= 1; + if( x>=3 ) return (n+8)<<(x-3); + return (n+8)>>(3-x); +} Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -46,30 +46,10 @@ typedef struct WhereLoopBuilder WhereLoopBuilder; typedef struct WhereScan WhereScan; typedef struct WhereOrCost WhereOrCost; typedef struct WhereOrSet WhereOrSet; -/* -** Cost X is tracked as 10*log2(X) stored in a 16-bit integer. The -** maximum cost for ordinary tables is 64*(2**63) which becomes 6900. -** (Virtual tables can return a larger cost, but let's assume they do not.) -** So all costs can be stored in a 16-bit integer without risk -** of overflow. -** -** Costs are estimates, so no effort is made to compute 10*log2(X) exactly. -** Instead, a close estimate is used. Any value of X=1 is stored as 0. -** X=2 is 10. X=3 is 16. X=1000 is 99. etc. Negative values are allowed. -** A WhereCost of -10 means 0.5. WhereCost of -20 means 0.25. And so forth. -** -** The tool/wherecosttest.c source file implements a command-line program -** that will convert WhereCosts to integers, convert integers to WhereCosts -** and do addition and multiplication on WhereCost values. The wherecosttest -** command-line program is a useful utility to have around when working with -** this module. -*/ -typedef short int WhereCost; - /* ** This object contains information needed to implement a single nested ** loop in WHERE clause. ** ** Contrast this object with WhereLoop. This object describes the @@ -130,13 +110,13 @@ #ifdef SQLITE_DEBUG char cId; /* Symbolic ID of this loop for debugging use */ #endif u8 iTab; /* Position in FROM clause of table for this loop */ u8 iSortIdx; /* Sorting index number. 0==None */ - WhereCost rSetup; /* One-time setup cost (ex: create transient index) */ - WhereCost rRun; /* Cost of running each loop */ - WhereCost nOut; /* Estimated number of output rows */ + LogEst rSetup; /* One-time setup cost (ex: create transient index) */ + LogEst rRun; /* Cost of running each loop */ + LogEst nOut; /* Estimated number of output rows */ union { struct { /* Information for internal btree tables */ int nEq; /* Number of equality constraints */ Index *pIndex; /* Index used, or NULL */ } btree; @@ -162,12 +142,12 @@ ** subquery on one operand of an OR operator in the WHERE clause. ** See WhereOrSet for additional information */ struct WhereOrCost { Bitmask prereq; /* Prerequisites */ - WhereCost rRun; /* Cost of running this subquery */ - WhereCost nOut; /* Number of outputs for this subquery */ + LogEst rRun; /* Cost of running this subquery */ + LogEst nOut; /* Number of outputs for this subquery */ }; /* The WhereOrSet object holds a set of possible WhereOrCosts that ** correspond to the subquery(s) of OR-clause processing. Only the ** best N_OR_COST elements are retained. @@ -201,12 +181,12 @@ ** at the end is the choosen query plan. */ struct WherePath { Bitmask maskLoop; /* Bitmask of all WhereLoop objects in this path */ Bitmask revLoop; /* aLoop[]s that should be reversed for ORDER BY */ - WhereCost nRow; /* Estimated number of rows generated by this path */ - WhereCost rCost; /* Total cost of this path */ + LogEst nRow; /* Estimated number of rows generated by this path */ + LogEst rCost; /* Total cost of this path */ u8 isOrdered; /* True if this path satisfies ORDER BY */ u8 isOrderedValid; /* True if the isOrdered field is valid */ WhereLoop **aLoop; /* Array of WhereLoop objects implementing this path */ }; @@ -268,11 +248,11 @@ union { int leftColumn; /* Column number of X in "X " */ WhereOrInfo *pOrInfo; /* Extra information if (eOperator & WO_OR)!=0 */ WhereAndInfo *pAndInfo; /* Extra information if (eOperator& WO_AND)!=0 */ } u; - WhereCost truthProb; /* Probability of truth for this expression */ + LogEst truthProb; /* Probability of truth for this expression */ u16 eOperator; /* A WO_xx value describing */ u8 wtFlags; /* TERM_xxx bit flags. See below */ u8 nChild; /* Number of children that must disable us */ WhereClause *pWC; /* The clause this term is part of */ Bitmask prereqRight; /* Bitmask of tables used by pExpr->pRight */ @@ -416,11 +396,11 @@ SrcList *pTabList; /* List of tables in the join */ ExprList *pOrderBy; /* The ORDER BY clause or NULL */ ExprList *pResultSet; /* Result set. DISTINCT operates on these */ WhereLoop *pLoops; /* List of all WhereLoop objects */ Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ - WhereCost nRowOut; /* Estimated number of output rows */ + LogEst nRowOut; /* Estimated number of output rows */ u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ u8 bOBSat; /* ORDER BY satisfied by indices */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE/DELETE */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ u8 eDistinct; /* One of the WHERE_DISTINCT_* values below */ @@ -476,30 +456,15 @@ #define WHERE_IN_ABLE 0x00000800 /* Able to support an IN operator */ #define WHERE_ONEROW 0x00001000 /* Selects no more than one row */ #define WHERE_MULTI_OR 0x00002000 /* OR using multiple indices */ #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ - -/* Convert a WhereCost value (10 times log2(X)) into its integer value X. -** A rough approximation is used. The value returned is not exact. -*/ -static u64 whereCostToInt(WhereCost x){ - u64 n; - if( x<10 ) return 1; - n = x%10; - x /= 10; - if( n>=5 ) n -= 2; - else if( n>=1 ) n -= 1; - if( x>=3 ) return (n+8)<<(x-3); - return (n+8)>>(3-x); -} - /* ** Return the estimated number of output rows from a WHERE clause */ u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ - return whereCostToInt(pWInfo->nRowOut); + return sqlite3LogEstToInt(pWInfo->nRowOut); } /* ** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this ** WHERE clause returns outputs for DISTINCT processing. @@ -557,12 +522,12 @@ ** so that pSet keeps the N_OR_COST best entries seen so far. */ static int whereOrInsert( WhereOrSet *pSet, /* The WhereOrSet to be updated */ Bitmask prereq, /* Prerequisites of the new entry */ - WhereCost rRun, /* Run-cost of the new entry */ - WhereCost nOut /* Number of outputs for the new entry */ + LogEst rRun, /* Run-cost of the new entry */ + LogEst nOut /* Number of outputs for the new entry */ ){ u16 i; WhereOrCost *p; for(i=pSet->n, p=pSet->a; i>0; i--, p++){ if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ @@ -643,13 +608,10 @@ if( pWC->a!=pWC->aStatic ){ sqlite3DbFree(db, pWC->a); } } -/* Forward declaration */ -static WhereCost whereCost(tRowcnt x); - /* ** Add a single new WhereTerm entry to the WhereClause object pWC. ** The new WhereTerm object is constructed from Expr p and with wtFlags. ** The index in pWC->a[] of the new WhereTerm is returned on success. ** 0 is returned if the new WhereTerm could not be added due to a memory @@ -688,11 +650,11 @@ } pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); } pTerm = &pWC->a[idx = pWC->nTerm++]; if( p && ExprHasProperty(p, EP_Unlikely) ){ - pTerm->truthProb = whereCost(p->iTable) - 99; + pTerm->truthProb = sqlite3LogEst(p->iTable) - 99; }else{ pTerm->truthProb = -1; } pTerm->pExpr = sqlite3ExprSkipCollate(p); pTerm->wtFlags = wtFlags; @@ -1952,79 +1914,16 @@ } return 0; } -/* -** Find (an approximate) sum of two WhereCosts. This computation is -** not a simple "+" operator because WhereCost is stored as a logarithmic -** value. -** -*/ -static WhereCost whereCostAdd(WhereCost a, WhereCost b){ - static const unsigned char x[] = { - 10, 10, /* 0,1 */ - 9, 9, /* 2,3 */ - 8, 8, /* 4,5 */ - 7, 7, 7, /* 6,7,8 */ - 6, 6, 6, /* 9,10,11 */ - 5, 5, 5, /* 12-14 */ - 4, 4, 4, 4, /* 15-18 */ - 3, 3, 3, 3, 3, 3, /* 19-24 */ - 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ - }; - if( a>=b ){ - if( a>b+49 ) return a; - if( a>b+31 ) return a+1; - return a+x[a-b]; - }else{ - if( b>a+49 ) return b; - if( b>a+31 ) return b+1; - return b+x[b-a]; - } -} - -/* -** Convert an integer into a WhereCost. In other words, compute a -** good approximatation for 10*log2(x). -*/ -static WhereCost whereCost(tRowcnt x){ - static WhereCost a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; - WhereCost y = 40; - if( x<8 ){ - if( x<2 ) return 0; - while( x<8 ){ y -= 10; x <<= 1; } - }else{ - while( x>255 ){ y += 40; x >>= 4; } - while( x>15 ){ y += 10; x >>= 1; } - } - return a[x&7] + y - 10; -} - -#ifndef SQLITE_OMIT_VIRTUALTABLE -/* -** Convert a double (as received from xBestIndex of a virtual table) -** into a WhereCost. In other words, compute an approximation for -** 10*log2(x). -*/ -static WhereCost whereCostFromDouble(double x){ - u64 a; - WhereCost e; - assert( sizeof(x)==8 && sizeof(a)==8 ); - if( x<=1 ) return 0; - if( x<=2000000000 ) return whereCost((tRowcnt)x); - memcpy(&a, &x, 8); - e = (a>>52) - 1022; - return e*10; -} -#endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Estimate the logarithm of the input value to base 2. */ -static WhereCost estLog(WhereCost N){ - WhereCost x = whereCost(N); +static LogEst estLog(LogEst N){ + LogEst x = sqlite3LogEst(N); return x>33 ? x - 33 : 0; } /* ** Two routines for printing the content of an sqlite3_index_info @@ -2533,11 +2432,11 @@ ** ** ... FROM t1 WHERE a > ? AND a < ? ... ** ** then nEq is set to 0. ** -** When this function is called, *pnOut is set to the whereCost() of the +** When this function is called, *pnOut is set to the sqlite3LogEst() of the ** number of rows that the index scan is expected to visit without ** considering the range constraints. If nEq is 0, this is the number of ** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) ** to account for the range contraints pLower and pUpper. ** @@ -2549,19 +2448,19 @@ static int whereRangeScanEst( Parse *pParse, /* Parsing & code generating context */ WhereLoopBuilder *pBuilder, WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereCost *pnOut /* IN/OUT: Number of rows visited */ + WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ ){ int rc = SQLITE_OK; - int nOut = (int)*pnOut; - WhereCost nNew; + int nOut = pLoop->nOut; + int nEq = pLoop->u.btree.nEq; + LogEst nNew; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - Index *p = pBuilder->pNew->u.btree.pIndex; - int nEq = pBuilder->pNew->u.btree.nEq; + Index *p = pLoop->u.btree.pIndex; if( p->nSample>0 && nEq==pBuilder->nRecValid && nEqnSampleCol && OptimizationEnabled(pParse->db, SQLITE_Stat3) @@ -2638,18 +2537,18 @@ } pBuilder->pRec = pRec; if( rc==SQLITE_OK ){ if( iUpper>iLower ){ - nNew = whereCost(iUpper - iLower); + nNew = sqlite3LogEst(iUpper - iLower); }else{ - nNew = 10; assert( 10==whereCost(2) ); + nNew = 10; assert( 10==sqlite3LogEst(2) ); } if( nNewnOut = (LogEst)nOut; WHERETRACE(0x100, ("range scan regions: %u..%u est=%d\n", (u32)iLower, (u32)iUpper, nOut)); return SQLITE_OK; } } @@ -2660,20 +2559,20 @@ assert( pLower || pUpper ); /* TUNING: Each inequality constraint reduces the search space 4-fold. ** A BETWEEN operator, therefore, reduces the search space 16-fold */ nNew = nOut; if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){ - nNew -= 20; assert( 20==whereCost(4) ); + nNew -= 20; assert( 20==sqlite3LogEst(4) ); nOut--; } if( pUpper ){ - nNew -= 20; assert( 20==whereCost(4) ); + nNew -= 20; assert( 20==sqlite3LogEst(4) ); nOut--; } if( nNew<10 ) nNew = 10; if( nNewnOut = (LogEst)nOut; return rc; } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* @@ -4313,11 +4212,11 @@ */ static int whereLoopAddBtreeIndex( WhereLoopBuilder *pBuilder, /* The WhereLoop factory */ struct SrcList_item *pSrc, /* FROM clause term being analyzed */ Index *pProbe, /* An index on pSrc */ - WhereCost nInMul /* log(Number of iterations due to IN) */ + LogEst nInMul /* log(Number of iterations due to IN) */ ){ WhereInfo *pWInfo = pBuilder->pWInfo; /* WHERE analyse context */ Parse *pParse = pWInfo->pParse; /* Parsing context */ sqlite3 *db = pParse->db; /* Database connection malloc context */ WhereLoop *pNew; /* Template WhereLoop under construction */ @@ -4326,15 +4225,15 @@ WhereScan scan; /* Iterator for WHERE terms */ Bitmask saved_prereq; /* Original value of pNew->prereq */ u16 saved_nLTerm; /* Original value of pNew->nLTerm */ int saved_nEq; /* Original value of pNew->u.btree.nEq */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ - WhereCost saved_nOut; /* Original value of pNew->nOut */ + LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ - WhereCost nRowEst; /* Estimated index selectivity */ - WhereCost rLogSize; /* Logarithm of table size */ + LogEst nRowEst; /* Estimated index selectivity */ + LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ pNew = pBuilder->pNew; if( db->mallocFailed ) return SQLITE_NOMEM; @@ -4350,11 +4249,11 @@ if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); assert( pNew->u.btree.nEq<=pProbe->nColumn ); if( pNew->u.btree.nEq < pProbe->nColumn ){ iCol = pProbe->aiColumn[pNew->u.btree.nEq]; - nRowEst = whereCost(pProbe->aiRowEst[pNew->u.btree.nEq+1]); + nRowEst = sqlite3LogEst(pProbe->aiRowEst[pNew->u.btree.nEq+1]); if( nRowEst==0 && pProbe->onError==OE_None ) nRowEst = 1; }else{ iCol = -1; nRowEst = 0; } @@ -4364,11 +4263,11 @@ saved_nLTerm = pNew->nLTerm; saved_wsFlags = pNew->wsFlags; saved_prereq = pNew->prereq; saved_nOut = pNew->nOut; pNew->rSetup = 0; - rLogSize = estLog(whereCost(pProbe->aiRowEst[0])); + rLogSize = estLog(sqlite3LogEst(pProbe->aiRowEst[0])); for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; #endif @@ -4391,14 +4290,14 @@ if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; pNew->wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ /* "x IN (SELECT ...)": TUNING: the SELECT returns 25 rows */ - nIn = 46; assert( 46==whereCost(25) ); + nIn = 46; assert( 46==sqlite3LogEst(25) ); }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ - nIn = whereCost(pExpr->x.pList->nExpr); + nIn = sqlite3LogEst(pExpr->x.pList->nExpr); } pNew->rRun += nIn; pNew->u.btree.nEq++; pNew->nOut = nRowEst + nInMul + nIn; }else if( pTerm->eOperator & (WO_EQ) ){ @@ -4416,11 +4315,11 @@ pNew->nOut = nRowEst + nInMul; }else if( pTerm->eOperator & (WO_ISNULL) ){ pNew->wsFlags |= WHERE_COLUMN_NULL; pNew->u.btree.nEq++; /* TUNING: IS NULL selects 2 rows */ - nIn = 10; assert( 10==whereCost(2) ); + nIn = 10; assert( 10==sqlite3LogEst(2) ); pNew->nOut = nRowEst + nInMul + nIn; }else if( pTerm->eOperator & (WO_GT|WO_GE) ){ testcase( pTerm->eOperator & WO_GT ); testcase( pTerm->eOperator & WO_GE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT; @@ -4436,11 +4335,11 @@ pNew->aLTerm[pNew->nLTerm-2] : 0; } if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ /* Adjust nOut and rRun for STAT3 range values */ assert( pNew->nOut==saved_nOut ); - whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &pNew->nOut); + whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 if( nInMul==0 && pProbe->nSample && pNew->u.btree.nEq<=pProbe->nSampleCol @@ -4456,22 +4355,22 @@ && !ExprHasProperty(pExpr, EP_xIsSelect) ){ rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); } assert( nOut==0 || rc==SQLITE_OK ); if( nOut ){ - nOut = whereCost(nOut); + nOut = sqlite3LogEst(nOut); pNew->nOut = MIN(nOut, saved_nOut); } } #endif if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ /* Each row involves a step of the index, then a binary search of ** the main table */ - pNew->rRun = whereCostAdd(pNew->rRun, rLogSize>27 ? rLogSize-17 : 10); + pNew->rRun = sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10); } /* Step cost for each output row */ - pNew->rRun = whereCostAdd(pNew->rRun, pNew->nOut); + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut); whereLoopOutputAdjust(pBuilder->pWC, pNew, pSrc->iCursor); rc = whereLoopInsert(pBuilder, pNew); if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 && pNew->u.btree.nEq<(pProbe->nColumn + (pProbe->zName!=0)) ){ @@ -4567,18 +4466,20 @@ struct SrcList_item *pSrc; /* The FROM clause btree term to add */ WhereLoop *pNew; /* Template WhereLoop object */ int rc = SQLITE_OK; /* Return code */ int iSortIdx = 1; /* Index number */ int b; /* A boolean value */ - WhereCost rSize; /* number of rows in the table */ - WhereCost rLogSize; /* Logarithm of the number of rows in the table */ + LogEst rSize; /* number of rows in the table */ + LogEst rLogSize; /* Logarithm of the number of rows in the table */ WhereClause *pWC; /* The parsed WHERE clause */ + Table *pTab; /* Table being queried */ pNew = pBuilder->pNew; pWInfo = pBuilder->pWInfo; pTabList = pWInfo->pTabList; pSrc = pTabList->a + pNew->iTab; + pTab = pSrc->pTab; pWC = pBuilder->pWC; assert( !IsVirtual(pSrc->pTab) ); if( pSrc->pIndex ){ /* An INDEXED BY clause specifies a particular index to use */ @@ -4592,22 +4493,22 @@ memset(&sPk, 0, sizeof(Index)); sPk.nColumn = 1; sPk.aiColumn = &aiColumnPk; sPk.aiRowEst = aiRowEstPk; sPk.onError = OE_Replace; - sPk.pTable = pSrc->pTab; - aiRowEstPk[0] = pSrc->pTab->nRowEst; + sPk.pTable = pTab; + aiRowEstPk[0] = pTab->nRowEst; aiRowEstPk[1] = 1; pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ /* The real indices of the table are only considered if the ** NOT INDEXED qualifier is omitted from the FROM clause */ sPk.pNext = pFirst; } pProbe = &sPk; } - rSize = whereCost(pSrc->pTab->nRowEst); + rSize = sqlite3LogEst(pTab->nRowEst); rLogSize = estLog(rSize); #ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* Automatic indexes */ if( !pBuilder->pOrSet @@ -4628,17 +4529,17 @@ pNew->nLTerm = 1; pNew->aLTerm[0] = pTerm; /* TUNING: One-time cost for computing the automatic index is ** approximately 7*N*log2(N) where N is the number of rows in ** the table being indexed. */ - pNew->rSetup = rLogSize + rSize + 28; assert( 28==whereCost(7) ); + pNew->rSetup = rLogSize + rSize + 28; assert( 28==sqlite3LogEst(7) ); /* TUNING: Each index lookup yields 20 rows in the table. This ** is more than the usual guess of 10 rows, since we have no way ** of knowning how selective the index will ultimately be. It would ** not be unreasonable to make this value much larger. */ - pNew->nOut = 43; assert( 43==whereCost(20) ); - pNew->rRun = whereCostAdd(rLogSize,pNew->nOut); + pNew->nOut = 43; assert( 43==sqlite3LogEst(20) ); + pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut); pNew->wsFlags = WHERE_AUTO_INDEX; pNew->prereq = mExtra | pTerm->prereqRight; rc = whereLoopInsert(pBuilder, pNew); } } @@ -4667,13 +4568,13 @@ pNew->wsFlags = WHERE_IPK; /* Full table scan */ pNew->iSortIdx = b ? iSortIdx : 0; /* TUNING: Cost of full table scan is 3*(N + log2(N)). - ** + The extra 4 factor is to encourage the use of indexed lookups - ** over full scans. */ - pNew->rRun = whereCostAdd(rSize,rLogSize) + 16; + ** + The extra 3 factor is to encourage the use of indexed lookups + ** over full scans. FIXME */ + pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16; whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; if( rc ) break; }else{ @@ -4682,24 +4583,25 @@ /* Full scan via index */ if( b || ( m==0 && pProbe->bUnordered==0 - && pProbe->iScanRatio<128 + && pProbe->szIdxRowszTabRow && (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 && sqlite3GlobalConfig.bUseCis && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan) ) ){ pNew->iSortIdx = b ? iSortIdx : 0; if( m==0 ){ /* TUNING: Cost of a covering index scan is K*(N + log2(N)). - ** + The extra factor K of between 1.0 and 3.0 is added to - ** encourage the use of indexed lookups. The value of K - ** depends on the iScanRatio value for the index. + ** + The extra factor K of between 1.1 and 3.0 that depends + ** on the relative sizes of the table and the index. K + ** is smaller for smaller indices, thus favoring them. */ - pNew->rRun = whereCostAdd(rSize,rLogSize) + pProbe->iScanRatio/9 + 1; + pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 1 + + (15*pProbe->szIdxRow)/pTab->szTabRow; }else{ assert( b!=0 ); /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N) ** which we will simplify to just N*log2(N) */ pNew->rRun = rSize + rLogSize; @@ -4873,13 +4775,13 @@ pIdxInfo->needToFreeIdxStr = 0; pNew->u.vtab.idxStr = pIdxInfo->idxStr; pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0) && pIdxInfo->orderByConsumed); pNew->rSetup = 0; - pNew->rRun = whereCostFromDouble(pIdxInfo->estimatedCost); + pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost); /* TUNING: Every virtual table query returns 25 rows */ - pNew->nOut = 46; assert( 46==whereCost(25) ); + pNew->nOut = 46; assert( 46==sqlite3LogEst(25) ); whereLoopInsert(pBuilder, pNew); if( pNew->u.vtab.needFree ){ sqlite3_free(pNew->u.vtab.idxStr); pNew->u.vtab.needFree = 0; } @@ -4912,10 +4814,12 @@ pWC = pBuilder->pWC; if( pWInfo->wctrlFlags & WHERE_AND_ONLY ) return SQLITE_OK; pWCEnd = pWC->a + pWC->nTerm; pNew = pBuilder->pNew; memset(&sSum, 0, sizeof(sSum)); + pItem = pWInfo->pTabList->a + pNew->iTab; + iCur = pItem->iCursor; for(pTerm=pWC->a; pTermeOperator & WO_OR)!=0 && (pTerm->u.pOrInfo->indexable & pNew->maskSelf)!=0 ){ @@ -4923,12 +4827,10 @@ WhereTerm * const pOrWCEnd = &pOrWC->a[pOrWC->nTerm]; WhereTerm *pOrTerm; int once = 1; int i, j; - pItem = pWInfo->pTabList->a + pNew->iTab; - iCur = pItem->iCursor; sSubBuild = *pBuilder; sSubBuild.pOrderBy = 0; sSubBuild.pOrSet = &sCur; for(pOrTerm=pOrWC->a; pOrTermnLTerm = 1; @@ -5304,23 +5206,23 @@ ** costs if nRowEst==0. ** ** Return SQLITE_OK on success or SQLITE_NOMEM of a memory allocation ** error occurs. */ -static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ +static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ int mxChoice; /* Maximum number of simultaneous paths tracked */ int nLoop; /* Number of terms in the join */ Parse *pParse; /* Parsing context */ sqlite3 *db; /* The database connection */ int iLoop; /* Loop counter over the terms of the join */ int ii, jj; /* Loop counters */ int mxI = 0; /* Index of next entry to replace */ - WhereCost rCost; /* Cost of a path */ - WhereCost nOut; /* Number of outputs */ - WhereCost mxCost = 0; /* Maximum cost of a set of paths */ - WhereCost mxOut = 0; /* Maximum nOut value on the set of paths */ - WhereCost rSortCost; /* Cost to do a sort */ + LogEst rCost; /* Cost of a path */ + LogEst nOut; /* Number of outputs */ + LogEst mxCost = 0; /* Maximum cost of a set of paths */ + LogEst mxOut = 0; /* Maximum nOut value on the set of paths */ + LogEst rSortCost; /* Cost to do a sort */ int nTo, nFrom; /* Number of valid entries in aTo[] and aFrom[] */ WherePath *aFrom; /* All nFrom paths at the previous level */ WherePath *aTo; /* The nTo best paths at the current level */ WherePath *pFrom; /* An element of aFrom[] that we are working on */ WherePath *pTo; /* An element of aTo[] that we are working on */ @@ -5353,21 +5255,23 @@ /* Seed the search with a single WherePath containing zero WhereLoops. ** ** TUNING: Do not let the number of iterations go above 25. If the cost ** of computing an automatic index is not paid back within the first 25 ** rows, then do not use the automatic index. */ - aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==whereCost(25) ); + aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==sqlite3LogEst(25) ); nFrom = 1; /* Precompute the cost of sorting the final result set, if the caller ** to sqlite3WhereBegin() was concerned about sorting */ rSortCost = 0; if( pWInfo->pOrderBy==0 || nRowEst==0 ){ aFrom[0].isOrderedValid = 1; }else{ - /* TUNING: Estimated cost of sorting is N*log2(N) where N is the - ** number of output rows. */ + /* TUNING: Estimated cost of sorting is 48*N*log2(N) where N is the + ** number of output rows. The 48 is the expected size of a row to sort. + ** FIXME: compute a better estimate of the 48 multiplier based on the + ** result set expressions. */ rSortCost = nRowEst + estLog(nRowEst); WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost)); } /* Compute successively longer WherePaths using the previous generation @@ -5383,12 +5287,12 @@ u8 isOrdered = pFrom->isOrdered; if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue; if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue; /* At this point, pWLoop is a candidate to be the next loop. ** Compute its cost */ - rCost = whereCostAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); - rCost = whereCostAdd(rCost, pFrom->rCost); + rCost = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); + rCost = sqlite3LogEstAdd(rCost, pFrom->rCost); nOut = pFrom->nRow + pWLoop->nOut; maskNew = pFrom->maskLoop | pWLoop->maskSelf; if( !isOrderedValid ){ switch( wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, @@ -5398,11 +5302,11 @@ isOrderedValid = 1; break; case 0: /* No. pFrom+pWLoop will require a separate sort */ isOrdered = 0; isOrderedValid = 1; - rCost = whereCostAdd(rCost, rSortCost); + rCost = sqlite3LogEstAdd(rCost, rSortCost); break; default: /* Cannot tell yet. Try again on the next iteration */ break; } }else{ @@ -5605,11 +5509,11 @@ pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW; pLoop->aLTerm[0] = pTerm; pLoop->nLTerm = 1; pLoop->u.btree.nEq = 1; /* TUNING: Cost of a rowid lookup is 10 */ - pLoop->rRun = 33; /* 33==whereCost(10) */ + pLoop->rRun = 33; /* 33==sqlite3LogEst(10) */ }else{ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ assert( pLoop->aLTermSpace==pLoop->aLTerm ); assert( ArraySize(pLoop->aLTermSpace)==4 ); if( pIdx->onError==OE_None @@ -5628,16 +5532,16 @@ } pLoop->nLTerm = j; pLoop->u.btree.nEq = j; pLoop->u.btree.pIndex = pIdx; /* TUNING: Cost of a unique index lookup is 15 */ - pLoop->rRun = 39; /* 39==whereCost(15) */ + pLoop->rRun = 39; /* 39==sqlite3LogEst(15) */ break; } } if( pLoop->wsFlags ){ - pLoop->nOut = (WhereCost)1; + pLoop->nOut = (LogEst)1; pWInfo->a[0].pWLoop = pLoop; pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur); pWInfo->a[0].iTabCur = iCur; pWInfo->nRowOut = 1; if( pWInfo->pOrderBy ) pWInfo->bOBSat = 1; Index: test/e_select.test ================================================================== --- test/e_select.test +++ test/e_select.test @@ -448,21 +448,21 @@ # EVIDENCE-OF: R-44414-54710 There is a row in the cartesian product # dataset formed by combining each unique combination of a row from the # left-hand and right-hand datasets. # do_join_test e_select-1.4.2.1 { - SELECT * FROM x2 %JOIN% x3 + SELECT * FROM x2 %JOIN% x3 ORDER BY +c, +f } [list -60.06 {} {} -39.24 {} encompass -1 \ - -60.06 {} {} presenting 51 reformation dignified \ + -60.06 {} {} alerting {} -93.79 {} \ + -60.06 {} {} coldest -96 dramatists 82.3 \ -60.06 {} {} conducting -87.24 37.56 {} \ - -60.06 {} {} coldest -96 dramatists 82.3 \ - -60.06 {} {} alerting {} -93.79 {} \ + -60.06 {} {} presenting 51 reformation dignified \ -58 {} 1.21 -39.24 {} encompass -1 \ - -58 {} 1.21 presenting 51 reformation dignified \ - -58 {} 1.21 conducting -87.24 37.56 {} \ + -58 {} 1.21 alerting {} -93.79 {} \ -58 {} 1.21 coldest -96 dramatists 82.3 \ - -58 {} 1.21 alerting {} -93.79 {} \ + -58 {} 1.21 conducting -87.24 37.56 {} \ + -58 {} 1.21 presenting 51 reformation dignified \ ] # TODO: Come back and add a few more like the above. # EVIDENCE-OF: R-20659-43267 In other words, if the left-hand dataset # consists of Nlhs rows of Mlhs columns, and the right-hand dataset of Index: test/pragma.test ================================================================== --- test/pragma.test +++ test/pragma.test @@ -572,11 +572,11 @@ } {} do_test pragma-6.4 { execsql { pragma index_list(t3); } - } {/0 sqlite_autoindex_t3_1 1 \d+/} + } {/0 {} 1 \d+ 1 sqlite_autoindex_t3_1 1 \d+/} } ifcapable {!foreignkey} { execsql {CREATE TABLE t3(a,b UNIQUE)} } do_test pragma-6.5.1 { @@ -645,11 +645,11 @@ db close sqlite3 db test.db execsql { pragma index_list(t3); } -} {/0 t3i1 0 \d+ 1 sqlite_autoindex_t3_1 1 \d+/} +} {/0 {} 1 \d+ 1 t3i1 0 \d+ 2 sqlite_autoindex_t3_1 1 \d+/} do_test pragma-7.1.2 { execsql { pragma index_list(t3_bogus); } } {} @@ -1659,11 +1659,11 @@ do_test 23.3 { db eval { CREATE INDEX i3 ON t1(d,b,c); } db2 eval {PRAGMA index_list(t1)} -} {/0 i3 0 \d+ 1 i2 0 \d+ 2 i1 0 \d+/} +} {/0 {} 1 \d+ 1 i3 0 \d+ 2 i2 0 \d+ 3 i1 0 \d+/} do_test 23.4 { db eval { ALTER TABLE t1 ADD COLUMN e; } db2 eval { Index: test/select1.test ================================================================== --- test/select1.test +++ test/select1.test @@ -540,11 +540,11 @@ } {a.f1 11 a.f2 22 b.f1 11 b.f2 22} do_test select1-6.9.7 { set x [execsql2 { SELECT * FROM test1 a, (select 5, 6) LIMIT 1 }] - regsub -all {subquery_[0-9a-fA-F]+_} $x {subquery} x + regsub -all {sq_[0-9a-fA-F_]+} $x {subquery} x set x } {a.f1 11 a.f2 22 sqlite_subquery.5 5 sqlite_subquery.6 6} do_test select1-6.9.8 { set x [execsql2 { SELECT * FROM test1 a, (select 5 AS x, 6 AS y) AS b LIMIT 1 ADDED tool/logest.c Index: tool/logest.c ================================================================== --- /dev/null +++ tool/logest.c @@ -0,0 +1,141 @@ +/* +** 2013-06-10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains a simple command-line utility for converting from +** integers and LogEst values and back again and for doing simple +** arithmetic operations (multiple and add) on LogEst values. +** +** Usage: +** +** ./LogEst ARGS +** +** Arguments: +** +** 'x' Multiple the top two elements of the stack +** '+' Add the top two elements of the stack +** NUM Convert NUM from integer to LogEst and push onto the stack +** ^NUM Interpret NUM as a LogEst and push onto stack. +** +** Examples: +** +** To convert 123 from LogEst to integer: +** +** ./LogEst ^123 +** +** To convert 123456 from integer to LogEst: +** +** ./LogEst 123456 +** +*/ +#include +#include +#include +#include +#include +#include "sqlite3.h" + +typedef short int LogEst; /* 10 times log2() */ + +LogEst logEstMultiply(LogEst a, LogEst b){ return a+b; } +LogEst logEstAdd(LogEst a, LogEst b){ + static const unsigned char x[] = { + 10, 10, /* 0,1 */ + 9, 9, /* 2,3 */ + 8, 8, /* 4,5 */ + 7, 7, 7, /* 6,7,8 */ + 6, 6, 6, /* 9,10,11 */ + 5, 5, 5, /* 12-14 */ + 4, 4, 4, 4, /* 15-18 */ + 3, 3, 3, 3, 3, 3, /* 19-24 */ + 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ + }; + if( ab+49 ) return a; + if( a>b+31 ) return a+1; + return a+x[a-b]; +} +LogEst logEstFromInteger(sqlite3_uint64 x){ + static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; + LogEst y = 40; + if( x<8 ){ + if( x<2 ) return 0; + while( x<8 ){ y -= 10; x <<= 1; } + }else{ + while( x>255 ){ y += 40; x >>= 4; } + while( x>15 ){ y += 10; x >>= 1; } + } + return a[x&7] + y - 10; +} +static sqlite3_uint64 logEstToInt(LogEst x){ + sqlite3_uint64 n; + if( x<10 ) return 1; + n = x%10; + x /= 10; + if( n>=5 ) n -= 2; + else if( n>=1 ) n -= 1; + if( x>=3 ) return (n+8)<<(x-3); + return (n+8)>>(3-x); +} +static LogEst logEstFromDouble(double x){ + sqlite3_uint64 a; + LogEst e; + assert( sizeof(x)==8 && sizeof(a)==8 ); + if( x<=0.0 ) return -32768; + if( x<1.0 ) return -logEstFromDouble(1/x); + if( x<1024.0 ) return logEstFromInteger((sqlite3_uint64)(1024.0*x)) - 100; + if( x<=2000000000.0 ) return logEstFromInteger((sqlite3_uint64)x); + memcpy(&a, &x, 8); + e = (a>>52) - 1022; + return e*10; +} + +int isFloat(const char *z){ + while( z[0] ){ + if( z[0]=='.' || z[0]=='E' || z[0]=='e' ) return 1; + z++; + } + return 0; +} + +int main(int argc, char **argv){ + int i; + int n = 0; + LogEst a[100]; + for(i=1; i=2 ){ + a[n-2] = logEstAdd(a[n-2],a[n-1]); + n--; + } + }else if( z[0]=='x' ){ + if( n>=2 ){ + a[n-2] = logEstMultiply(a[n-2],a[n-1]); + n--; + } + }else if( z[0]=='^' ){ + a[n++] = atoi(z+1); + }else if( isFloat(z) ){ + a[n++] = logEstFromDouble(atof(z)); + }else{ + a[n++] = logEstFromInteger(atoi(z)); + } + } + for(i=n-1; i>=0; i--){ + if( a[i]<0 ){ + printf("%d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i])); + }else{ + sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024; + printf("%d (%lld.%02lld)\n", a[i], x/100, x%100); + } + } + return 0; +} Index: tool/mkpragmatab.tcl ================================================================== --- tool/mkpragmatab.tcl +++ tool/mkpragmatab.tcl @@ -252,10 +252,14 @@ IF: defined(SQLITE_HAS_CODEC) NAME: hexkey IF: defined(SQLITE_HAS_CODEC) + NAME: hexrekey + TYPE: HEXKEY + IF: defined(SQLITE_HAS_CODEC) + NAME: activate_extensions IF: defined(SQLITE_HAS_CODEC) || defined(SQLITE_ENABLE_CEROD) NAME: soft_heap_limit } DELETED tool/wherecosttest.c Index: tool/wherecosttest.c ================================================================== --- tool/wherecosttest.c +++ /dev/null @@ -1,111 +0,0 @@ -/* -** 2013-06-10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file contains a simple command-line utility for converting from -** integers and WhereCost values and back again and for doing simple -** arithmetic operations (multiple and add) on WhereCost values. -** -** Usage: -** -** ./wherecosttest ARGS -** -** Arguments: -** -** 'x' Multiple the top two elements of the stack -** '+' Add the top two elements of the stack -** NUM Convert NUM from integer to WhereCost and push onto the stack -** ^NUM Interpret NUM as a WhereCost and push onto stack. -** -** Examples: -** -** To convert 123 from WhereCost to integer: -** -** ./wherecosttest ^123 -** -** To convert 123456 from integer to WhereCost: -** -** ./wherecosttest 123456 -** -*/ -#include -#include -#include - -typedef unsigned short int WhereCost; /* 10 times log2() */ - -WhereCost whereCostMultiply(WhereCost a, WhereCost b){ return a+b; } -WhereCost whereCostAdd(WhereCost a, WhereCost b){ - static const unsigned char x[] = { - 10, 10, /* 0,1 */ - 9, 9, /* 2,3 */ - 8, 8, /* 4,5 */ - 7, 7, 7, /* 6,7,8 */ - 6, 6, 6, /* 9,10,11 */ - 5, 5, 5, /* 12-14 */ - 4, 4, 4, 4, /* 15-18 */ - 3, 3, 3, 3, 3, 3, /* 19-24 */ - 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ - }; - if( ab+49 ) return a; - if( a>b+31 ) return a+1; - return a+x[a-b]; -} -WhereCost whereCostFromInteger(int x){ - static WhereCost a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; - WhereCost y = 40; - if( x<8 ){ - if( x<2 ) return 0; - while( x<8 ){ y -= 10; x <<= 1; } - }else{ - while( x>255 ){ y += 40; x >>= 4; } - while( x>15 ){ y += 10; x >>= 1; } - } - return a[x&7] + y - 10; -} -static unsigned long int whereCostToInt(WhereCost x){ - unsigned long int n; - if( x<10 ) return 1; - n = x%10; - x /= 10; - if( n>=5 ) n -= 2; - else if( n>=1 ) n -= 1; - if( x>=3 ) return (n+8)<<(x-3); - return (n+8)>>(3-x); -} - -int main(int argc, char **argv){ - int i; - int n = 0; - WhereCost a[100]; - for(i=1; i=2 ){ - a[n-2] = whereCostAdd(a[n-2],a[n-1]); - n--; - } - }else if( z[0]=='x' ){ - if( n>=2 ){ - a[n-2] = whereCostMultiply(a[n-2],a[n-1]); - n--; - } - }else if( z[0]=='^' ){ - a[n++] = atoi(z+1); - }else{ - a[n++] = whereCostFromInteger(atoi(z)); - } - } - for(i=n-1; i>=0; i--){ - printf("%d (%lu)\n", a[i], whereCostToInt(a[i])); - } - return 0; -}