Changes to src/vdbemem.c.
Changes to src/where.c.
︙ | | |
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
+
|
#define TERM_DYNAMIC 0x01 /* Need to call sqlite3ExprDelete(db, pExpr) */
#define TERM_VIRTUAL 0x02 /* Added by the optimizer. Do not code */
#define TERM_CODED 0x04 /* This term is already coded */
#define TERM_COPIED 0x08 /* Has a child */
#define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */
#define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */
#define TERM_OR_OK 0x40 /* Used during OR-clause processing */
#define TERM_VNULL 0x80 /* Manufactured x>NULL or x<=NULL term */
/*
** An instance of the following structure holds all information about a
** WHERE clause. Mostly this is a container for one or more WhereTerms.
*/
struct WhereClause {
Parse *pParse; /* The parser context */
|
︙ | | |
206
207
208
209
210
211
212
213
214
215
216
217
218
219
|
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
+
|
#define WO_LE (WO_EQ<<(TK_LE-TK_EQ))
#define WO_GT (WO_EQ<<(TK_GT-TK_EQ))
#define WO_GE (WO_EQ<<(TK_GE-TK_EQ))
#define WO_MATCH 0x040
#define WO_ISNULL 0x080
#define WO_OR 0x100 /* Two or more OR-connected terms */
#define WO_AND 0x200 /* Two or more AND-connected terms */
#define WO_NOOP 0x800 /* This term does not restrict search space */
#define WO_ALL 0xfff /* Mask of all possible WO_* values */
#define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */
/*
** Value for wsFlags returned by bestIndex() and stored in
** WhereLevel.wsFlags. These flags determine which search
|
︙ | | |
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
|
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
|
-
+
|
exprAnalyze(pSrc, pWC, idxNew);
pTerm = &pWC->a[idxTerm];
pWC->a[idxNew].iParent = idxTerm;
pTerm->nChild = 1;
}else{
sqlite3ExprListDelete(db, pList);
}
pTerm->eOperator = 0; /* case 1 trumps case 2 */
pTerm->eOperator = WO_NOOP; /* case 1 trumps case 2 */
}
}
}
#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */
/*
|
︙ | | |
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
|
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
pTerm->nChild = 1;
pTerm->wtFlags |= TERM_COPIED;
pNewTerm->prereqAll = pTerm->prereqAll;
}
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */
#ifdef SQLITE_ENABLE_STAT2
/* When sqlite_stat2 histogram data is available an operator of the
** form "x IS NOT NULL" can sometimes be evaluated more efficiently
** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a
** virtual term of that form.
**
** Note that the virtual term must be tagged with TERM_VNULL. This
** TERM_VNULL tag will suppress the not-null check at the beginning
** of the loop. Without the TERM_VNULL flag, the not-null check at
** the start of the loop will prevent any results from being returned.
*/
if( pExpr->op==TK_NOTNULL && pExpr->pLeft->iColumn>=0 ){
Expr *pNewExpr;
Expr *pLeft = pExpr->pLeft;
int idxNew;
WhereTerm *pNewTerm;
pNewExpr = sqlite3PExpr(pParse, TK_GT,
sqlite3ExprDup(db, pLeft, 0),
sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0);
idxNew = whereClauseInsert(pWC, pNewExpr,
TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL);
testcase( idxNew==0 );
pNewTerm = &pWC->a[idxNew];
pNewTerm->leftCursor = pLeft->iTable;
pNewTerm->u.leftColumn = pLeft->iColumn;
pNewTerm->eOperator = WO_GT;
pNewTerm->iParent = idxTerm;
pTerm = &pWC->a[idxTerm];
pTerm->nChild = 1;
pTerm->wtFlags |= TERM_COPIED;
pNewTerm->prereqAll = pTerm->prereqAll;
}
#endif /* SQLITE_ENABLE_STAT2 */
/* Prevent ON clause terms of a LEFT JOIN from being used to drive
** an index for tables to the left of the join.
*/
pTerm->prereqRight |= extraRight;
}
/*
|
︙ | | |
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
|
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
|
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
|
bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */
/*
** Argument pIdx is a pointer to an index structure that has an array of
** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
** stored in Index.aSample. The domain of values stored in said column
** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions.
** Region 0 contains all values smaller than the first sample value. Region
** 1 contains values larger than or equal to the value of the first sample,
** but smaller than the value of the second. And so on.
** stored in Index.aSample. These samples divide the domain of values stored
** the index into (SQLITE_INDEX_SAMPLES+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples. Region 2 contains
** values between samples 2 and 3. And so on. Region SQLITE_INDEX_SAMPLES
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.
**
** If successful, this function determines which of the regions value
** pVal lies in, sets *piRegion to the region index (a value between 0
** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
** Or, if an OOM occurs while converting text values between encodings,
** SQLITE_NOMEM is returned and *piRegion is undefined.
*/
#ifdef SQLITE_ENABLE_STAT2
static int whereRangeRegion(
Parse *pParse, /* Database connection */
Index *pIdx, /* Index to consider domain of */
sqlite3_value *pVal, /* Value to consider */
int roundUp, /* Return largest valid region if true */
int *piRegion /* OUT: Region of domain in which value lies */
){
assert( roundUp==0 || roundUp==1 );
if( ALWAYS(pVal) ){
IndexSample *aSample = pIdx->aSample;
int i = 0;
int eType = sqlite3_value_type(pVal);
if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
double r = sqlite3_value_double(pVal);
for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
if( aSample[i].eType==SQLITE_NULL ) continue;
if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;
if( aSample[i].eType>=SQLITE_TEXT ) break;
if( roundUp ){
if( aSample[i].u.r>r ) break;
}else{
if( aSample[i].u.r>=r ) break;
}
}
}else if( eType==SQLITE_NULL ){
i = 0;
if( roundUp ){
while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++;
}
}else{
sqlite3 *db = pParse->db;
CollSeq *pColl;
const u8 *z;
int n;
|
︙ | | |
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
|
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
|
-
+
-
+
-
+
-
+
|
return SQLITE_NOMEM;
}
assert( z && pColl && pColl->xCmp );
}
n = sqlite3ValueBytes(pVal, pColl->enc);
for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
int r;
int c;
int eSampletype = aSample[i].eType;
if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
if( (eSampletype!=eType) ) break;
#ifndef SQLITE_OMIT_UTF16
if( pColl->enc!=SQLITE_UTF8 ){
int nSample;
char *zSample = sqlite3Utf8to16(
db, pColl->enc, aSample[i].u.z, aSample[i].nByte, &nSample
);
if( !zSample ){
assert( db->mallocFailed );
return SQLITE_NOMEM;
}
r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
sqlite3DbFree(db, zSample);
}else
#endif
{
r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
}
if( r>0 ) break;
if( c-roundUp>=0 ) break;
}
}
assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
*piRegion = i;
}
return SQLITE_OK;
|
︙ | | |
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
|
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
|
-
-
-
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
+
-
-
-
+
+
+
+
-
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
|
** value of 1 indicates that the proposed range scan is expected to visit
** approximately 1/100th (1%) of the rows selected by the nEq equality
** constraints (if any). A return value of 100 indicates that it is expected
** that the range scan will visit every row (100%) selected by the equality
** constraints.
**
** In the absence of sqlite_stat2 ANALYZE data, each range inequality
** reduces the search space by 2/3rds. Hence a single constraint (x>?)
** results in a return of 33 and a range constraint (x>? AND x<?) results
** in a return of 11.
** reduces the search space by 3/4ths. Hence a single constraint (x>?)
** results in a return of 25 and a range constraint (x>? AND x<?) results
** in a return of 6.
*/
static int whereRangeScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index containing the range-compared column; "x" */
int nEq, /* index into p->aCol[] of the range-compared column */
WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */
WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */
int *piEst /* OUT: Return value */
){
int rc = SQLITE_OK;
#ifdef SQLITE_ENABLE_STAT2
if( nEq==0 && p->aSample ){
sqlite3_value *pLowerVal = 0;
sqlite3_value *pUpperVal = 0;
int iEst;
int iLower = 0;
int iUpper = SQLITE_INDEX_SAMPLES;
int roundUpUpper;
int roundUpLower;
u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
if( pLower ){
Expr *pExpr = pLower->pExpr->pRight;
rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
}
if( rc==SQLITE_OK && pUpper ){
Expr *pExpr = pUpper->pExpr->pRight;
rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);
assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0;
}
if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
sqlite3ValueFree(pLowerVal);
sqlite3ValueFree(pUpperVal);
goto range_est_fallback;
}else if( pLowerVal==0 ){
rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
if( pLower ) iLower = iUpper/2;
}else if( pUpperVal==0 ){
rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
}else{
rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
if( rc==SQLITE_OK ){
rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
}
}
WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
iEst = iUpper - iLower;
testcase( iEst==SQLITE_INDEX_SAMPLES );
assert( iEst<=SQLITE_INDEX_SAMPLES );
if( iEst<1 ){
iEst = 1;
}
*piEst = 50/SQLITE_INDEX_SAMPLES;
}else{
*piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
}
sqlite3ValueFree(pLowerVal);
sqlite3ValueFree(pUpperVal);
*piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES;
return rc;
}
range_est_fallback:
#else
UNUSED_PARAMETER(pParse);
UNUSED_PARAMETER(p);
UNUSED_PARAMETER(nEq);
#endif
assert( pLower || pUpper );
*piEst = 100;
if( pLower && pUpper ){
*piEst = 11;
if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4;
if( pUpper ) *piEst /= 4;
return rc;
}
#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an equality constraint x=VALUE and where that VALUE occurs in
** the histogram data. This only works when x is the left-most
** column of an index and sqlite_stat2 histogram data is available
** for that index.
**
** Write the estimated row count into *pnRow and return SQLITE_OK.
** If unable to make an estimate, leave *pnRow unchanged and return
** non-zero.
**
** This routine can fail if it is unable to load a collating sequence
** required for string comparison, or if unable to allocate memory
** for a UTF conversion required for comparison. The error is stored
** in the pParse structure.
*/
int whereEqualScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index whose left-most column is pTerm */
Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */
double *pnRow /* Write the revised row estimate here */
){
sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */
int iLower, iUpper; /* Range of histogram regions containing pRhs */
u8 aff; /* Column affinity */
int rc; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
assert( p->aSample!=0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
if( rc ) goto whereEqualScanEst_cancel;
if( pRhs==0 ) return SQLITE_NOTFOUND;
rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
if( rc ) goto whereEqualScanEst_cancel;
rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
if( rc ) goto whereEqualScanEst_cancel;
WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
if( iLower>=iUpper ){
nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
if( nRowEst<*pnRow ) *pnRow = nRowEst;
}else{
nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
*pnRow = nRowEst;
}
*piEst = 33;
}
whereEqualScanEst_cancel:
sqlite3ValueFree(pRhs);
return rc;
}
#endif /* defined(SQLITE_ENABLE_STAT2) */
#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an IN constraint where the right-hand side of the IN operator
** is a list of values. Example:
**
** WHERE x IN (1,2,3,4)
**
** Write the estimated row count into *pnRow and return SQLITE_OK.
** If unable to make an estimate, leave *pnRow unchanged and return
** non-zero.
**
** This routine can fail if it is unable to load a collating sequence
** required for string comparison, or if unable to allocate memory
** for a UTF conversion required for comparison. The error is stored
** in the pParse structure.
*/
int whereInScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index whose left-most column is pTerm */
ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
double *pnRow /* Write the revised row estimate here */
){
sqlite3_value *pVal = 0; /* One value from list */
int iLower, iUpper; /* Range of histogram regions containing pRhs */
u8 aff; /* Column affinity */
int rc = SQLITE_OK; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
int nSpan = 0; /* Number of histogram regions spanned */
int nSingle = 0; /* Histogram regions hit by a single value */
int nNotFound = 0; /* Count of values that are not constants */
int i; /* Loop counter */
u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */
u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */
assert( p->aSample!=0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
memset(aSpan, 0, sizeof(aSpan));
memset(aSingle, 0, sizeof(aSingle));
for(i=0; i<pList->nExpr; i++){
sqlite3ValueFree(pVal);
rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
if( rc ) break;
if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
nNotFound++;
continue;
}
rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
if( rc ) break;
rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
if( rc ) break;
if( iLower>=iUpper ){
aSingle[iLower] = 1;
}else{
assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
while( iLower<iUpper ) aSpan[iLower++] = 1;
}
}
if( rc==SQLITE_OK ){
for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
if( aSpan[i] ){
nSpan++;
}else if( aSingle[i] ){
nSingle++;
}
}
nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
+ nNotFound*p->aiRowEst[1];
if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
*pnRow = nRowEst;
WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
nSpan, nSingle, nNotFound, nRowEst));
}
sqlite3ValueFree(pVal);
return rc;
}
#endif /* defined(SQLITE_ENABLE_STAT2) */
/*
** Find the query plan for accessing a particular table. Write the
** Find the best query plan for accessing a particular table. Write the
** best query plan and its cost into the WhereCost object supplied as the
** last parameter.
**
** The lowest cost plan wins. The cost is an estimate of the amount of
** CPU and disk I/O need to process the request using the selected plan.
** CPU and disk I/O needed to process the requested result.
** Factors that influence cost include:
**
** * The estimated number of rows that will be retrieved. (The
** fewer the better.)
**
** * Whether or not sorting must occur.
**
** * Whether or not there must be separate lookups in the
** index and in the main table.
**
** If there was an INDEXED BY clause (pSrc->pIndex) attached to the table in
** the SQL statement, then this function only considers plans using the
** named index. If no such plan is found, then the returned cost is
** SQLITE_BIG_DBL. If a plan is found that uses the named index,
** then the cost is calculated in the usual way.
**
** If a NOT INDEXED clause (pSrc->notIndexed!=0) was attached to the table
** in the SELECT statement, then no indexes are considered. However, the
** selected plan may still take advantage of the tables built-in rowid
** selected plan may still take advantage of the built-in rowid primary key
** index.
*/
static void bestBtreeIndex(
Parse *pParse, /* The parsing context */
WhereClause *pWC, /* The WHERE clause */
struct SrcList_item *pSrc, /* The FROM clause term to search */
Bitmask notReady, /* Mask of cursors not available for indexing */
|
︙ | | |
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
|
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
|
-
-
-
+
+
+
+
+
+
+
+
-
+
+
+
|
if( pSrc->pIndex ){
/* An INDEXED BY clause specifies a particular index to use */
pIdx = pProbe = pSrc->pIndex;
wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
eqTermMask = idxEqTermMask;
}else{
/* There is no INDEXED BY clause. Create a fake Index object to
** represent the primary key */
Index *pFirst; /* Any other index on the table */
/* There is no INDEXED BY clause. Create a fake Index object in local
** variable sPk to represent the rowid primary key index. Make this
** fake index the first in a chain of Index objects with all of the real
** indices to follow */
Index *pFirst; /* First of real indices on the table */
memset(&sPk, 0, sizeof(Index));
sPk.nColumn = 1;
sPk.aiColumn = &aiColumnPk;
sPk.aiRowEst = aiRowEstPk;
sPk.onError = OE_Replace;
sPk.pTable = pSrc->pTab;
aiRowEstPk[0] = pSrc->pTab->nRowEst;
aiRowEstPk[1] = 1;
pFirst = pSrc->pTab->pIndex;
if( pSrc->notIndexed==0 ){
/* The real indices of the table are only considered if the
** NOT INDEXED qualifier is omitted from the FROM clause */
sPk.pNext = pFirst;
}
pProbe = &sPk;
wsFlagMask = ~(
WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
);
eqTermMask = WO_EQ|WO_IN;
pIdx = 0;
}
/* Loop over all indices looking for the best one to use
*/
for(; pProbe; pIdx=pProbe=pProbe->pNext){
const unsigned int * const aiRowEst = pProbe->aiRowEst;
double cost; /* Cost of using pProbe */
double nRow; /* Estimated number of rows in result set */
int rev; /* True to scan in reverse order */
double nSearch; /* Estimated number of binary searches */
int wsFlags = 0;
Bitmask used = 0;
/* The following variables are populated based on the properties of
** scan being evaluated. They are then used to determine the expected
** index being evaluated. They are then used to determine the expected
** cost and number of rows returned.
**
** nEq:
** Number of equality terms that can be implemented using the index.
** In other words, the number of initial fields in the index that
** are used in == or IN or NOT NULL constraints of the WHERE clause.
**
** nInMul:
** The "in-multiplier". This is an estimate of how many seek operations
** SQLite must perform on the index in question. For example, if the
** WHERE clause is:
**
** WHERE a IN (1, 2, 3) AND b IN (4, 5, 6)
|
︙ | | |
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
|
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
|
-
+
+
+
-
-
+
+
-
-
-
-
-
+
+
+
+
+
+
-
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
|
**
** If there exists a WHERE term of the form "x IN (SELECT ...)", then
** the sub-select is assumed to return 25 rows for the purposes of
** determining nInMul.
**
** bInEst:
** Set to true if there was at least one "x IN (SELECT ...)" term used
** in determining the value of nInMul.
** in determining the value of nInMul. Note that the RHS of the
** IN operator must be a SELECT, not a value list, for this variable
** to be true.
**
** estBound:
** An estimate on the amount of the table that must be searched. A
** value of 100 means the entire table is searched. Range constraints
** might reduce this to a value less than 100 to indicate that only
** a fraction of the table needs searching. In the absence of
** sqlite_stat2 ANALYZE data, a single inequality reduces the search
** space to 1/3rd its original size. So an x>? constraint reduces
** estBound to 33. Two constraints (x>? AND x<?) reduce estBound to 11.
** space to 1/4rd its original size. So an x>? constraint reduces
** estBound to 25. Two constraints (x>? AND x<?) reduce estBound to 6.
**
** bSort:
** Boolean. True if there is an ORDER BY clause that will require an
** external sort (i.e. scanning the index being evaluated will not
** correctly order records).
**
** bLookup:
** Boolean. True if for each index entry visited a lookup on the
** corresponding table b-tree is required. This is always false
** for the rowid index. For other indexes, it is true unless all the
** columns of the table used by the SELECT statement are present in
** the index (such an index is sometimes described as a covering index).
** Boolean. True if a table lookup is required for each index entry
** visited. In other words, true if this is not a covering index.
** This is always false for the rowid primary key index of a table.
** For other indexes, it is true unless all the columns of the table
** used by the SELECT statement are present in the index (such an
** index is sometimes described as a covering index).
** For example, given the index on (a, b), the second of the following
** two queries requires table b-tree lookups, but the first does not.
** two queries requires table b-tree lookups in order to find the value
** of column c, but the first does not because columns a and b are
** both available in the index.
**
** SELECT a, b FROM tbl WHERE a = 1;
** SELECT a, b, c FROM tbl WHERE a = 1;
*/
int nEq;
int bInEst = 0;
int nInMul = 1;
int estBound = 100;
int nBound = 0; /* Number of range constraints seen */
int bSort = 0;
int bLookup = 0;
WhereTerm *pTerm; /* A single term of the WHERE clause */
int nEq; /* Number of == or IN terms matching index */
int bInEst = 0; /* True if "x IN (SELECT...)" seen */
int nInMul = 1; /* Number of distinct equalities to lookup */
int estBound = 100; /* Estimated reduction in search space */
int nBound = 0; /* Number of range constraints seen */
int bSort = 0; /* True if external sort required */
int bLookup = 0; /* True if not a covering index */
WhereTerm *pTerm; /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT2
WhereTerm *pFirstTerm = 0; /* First term matching the index */
#endif
/* Determine the values of nEq and nInMul */
for(nEq=0; nEq<pProbe->nColumn; nEq++){
int j = pProbe->aiColumn[nEq];
pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
if( pTerm==0 ) break;
wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
if( pTerm->eOperator & WO_IN ){
Expr *pExpr = pTerm->pExpr;
wsFlags |= WHERE_COLUMN_IN;
if( ExprHasProperty(pExpr, EP_xIsSelect) ){
/* "x IN (SELECT ...)": Assume the SELECT returns 25 rows */
nInMul *= 25;
bInEst = 1;
}else if( ALWAYS(pExpr->x.pList) ){
nInMul *= pExpr->x.pList->nExpr + 1;
}else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
/* "x IN (value, value, ...)" */
nInMul *= pExpr->x.pList->nExpr;
}
}else if( pTerm->eOperator & WO_ISNULL ){
wsFlags |= WHERE_COLUMN_NULL;
}
#ifdef SQLITE_ENABLE_STAT2
if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
#endif
used |= pTerm->prereqRight;
}
/* Determine the value of estBound. */
if( nEq<pProbe->nColumn ){
int j = pProbe->aiColumn[nEq];
if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
|
︙ | | |
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
|
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
|
-
-
+
+
+
-
-
+
+
+
-
+
-
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
-
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
-
-
+
+
+
|
wsFlags |= WHERE_IDX_ONLY;
}else{
bLookup = 1;
}
}
/*
** Estimate the number of rows of output. For an IN operator,
** do not let the estimate exceed half the rows in the table.
** Estimate the number of rows of output. For an "x IN (SELECT...)"
** constraint, do not let the estimate exceed half the rows in the table.
*/
nRow = (double)(aiRowEst[nEq] * nInMul);
if( bInEst && nRow*2>aiRowEst[0] ){
nRow = aiRowEst[0]/2;
nInMul = (int)(nRow / aiRowEst[nEq]);
}
#ifdef SQLITE_ENABLE_STAT2
/* Assume constant cost to access a row and logarithmic cost to
** do a binary search. Hence, the initial cost is the number of output
/* If the constraint is of the form x=VALUE and histogram
** data is available for column x, then it might be possible
** to get a better estimate on the number of rows based on
** rows plus log2(table-size) times the number of binary searches.
** VALUE and how common that value is according to the histogram.
*/
cost = nRow + nInMul*estLog(aiRowEst[0]);
if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
if( pFirstTerm->eOperator==WO_EQ ){
whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
}else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
}
}
#endif /* SQLITE_ENABLE_STAT2 */
/* Adjust the number of rows and the cost downward to reflect rows
** that are excluded by range constraints.
*/
nRow = (nRow * (double)estBound) / (double)100;
cost = (cost * (double)estBound) / (double)100;
if( nRow<1 ) nRow = 1;
/* Assume constant cost to advance from one row to the next and
** logarithmic cost to do a binary search. Hence, the initial cost
** is the number of output rows plus log2(table-size) times the
** number of binary searches.
**
** Because fan-out on tables is so much higher than the fan-out on
** indices (because table btrees contain only integer keys in non-leaf
** nodes) we weight the cost of a table binary search as 1/10th the
/* Add in the estimated cost of sorting the result
** cost of an index binary search.
*/
if( pIdx ){
if( bSort ){
cost += cost*estLog(cost);
}
/* If all information can be taken directly from the index, we avoid
** doing table lookups. This reduces the cost by half. (Not really -
if( bLookup ){
/* For an index lookup followed by a table lookup:
** nInMul index searches to find the start of each index range
** + nRow steps through the index
** + nRow table searches to lookup the table entry using the rowid
*/
nSearch = nInMul + nRow/10;
}else{
/* For a covering index:
** nInMul binary searches to find the initial entry
** + nRow steps through the index
*/
nSearch = nInMul;
}
}else{
/* For a rowid primary key lookup:
** nInMult binary searches to find the initial entry scaled by 1/10th
** + nRow steps through the table
*/
nSearch = nInMul/10;
}
cost = nRow + nSearch*estLog(aiRowEst[0]);
/* Add in the estimated cost of sorting the result. This cost is expanded
** this needs to be fixed.)
** by a fudge factor of 3.0 to account for the fact that a sorting step
** involves a write and is thus more expensive than a lookup step.
*/
if( pIdx && bLookup==0 ){
cost /= (double)2;
if( bSort ){
cost += nRow*estLog(nRow)*(double)3;
}
/**** Cost of using this index has now been computed ****/
/* If there are additional constraints on this table that cannot
** be used with the current index, but which might lower the number
** of output rows, adjust the nRow value accordingly. This only
** matters if the current index is the least costly, so do not bother
** with this step if we already know this index will not be chosen.
|
︙ | | |
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
|
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
|
-
+
-
+
+
+
+
+
-
+
|
}else{
/* Assume each additional equality match reduces the result
** set size by a factor of 10 */
nRow /= 10;
}
}else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
if( nSkipRange ){
/* Ignore the first nBound range constraints since the index
/* Ignore the first nSkipRange range constraints since the index
** has already accounted for these */
nSkipRange--;
}else{
/* Assume each additional range constraint reduces the result
** set size by a factor of 3 */
** set size by a factor of 3. Indexed range constraints reduce
** the search space by a larger factor: 4. We make indexed range
** more selective intentionally because of the subjective
** observation that indexed range constraints really are more
** selective in practice, on average. */
nRow /= 3;
}
}else{
}else if( pTerm->eOperator!=WO_NOOP ){
/* Any other expression lowers the output row count by half */
nRow /= 2;
}
}
if( nRow<2 ) nRow = 2;
}
|
︙ | | |
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
|
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
|
+
-
+
+
|
start_constraints = pRangeStart || nEq>0;
/* Seek the index cursor to the start of the range. */
nConstraint = nEq;
if( pRangeStart ){
Expr *pRight = pRangeStart->pExpr->pRight;
sqlite3ExprCode(pParse, pRight, regBase+nEq);
if( (pRangeStart->wtFlags & TERM_VNULL)==0 ){
sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
}
if( zStartAff ){
if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){
/* Since the comparison is to be performed with no conversions
** applied to the operands, set the affinity to apply to pRight to
** SQLITE_AFF_NONE. */
zStartAff[nEq] = SQLITE_AFF_NONE;
}
|
︙ | | |
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
|
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
|
+
-
+
+
|
** range (if any).
*/
nConstraint = nEq;
if( pRangeEnd ){
Expr *pRight = pRangeEnd->pExpr->pRight;
sqlite3ExprCacheRemove(pParse, regBase+nEq, 1);
sqlite3ExprCode(pParse, pRight, regBase+nEq);
if( (pRangeEnd->wtFlags & TERM_VNULL)==0 ){
sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt);
}
if( zEndAff ){
if( sqlite3CompareAffinity(pRight, zEndAff[nEq])==SQLITE_AFF_NONE){
/* Since the comparison is to be performed with no conversions
** applied to the operands, set the affinity to apply to pRight to
** SQLITE_AFF_NONE. */
zEndAff[nEq] = SQLITE_AFF_NONE;
}
|
︙ | | |
Changes to test/analyze2.test.
Changes to test/analyze3.test.
Added test/analyze5.test.