/ Changes On Branch faster-analyze
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch faster-analyze Excluding Merge-Ins

This is equivalent to a diff from e6225a7bf7 to 85e2badeeb

2014-07-24
23:23
Improve the performance of the ANALYZE command by taking advantage of UNIQUE constraints on indices. (check-in: 114dcf3367 user: drh tags: trunk)
20:25
Avoid trying to allocation zero bytes when analyzing a unique non-null index. (Closed-Leaf check-in: 85e2badeeb user: drh tags: faster-analyze)
19:54
Avoid change tests when analyzing single-column unique indexes after getting past the initial NULL entries. (check-in: 4690e99c07 user: drh tags: faster-analyze)
2014-07-23
23:57
Add experimental "costmult" logic. Only enabled when compiled with -DSQLITE_ENABLE_COSTMULT. (check-in: 729ece4088 user: drh tags: trunk)
18:36
Improve the performance of the ANALYZE command by taking advantage of the fact that every row of a UNIQUE index is distinct. (check-in: 3e1e79e133 user: drh tags: faster-analyze)
15:51
Updated documentation on sqlite3_temp_directory. No changes to code. (check-in: e6225a7bf7 user: drh tags: trunk)
2014-07-22
22:46
When running ANALYZE, it is not necessary to check the right-most key column for changes since that column will always change if none of the previous columns have. (check-in: 48f40861db user: drh tags: trunk)

Changes to src/analyze.c.

367
368
369
370
371
372
373
374
375
376
377




378
379
380
381
382

383
384
385
386
387
388
389
#endif
  sqlite3DbFree(p->db, p);
}

/*
** Implementation of the stat_init(N,K,C) SQL function. The three parameters
** are:
**     N:    The number of columns in the index including the rowid/pk
**     K:    The number of columns in the index excluding the rowid/pk
**     C:    The number of rows in the index
**




** C is only used for STAT3 and STAT4.
**
** For ordinary rowid tables, N==K+1.  But for WITHOUT ROWID tables,
** N=K+P where P is the number of columns in the primary key.  For the
** covering index that implements the original WITHOUT ROWID table, N==K.

**
** This routine allocates the Stat4Accum object in heap memory. The return 
** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
** the size of the blob is sizeof(void*) bytes). 
*/
static void statInit(
  sqlite3_context *context,







|
|
|

>
>
>
>
|

|
|
|
>







367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#endif
  sqlite3DbFree(p->db, p);
}

/*
** Implementation of the stat_init(N,K,C) SQL function. The three parameters
** are:
**     N:    The number of columns in the index including the rowid/pk (note 1)
**     K:    The number of columns in the index excluding the rowid/pk.
**     C:    The number of rows in the index (note 2)
**
** Note 1:  In the special case of the covering index that implements a
** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the
** total number of columns in the table.
**
** Note 2:  C is only used for STAT3 and STAT4.
**
** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
** PRIMARY KEY of the table.  The covering index that implements the
** original WITHOUT ROWID table as N==K as a special case.
**
** This routine allocates the Stat4Accum object in heap memory. The return 
** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
** the size of the blob is sizeof(void*) bytes). 
*/
static void statInit(
  sqlite3_context *context,
685
686
687
688
689
690
691
692



693
694
695
696
697
698
699
** Arguments:
**
**    P     Pointer to the Stat4Accum object created by stat_init()
**    C     Index of left-most column to differ from previous row
**    R     Rowid for the current row.  Might be a key record for
**          WITHOUT ROWID tables.
**
** The SQL function always returns NULL.



**
** The R parameter is only used for STAT3 and STAT4
*/
static void statPush(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv







|
>
>
>







690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
** Arguments:
**
**    P     Pointer to the Stat4Accum object created by stat_init()
**    C     Index of left-most column to differ from previous row
**    R     Rowid for the current row.  Might be a key record for
**          WITHOUT ROWID tables.
**
** This SQL function always returns NULL.  It's purpose it to accumulate
** statistical data and/or samples in the Stat4Accum object about the
** index being analyzed.  The stat_get() SQL function will later be used to
** extract relevant information for constructing the sqlite_statN tables.
**
** The R parameter is only used for STAT3 and STAT4
*/
static void statPush(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
779
780
781
782
783
784
785
786



787
788
789
790
791
792
793
#define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
#define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
#define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
#define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */

/*
** Implementation of the stat_get(P,J) SQL function.  This routine is
** used to query the results.  Content is returned for parameter J



** which is one of the STAT_GET_xxxx values defined above.
**
** If neither STAT3 nor STAT4 are enabled, then J is always
** STAT_GET_STAT1 and is hence omitted and this routine becomes
** a one-parameter function, stat_get(P), that always returns the
** stat1 table entry information.
*/







|
>
>
>







787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
#define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
#define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
#define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
#define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */

/*
** Implementation of the stat_get(P,J) SQL function.  This routine is
** used to query statistical information that has been gathered into
** the Stat4Accum object by prior calls to stat_push().  The P parameter
** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
** The content to returned is determined by the parameter J
** which is one of the STAT_GET_xxxx values defined above.
**
** If neither STAT3 nor STAT4 are enabled, then J is always
** STAT_GET_STAT1 and is hence omitted and this routine becomes
** a one-parameter function, stat_get(P), that always returns the
** stat1 table entry information.
*/
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010

1011
1012
1013
1014
1015
1016

1017
1018
1019

1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
  iTabCur = iTab++;
  iIdxCur = iTab++;
  pParse->nTab = MAX(pParse->nTab, iTab);
  sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);

  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    int nCol;                     /* Number of columns indexed by pIdx */
    int *aGotoChng;               /* Array of jump instruction addresses */
    int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
    int addrGotoChng0;            /* Address of "Goto addr_chng_0" */
    int addrNextRow;              /* Address of "next_row:" */
    const char *zIdxName;         /* Name of the index */


    if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
    if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
    if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
      nCol = pIdx->nKeyCol;
      zIdxName = pTab->zName;

    }else{
      nCol = pIdx->nColumn;
      zIdxName = pIdx->zName;

    }
    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nCol+1));
    if( aGotoChng==0 ) continue;

    /* Populate the register containing the index name. */
    sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
    VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));

    /*
    ** Pseudo-code for loop that calls stat_push():







|
<

<


>






>



>

<
<







1009
1010
1011
1012
1013
1014
1015
1016

1017

1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032


1033
1034
1035
1036
1037
1038
1039
  iTabCur = iTab++;
  iIdxCur = iTab++;
  pParse->nTab = MAX(pParse->nTab, iTab);
  sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);

  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    int nCol;                     /* Number of columns in pIdx. "N" */

    int addrRewind;               /* Address of "OP_Rewind iIdxCur" */

    int addrNextRow;              /* Address of "next_row:" */
    const char *zIdxName;         /* Name of the index */
    int nColTest;                 /* Number of columns to test for changes */

    if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
    if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
    if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
      nCol = pIdx->nKeyCol;
      zIdxName = pTab->zName;
      nColTest = nCol - 1;
    }else{
      nCol = pIdx->nColumn;
      zIdxName = pIdx->zName;
      nColTest = pIdx->uniqNotNull ? pIdx->nKeyCol-1 : nCol-1;
    }



    /* Populate the register containing the index name. */
    sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
    VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));

    /*
    ** Pseudo-code for loop that calls stat_push():
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074


1075
1076

1077
1078
1079
1080
1081
1082
1083
1084
    **
    **  chng_addr_0:
    **   regPrev(0) = idx(0)
    **  chng_addr_1:
    **   regPrev(1) = idx(1)
    **  ...
    **
    **  chng_addr_N:
    **   regRowid = idx(rowid)
    **   stat_push(P, regChng, regRowid)
    **   Next csr
    **   if !eof(csr) goto next_row;
    **
    **  end_of_scan:
    */

    /* Make sure there are enough memory cells allocated to accommodate 
    ** the regPrev array and a trailing rowid (the rowid slot is required
    ** when building a record to insert into the sample column of 
    ** the sqlite_stat4 table.  */
    pParse->nMem = MAX(pParse->nMem, regPrev+nCol);

    /* Open a read-only cursor on the index being analyzed. */
    assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
    sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
    VdbeComment((v, "%s", pIdx->zName));

    /* Invoke the stat_init() function. The arguments are:
    ** 
    **    (1) the number of columns in the index including the rowid,


    **    (2) the number of rows in the index,
    **

    ** The second argument is only used for STAT3 and STAT4
    */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
#endif
    sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
    sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
    sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);







|












|









|
>
>
|

>
|







1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
    **
    **  chng_addr_0:
    **   regPrev(0) = idx(0)
    **  chng_addr_1:
    **   regPrev(1) = idx(1)
    **  ...
    **
    **  endDistinctTest:
    **   regRowid = idx(rowid)
    **   stat_push(P, regChng, regRowid)
    **   Next csr
    **   if !eof(csr) goto next_row;
    **
    **  end_of_scan:
    */

    /* Make sure there are enough memory cells allocated to accommodate 
    ** the regPrev array and a trailing rowid (the rowid slot is required
    ** when building a record to insert into the sample column of 
    ** the sqlite_stat4 table.  */
    pParse->nMem = MAX(pParse->nMem, regPrev+nColTest);

    /* Open a read-only cursor on the index being analyzed. */
    assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
    sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
    VdbeComment((v, "%s", pIdx->zName));

    /* Invoke the stat_init() function. The arguments are:
    ** 
    **    (1) the number of columns in the index including the rowid
    **        (or for a WITHOUT ROWID table, the number of PK columns),
    **    (2) the number of columns in the key without the rowid/pk
    **    (3) the number of rows in the index,
    **
    **
    ** The third argument is only used for STAT3 and STAT4
    */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
#endif
    sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
    sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
    sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111


























1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122

1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135


1136

1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
    **   regChng = 0
    **   goto next_push_0;
    **
    */
    addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
    VdbeCoverage(v);
    sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
    addrGotoChng0 = sqlite3VdbeAddOp0(v, OP_Goto);

    /*
    **  next_row:
    **   regChng = 0
    **   if( idx(0) != regPrev(0) ) goto chng_addr_0
    **   regChng = 1
    **   if( idx(1) != regPrev(1) ) goto chng_addr_1
    **   ...
    **   regChng = N
    **   goto chng_addr_N
    */
    addrNextRow = sqlite3VdbeCurrentAddr(v);


























    for(i=0; i<nCol-1; i++){
      char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
      sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
      aGotoChng[i] = 
      sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
      sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
      VdbeCoverage(v);
    }
    sqlite3VdbeAddOp2(v, OP_Integer, nCol-1, regChng);
    aGotoChng[nCol] = sqlite3VdbeAddOp0(v, OP_Goto);


    /*
    **  chng_addr_0:
    **   regPrev(0) = idx(0)
    **  chng_addr_1:
    **   regPrev(1) = idx(1)
    **  ...
    */
    sqlite3VdbeJumpHere(v, addrGotoChng0);
    for(i=0; i<nCol-1; i++){
      sqlite3VdbeJumpHere(v, aGotoChng[i]);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
    }




    /*
    **  chng_addr_N:
    **   regRowid = idx(rowid)            // STAT34 only
    **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
    **   Next csr
    **   if !eof(csr) goto next_row;
    */
    sqlite3VdbeJumpHere(v, aGotoChng[nCol]);
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    assert( regRowid==(regStat4+2) );
    if( HasRowid(pTab) ){
      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
    }else{
      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
      int j, k, regKey;







<
<
<
<
<
<
<
<
<
<
<
<

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
>







<







1105
1106
1107
1108
1109
1110
1111












1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174

1175
1176
1177
1178
1179
1180
1181
    **   regChng = 0
    **   goto next_push_0;
    **
    */
    addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
    VdbeCoverage(v);
    sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);












    addrNextRow = sqlite3VdbeCurrentAddr(v);

    if( nColTest>0 ){
      int endDistinctTest = sqlite3VdbeMakeLabel(v);
      int *aGotoChng;               /* Array of jump instruction addresses */
      aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*nColTest);
      if( aGotoChng==0 ) continue;

      /*
      **  next_row:
      **   regChng = 0
      **   if( idx(0) != regPrev(0) ) goto chng_addr_0
      **   regChng = 1
      **   if( idx(1) != regPrev(1) ) goto chng_addr_1
      **   ...
      **   regChng = N
      **   goto endDistinctTest
      */
      sqlite3VdbeAddOp0(v, OP_Goto);
      addrNextRow = sqlite3VdbeCurrentAddr(v);
      if( nColTest==1 && pIdx->nKeyCol==1 && pIdx->onError!=OE_None ){
        /* For a single-column UNIQUE index, once we have found a non-NULL
        ** row, we know that all the rest will be distinct, so skip 
        ** subsequent distinctness tests. */
        sqlite3VdbeAddOp2(v, OP_NotNull, regPrev, endDistinctTest);
        VdbeCoverage(v);
      }
      for(i=0; i<nColTest; i++){
        char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
        sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
        aGotoChng[i] = 
        sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
        sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
        VdbeCoverage(v);
      }
      sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng);
      sqlite3VdbeAddOp2(v, OP_Goto, 0, endDistinctTest);
  
  
      /*
      **  chng_addr_0:
      **   regPrev(0) = idx(0)
      **  chng_addr_1:
      **   regPrev(1) = idx(1)
      **  ...
      */
      sqlite3VdbeJumpHere(v, addrNextRow-1);
      for(i=0; i<nColTest; i++){
        sqlite3VdbeJumpHere(v, aGotoChng[i]);
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
      }
      sqlite3VdbeResolveLabel(v, endDistinctTest);
      sqlite3DbFree(db, aGotoChng);
    }
  
    /*
    **  chng_addr_N:
    **   regRowid = idx(rowid)            // STAT34 only
    **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
    **   Next csr
    **   if !eof(csr) goto next_row;
    */

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    assert( regRowid==(regStat4+2) );
    if( HasRowid(pTab) ){
      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
    }else{
      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
      int j, k, regKey;
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
      sqlite3VdbeAddOp2(v, OP_Goto, 1, addrNext); /* P1==1 for end-of-loop */
      sqlite3VdbeJumpHere(v, addrIsNull);
    }
#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */

    /* End of analysis */
    sqlite3VdbeJumpHere(v, addrRewind);
    sqlite3DbFree(db, aGotoChng);
  }


  /* Create a single sqlite_stat1 entry containing NULL as the index
  ** name and the row count as the content.
  */
  if( pOnlyIdx==0 && needTableCnt ){







<







1245
1246
1247
1248
1249
1250
1251

1252
1253
1254
1255
1256
1257
1258
      sqlite3VdbeAddOp2(v, OP_Goto, 1, addrNext); /* P1==1 for end-of-loop */
      sqlite3VdbeJumpHere(v, addrIsNull);
    }
#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */

    /* End of analysis */
    sqlite3VdbeJumpHere(v, addrRewind);

  }


  /* Create a single sqlite_stat1 entry containing NULL as the index
  ** name and the row count as the content.
  */
  if( pOnlyIdx==0 && needTableCnt ){