/ Changes On Branch better-distinct-agg
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch better-distinct-agg Excluding Merge-Ins

This is equivalent to a diff from 7b7a69d098 to 129083bd5e

2016-04-15
01:55
Progress toward getting DISTINCT to use a seek to advance to the next distinct row, when driven by an appropriate index. (check-in: 9e14aa14cf user: drh tags: skip-ahead-distinct)
2016-04-14
19:29
Work toward improving analysis and code generation for DISTINCT and aggregate queries. (Leaf check-in: 129083bd5e user: drh tags: better-distinct-agg)
17:29
In the ICU extension toupper() and tolower() SQL functions, avoid calling u_strToUpper() or u_strToLower() a second time if the buffer passed to the first invocation turns out to be large enough. (check-in: d23e581351 user: dan tags: trunk)
16:40
Add the SF_HasAgg constant (currently unused). Also enhance the comments on many other constant definitions to detail constraints on their values. (check-in: 7b7a69d098 user: drh tags: trunk)
15:44
Correctly interpret negative "PRAGMA cache_size" values when determining the cache-size used for sorting large amounts of data (i.e. the functionality in vdbesort.c). (check-in: 79147dca87 user: dan tags: trunk)

Changes to src/resolve.c.

387
388
389
390
391
392
393
394
395
396
397
398
399


400
401
402
403
404
405
406
    if( (pEList = pNC->pEList)!=0
     && zTab==0
     && cnt==0
    ){
      for(j=0; j<pEList->nExpr; j++){
        char *zAs = pEList->a[j].zName;
        if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
          Expr *pOrig;
          assert( pExpr->pLeft==0 && pExpr->pRight==0 );
          assert( pExpr->x.pList==0 );
          assert( pExpr->x.pSelect==0 );
          pOrig = pEList->a[j].pExpr;
          if( (pNC->ncFlags&NC_AllowAgg)==0 && ExprHasProperty(pOrig, EP_Agg) ){


            sqlite3ErrorMsg(pParse, "misuse of aliased aggregate %s", zAs);
            return WRC_Abort;
          }
          resolveAlias(pParse, pEList, j, pExpr, "", nSubquery);
          cnt = 1;
          pMatch = 0;
          assert( zTab==0 && zDb==0 );







<



<
|
>
>







387
388
389
390
391
392
393

394
395
396

397
398
399
400
401
402
403
404
405
406
    if( (pEList = pNC->pEList)!=0
     && zTab==0
     && cnt==0
    ){
      for(j=0; j<pEList->nExpr; j++){
        char *zAs = pEList->a[j].zName;
        if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){

          assert( pExpr->pLeft==0 && pExpr->pRight==0 );
          assert( pExpr->x.pList==0 );
          assert( pExpr->x.pSelect==0 );

          if( (pNC->ncFlags&NC_AllowAgg)==0
           && ExprHasProperty(pEList->a[j].pExpr, EP_Agg)
          ){
            sqlite3ErrorMsg(pParse, "misuse of aliased aggregate %s", zAs);
            return WRC_Abort;
          }
          resolveAlias(pParse, pEList, j, pExpr, "", nSubquery);
          cnt = 1;
          pMatch = 0;
          assert( zTab==0 && zDb==0 );
710
711
712
713
714
715
716



717
718
719
720


721
722
723
724
725
726
727
728
        if( (pDef->funcFlags & SQLITE_FUNC_CONSTANT)==0 ){
          /* Date/time functions that use 'now', and other functions like
          ** sqlite_version() that might change over time cannot be used
          ** in an index. */
          notValid(pParse, pNC, "non-deterministic functions", NC_IdxExpr);
        }
      }



      if( is_agg && (pNC->ncFlags & NC_AllowAgg)==0 ){
        sqlite3ErrorMsg(pParse, "misuse of aggregate function %.*s()", nId,zId);
        pNC->nErr++;
        is_agg = 0;


      }else if( no_such_func && pParse->db->init.busy==0 ){
        sqlite3ErrorMsg(pParse, "no such function: %.*s", nId, zId);
        pNC->nErr++;
      }else if( wrong_num_args ){
        sqlite3ErrorMsg(pParse,"wrong number of arguments to function %.*s()",
             nId, zId);
        pNC->nErr++;
      }







>
>
>




>
>
|







710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
        if( (pDef->funcFlags & SQLITE_FUNC_CONSTANT)==0 ){
          /* Date/time functions that use 'now', and other functions like
          ** sqlite_version() that might change over time cannot be used
          ** in an index. */
          notValid(pParse, pNC, "non-deterministic functions", NC_IdxExpr);
        }
      }
#if 0
      /* This error condition will be caught later, during code
      ** generation */
      if( is_agg && (pNC->ncFlags & NC_AllowAgg)==0 ){
        sqlite3ErrorMsg(pParse, "misuse of aggregate function %.*s()", nId,zId);
        pNC->nErr++;
        is_agg = 0;
      }else 
#endif
      if( no_such_func && pParse->db->init.busy==0 ){
        sqlite3ErrorMsg(pParse, "no such function: %.*s", nId, zId);
        pNC->nErr++;
      }else if( wrong_num_args ){
        sqlite3ErrorMsg(pParse,"wrong number of arguments to function %.*s()",
             nId, zId);
        pNC->nErr++;
      }
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240












1241
1242
1243
1244
1245
1246
1247
    sNC.ncFlags = NC_AllowAgg;
    sNC.pSrcList = p->pSrc;
    sNC.pNext = pOuterNC;
  
    /* Resolve names in the result set. */
    if( sqlite3ResolveExprListNames(&sNC, p->pEList) ) return WRC_Abort;
  
    /* If there are no aggregate functions in the result-set, and no GROUP BY 
    ** expression, do not allow aggregates in any of the other expressions.
    */
    assert( (p->selFlags & SF_Aggregate)==0 );
    pGroupBy = p->pGroupBy;
    if( pGroupBy || (sNC.ncFlags & NC_HasAgg)!=0 ){
      assert( NC_MinMaxAgg==SF_MinMaxAgg );
      p->selFlags |= SF_Aggregate | (sNC.ncFlags&NC_MinMaxAgg);
    }else{
      sNC.ncFlags &= ~NC_AllowAgg;
    }
  
    /* If a HAVING clause is present, then there must be a GROUP BY clause.
    */
    if( p->pHaving && !pGroupBy ){
      sqlite3ErrorMsg(pParse, "a GROUP BY clause is required before HAVING");
      return WRC_Abort;
    }
  
    /* Add the output column list to the name-context before parsing the
    ** other expressions in the SELECT statement. This is so that
    ** expressions in the WHERE clause (etc.) can refer to expressions by
    ** aliases in the result set.
    **
    ** Minor point: If this is the case, then the expression will be
    ** re-evaluated for each reference to it.
    */
    sNC.pEList = p->pEList;
    if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;
    if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;













    /* Resolve names in table-valued-function arguments */
    for(i=0; i<p->pSrc->nSrc; i++){
      struct SrcList_item *pItem = &p->pSrc->a[i];
      if( pItem->fg.isTabFunc
       && sqlite3ResolveExprListNames(&sNC, pItem->u1.pFuncArg) 
      ){







|
<

<

<
<
<
<
<
<
<
<
<














|
|
>
>
>
>
>
>
>
>
>
>
>
>







1209
1210
1211
1212
1213
1214
1215
1216

1217

1218









1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
    sNC.ncFlags = NC_AllowAgg;
    sNC.pSrcList = p->pSrc;
    sNC.pNext = pOuterNC;
  
    /* Resolve names in the result set. */
    if( sqlite3ResolveExprListNames(&sNC, p->pEList) ) return WRC_Abort;
  
    /* If a HAVING clause is present, then there must be a GROUP BY clause.

    */

    pGroupBy = p->pGroupBy;









    if( p->pHaving && !pGroupBy ){
      sqlite3ErrorMsg(pParse, "a GROUP BY clause is required before HAVING");
      return WRC_Abort;
    }
  
    /* Add the output column list to the name-context before parsing the
    ** other expressions in the SELECT statement. This is so that
    ** expressions in the WHERE clause (etc.) can refer to expressions by
    ** aliases in the result set.
    **
    ** Minor point: If this is the case, then the expression will be
    ** re-evaluated for each reference to it.
    */
    sNC.pEList = p->pEList;
    if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;
    if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;

    /* If there are no aggregate functions in the result-set, and no GROUP BY 
    ** expression, do not allow aggregates in any of the other expressions.
    */
    assert( (p->selFlags & SF_Aggregate)==0 );
    if( pGroupBy || (sNC.ncFlags & NC_HasAgg)!=0 ){
      assert( NC_MinMaxAgg==SF_MinMaxAgg );
      assert( NC_HasAgg==SF_HasAgg );
      p->selFlags |= SF_Aggregate | (sNC.ncFlags&(NC_MinMaxAgg|NC_HasAgg));
    }else{
      sNC.ncFlags &= ~NC_AllowAgg;
    }

    /* Resolve names in table-valued-function arguments */
    for(i=0; i<p->pSrc->nSrc; i++){
      struct SrcList_item *pItem = &p->pSrc->a[i];
      if( pItem->fg.isTabFunc
       && sqlite3ResolveExprListNames(&sNC, pItem->u1.pFuncArg) 
      ){

Changes to src/select.c.

4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
    ** columns in the SELECT on the RHS */
    if( pTab->nCol!=pSub->pEList->nExpr ){
      sqlite3ErrorMsg(pParse, "expected %d columns for '%s' but got %d",
                      pTab->nCol, pTab->zName, pSub->pEList->nExpr);
      goto select_end;
    }

    isAggSub = (pSub->selFlags & SF_Aggregate)!=0;
    if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){
      /* This subquery can be absorbed into its parent. */
      if( isAggSub ){
        isAgg = 1;
        p->selFlags |= SF_Aggregate;
      }
      i = -1;
    }
    pTabList = p->pSrc;
    if( db->mallocFailed ) goto select_end;
    if( !IgnorableOrderby(pDest) ){
      sSort.pOrderBy = p->pOrderBy;







|




|







4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
    ** columns in the SELECT on the RHS */
    if( pTab->nCol!=pSub->pEList->nExpr ){
      sqlite3ErrorMsg(pParse, "expected %d columns for '%s' but got %d",
                      pTab->nCol, pTab->zName, pSub->pEList->nExpr);
      goto select_end;
    }

    isAggSub = pSub->selFlags & (SF_Aggregate|SF_HasAgg);
    if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){
      /* This subquery can be absorbed into its parent. */
      if( isAggSub ){
        isAgg = 1;
        p->selFlags |= isAggSub;
      }
      i = -1;
    }
    pTabList = p->pSrc;
    if( db->mallocFailed ) goto select_end;
    if( !IgnorableOrderby(pDest) ){
      sSort.pOrderBy = p->pOrderBy;
5058
5059
5060
5061
5062
5063
5064

5065
5066
5067
5068
5069
5070
5071
  /* Various elements of the SELECT copied into local variables for
  ** convenience */
  pEList = p->pEList;
  pWhere = p->pWhere;
  pGroupBy = p->pGroupBy;
  pHaving = p->pHaving;
  sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0;


#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x400 ){
    SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif







>







5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
  /* Various elements of the SELECT copied into local variables for
  ** convenience */
  pEList = p->pEList;
  pWhere = p->pWhere;
  pGroupBy = p->pGroupBy;
  pHaving = p->pHaving;
  sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0;
  assert( (p->selFlags & SF_HasAgg)==0 || (p->selFlags & SF_Aggregate)!=0 );

#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x400 ){
    SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif
5266
5267
5268
5269
5270
5271
5272



5273
5274
5275
5276
5277
5278
5279
      sNC.ncFlags |= NC_InAggFunc;
      sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList);
      sNC.ncFlags &= ~NC_InAggFunc;
    }
    sAggInfo.mxReg = pParse->nMem;
    if( db->mallocFailed ) goto select_end;




    /* Processing for aggregates with GROUP BY is very different and
    ** much more complex than aggregates without a GROUP BY.
    */
    if( pGroupBy ){
      KeyInfo *pKeyInfo;  /* Keying information for the group by clause */
      int addr1;          /* A-vs-B comparision jump */
      int addrOutputRow;  /* Start of subroutine that outputs a result row */







>
>
>







5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
      sNC.ncFlags |= NC_InAggFunc;
      sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList);
      sNC.ncFlags &= ~NC_InAggFunc;
    }
    sAggInfo.mxReg = pParse->nMem;
    if( db->mallocFailed ) goto select_end;

    /* Not true if there are errors in the input SQL!:
    ** assert( (sAggInfo.nFunc>0)==((p->selFlags&SF_HasAgg)!=0) ); */

    /* Processing for aggregates with GROUP BY is very different and
    ** much more complex than aggregates without a GROUP BY.
    */
    if( pGroupBy ){
      KeyInfo *pKeyInfo;  /* Keying information for the group by clause */
      int addr1;          /* A-vs-B comparision jump */
      int addrOutputRow;  /* Start of subroutine that outputs a result row */
5652
5653
5654
5655
5656
5657
5658

5659
5660
5661
5662
5663
5664
5665
  /* Jump here to skip this query
  */
  sqlite3VdbeResolveLabel(v, iEnd);

  /* The SELECT has been coded. If there is an error in the Parse structure,
  ** set the return code to 1. Otherwise 0. */
  rc = (pParse->nErr>0);


  /* Control jumps to here if an error is encountered above, or upon
  ** successful coding of the SELECT.
  */
select_end:
  explainSetInteger(pParse->iSelectId, iRestoreSelectId);








>







5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
  /* Jump here to skip this query
  */
  sqlite3VdbeResolveLabel(v, iEnd);

  /* The SELECT has been coded. If there is an error in the Parse structure,
  ** set the return code to 1. Otherwise 0. */
  rc = (pParse->nErr>0);
  assert( (sAggInfo.nFunc>0)==((p->selFlags&SF_HasAgg)!=0) || rc!=SQLITE_OK );

  /* Control jumps to here if an error is encountered above, or upon
  ** successful coding of the SELECT.
  */
select_end:
  explainSetInteger(pParse->iSelectId, iRestoreSelectId);

Changes to test/aggnested.test.

214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  db eval {
    SELECT value1, (SELECT sum(value2=value1) FROM t2)
      FROM t1
     WHERE value1 IN (SELECT max(value1) FROM t1 GROUP BY id1);
  }
} {12 2 34 4}
do_test aggnested-3.15 {
  # FIXME:  If case 3.16 works, then this case really ought to work too...
  catchsql {
    SELECT max(value1), (SELECT sum(value2=max(value1)) FROM t2)
      FROM t1
     GROUP BY id1;
  }
} {1 {misuse of aggregate function max()}}
do_test aggnested-3.16 {
  db eval {
    SELECT max(value1), (SELECT sum(value2=value1) FROM t2)
      FROM t1
     GROUP BY id1;
  }
} {12 2 34 4}







<





|







214
215
216
217
218
219
220

221
222
223
224
225
226
227
228
229
230
231
232
233
  db eval {
    SELECT value1, (SELECT sum(value2=value1) FROM t2)
      FROM t1
     WHERE value1 IN (SELECT max(value1) FROM t1 GROUP BY id1);
  }
} {12 2 34 4}
do_test aggnested-3.15 {

  catchsql {
    SELECT max(value1), (SELECT sum(value2=max(value1)) FROM t2)
      FROM t1
     GROUP BY id1;
  }
} {0 {12 2 34 4}}
do_test aggnested-3.16 {
  db eval {
    SELECT max(value1), (SELECT sum(value2=value1) FROM t2)
      FROM t1
     GROUP BY id1;
  }
} {12 2 34 4}

Changes to test/select1.test.

219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
do_test select1-2.19 {
  set v [catch {execsql {SELECT SUM(min(f1,f2)) FROM test1}} msg]
  lappend v $msg
} {0 44}
do_test select1-2.20 {
  set v [catch {execsql {SELECT SUM(min(f1)) FROM test1}} msg]
  lappend v $msg
} {1 {misuse of aggregate function min()}}

# Ticket #2526
#
do_test select1-2.21 {
  catchsql {
     SELECT min(f1) AS m FROM test1 GROUP BY f1 HAVING max(m+5)<10
  }







|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
do_test select1-2.19 {
  set v [catch {execsql {SELECT SUM(min(f1,f2)) FROM test1}} msg]
  lappend v $msg
} {0 44}
do_test select1-2.20 {
  set v [catch {execsql {SELECT SUM(min(f1)) FROM test1}} msg]
  lappend v $msg
} {1 {misuse of aggregate: min()}}

# Ticket #2526
#
do_test select1-2.21 {
  catchsql {
     SELECT min(f1) AS m FROM test1 GROUP BY f1 HAVING max(m+5)<10
  }