SQLite
Check-in [fe7081e095]
Not logged in
Overview
Comment:Fix some problems with using window-functions in aggregate queries.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | exp-window-functions
Files: files | file ages | folders
SHA3-256:fe7081e0952950f577234fcbb58f3c1efa4579267654fd2f713dc4804e470e7e
User & Date: dan 2018-06-12 18:40:17
Context
2018-06-12
20:53
Fix another issue to do with window-functions in aggregate queries. check-in: 6413e38a17 user: dan tags: exp-window-functions
18:40
Fix some problems with using window-functions in aggregate queries. check-in: fe7081e095 user: dan tags: exp-window-functions
2018-06-11
20:50
Clarify the relationship between a Window object and its associated Expr. check-in: 0cd55e98a4 user: dan tags: exp-window-functions
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/select.c.

5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
....
5482
5483
5484
5485
5486
5487
5488

5489
5490
5491
5492
5493
5494
5495
  sqlite3SelectPrep(pParse, p, 0);
  memset(&sSort, 0, sizeof(sSort));
  sSort.pOrderBy = p->pOrderBy;
  if( pParse->nErr || db->mallocFailed ){
    goto select_end;
  }
  assert( p->pEList!=0 );
  isAgg = (p->selFlags & SF_Aggregate)!=0;
#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x104 ){
    SELECTTRACE(0x104,pParse,p, ("after name resolution:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif

................................................................................
#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x108 ){
    SELECTTRACE(0x104,pParse,p, ("after window rewrite:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif
  pTabList = p->pSrc;


  /* Try to various optimizations (flattening subqueries, and strength
  ** reduction of join operators) in the FROM clause up into the main query
  */
#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
  for(i=0; !p->pPrior && i<pTabList->nSrc; i++){
    struct SrcList_item *pItem = &pTabList->a[i];







<







 







>







5460
5461
5462
5463
5464
5465
5466

5467
5468
5469
5470
5471
5472
5473
....
5481
5482
5483
5484
5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
  sqlite3SelectPrep(pParse, p, 0);
  memset(&sSort, 0, sizeof(sSort));
  sSort.pOrderBy = p->pOrderBy;
  if( pParse->nErr || db->mallocFailed ){
    goto select_end;
  }
  assert( p->pEList!=0 );

#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x104 ){
    SELECTTRACE(0x104,pParse,p, ("after name resolution:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif

................................................................................
#if SELECTTRACE_ENABLED
  if( sqlite3SelectTrace & 0x108 ){
    SELECTTRACE(0x104,pParse,p, ("after window rewrite:\n"));
    sqlite3TreeViewSelect(0, p, 0);
  }
#endif
  pTabList = p->pSrc;
  isAgg = (p->selFlags & SF_Aggregate)!=0;

  /* Try to various optimizations (flattening subqueries, and strength
  ** reduction of join operators) in the FROM clause up into the main query
  */
#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
  for(i=0; !p->pPrior && i<pTabList->nSrc; i++){
    struct SrcList_item *pItem = &pTabList->a[i];

Changes to src/window.c.

8
9
10
11
12
13
14























































































































15
16
17
18
19
20
21
...
430
431
432
433
434
435
436

437
438
439
440
441
442
443
...
591
592
593
594
595
596
597


598
599
600
601
602
603
604
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/
#include "sqliteInt.h"
























































































































/*
** Implementation of built-in window function row_number(). Assumes that the
** window frame has been coerced to:
**
**   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
*/
static void row_numberStepFunc(
................................................................................
            assert( pWin->pOwner==pExpr );
            return WRC_Prune;
          }
        }
      }
      /* Fall through.  */


    case TK_COLUMN: {
      Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0);
      p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup);
      if( p->pSub ){
        assert( ExprHasProperty(pExpr, EP_Static)==0 );
        ExprSetProperty(pExpr, EP_Static);
        sqlite3ExprDelete(pParse->db, pExpr);
................................................................................
      ExprList *pList = 0;
      p->pSrc->a[0].pSelect = pSub;
      sqlite3SrcListAssignCursors(pParse, p->pSrc);
      if( sqlite3ExpandSubquery(pParse, &p->pSrc->a[0]) ){
        rc = SQLITE_NOMEM;
      }else{
        pSub->selFlags |= SF_Expanded;


      }
    }

    sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pMWin->iEphCsr, pSublist->nExpr);
  }

  return rc;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







>
>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
...
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
...
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/
#include "sqliteInt.h"

/*
** SELECT REWRITING
**
**   Any SELECT statement that contains one or more window functions in
**   either the select list or ORDER BY clause (the only two places window
**   functions may be used) is transformed by function sqlite3WindowRewrite()
**   in order to support window function processing. For example, with the
**   schema:
**
**     CREATE TABLE t1(a, b, c, d, e, f, g);
**
**   the statement:
**
**     SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM t1 ORDER BY e;
**
**   is transformed to:
**
**     SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM (
**         SELECT a, e, c, d, b FROM t1 ORDER BY c, d
**     ) ORDER BY e;
**
**   The flattening optimization is disabled when processing this transformed
**   SELECT statement. This allows the implementation of the window function
**   (in this case max()) to process rows sorted in order of (c, d), which
**   makes things easier for obvious reasons. More generally:
**
**     * FROM, WHERE, GROUP BY and HAVING clauses are all moved to 
**       the sub-query.
**
**     * ORDER BY, LIMIT and OFFSET remain part of the parent query.
**
**     * Terminals from each of the expression trees that make up the 
**       select-list and ORDER BY expressions in the parent query are
**       selected by the sub-query. For the purposes of the transformation,
**       terminals are column references and aggregate functions.
**
**   If there is more than one window function in the SELECT that uses
**   the same window declaration (the OVER bit), then a single scan may
**   be used to process more than one window function. For example:
**
**     SELECT max(b) OVER (PARTITION BY c ORDER BY d), 
**            min(e) OVER (PARTITION BY c ORDER BY d) 
**     FROM t1;
**
**   is transformed in the same way as the example above. However:
**
**     SELECT max(b) OVER (PARTITION BY c ORDER BY d), 
**            min(e) OVER (PARTITION BY a ORDER BY b) 
**     FROM t1;
**
**   Must be transformed to:
**
**     SELECT max(b) OVER (PARTITION BY c ORDER BY d) FROM (
**         SELECT e, min(e) OVER (PARTITION BY a ORDER BY b), c, d, b FROM
**           SELECT a, e, c, d, b FROM t1 ORDER BY a, b
**         ) ORDER BY c, d
**     ) ORDER BY e;
**
**   so that both min() and max() may process rows in the order defined by
**   their respective window declarations.
**
** INTERFACE WITH SELECT.C
**
**   When processing the rewritten SELECT statement, code in select.c calls
**   sqlite3WhereBegin() to begin iterating through the results of the
**   sub-query, which is always implemented as a co-routine. It then calls
**   sqlite3WindowCodeStep() to process rows and finish the scan by calling
**   sqlite3WhereEnd().
**
**   sqlite3WindowCodeStep() generates VM code so that, for each row returned
**   by the sub-query a sub-routine (OP_Gosub) coded by select.c is invoked.
**   When the sub-routine is invoked:
**
**     * The results of all window-functions for the row are stored
**       in the associated Window.regResult registers.
**
**     * The required terminal values are stored in the current row of
**       temp table Window.iEphCsr.
**
**   In some cases, depending on the window frame and the specific window
**   functions invoked, sqlite3WindowCodeStep() caches each entire partition
**   in a temp table before returning any rows. In other cases it does not.
**   This detail is encapsulated within this file, the code generated by
**   select.c is the same in either case.
**
** BUILT-IN WINDOW FUNCTIONS
**
**   This implementation features the following built-in window functions:
**
**     row_number()
**     rank()
**     dense_rank()
**     percent_rank()
**     cume_dist()
**     ntile(N)
**     lead(expr [, offset [, default]])
**     lag(expr [, offset [, default]])
**     first_value(expr)
**     last_value(expr)
**     nth_value(expr, N)
**   
**   These are the same built-in window functions supported by Postgres. 
**   Although the behaviour of aggregate window functions (functions that
**   can be used as either aggregates or window funtions) allows them to
**   be implemented using an API, built-in window functions are much more
**   esoteric. Additionally, some window functions (e.g. nth_value()) 
**   may only be implemented by caching the entire partition in memory.
**   As such, some built-in window functions use the same API as aggregate
**   window functions and some are implemented directly using VDBE 
**   instructions. Additionally, for those functions that use the API, the
**   window frame is sometimes modified before the SELECT statement is
**   rewritten. For example, regardless of the specified window frame, the
**   row_number() function always uses:
**
**     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
**
**   See sqlite3WindowUpdate() for details.
*/

/*
** Implementation of built-in window function row_number(). Assumes that the
** window frame has been coerced to:
**
**   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
*/
static void row_numberStepFunc(
................................................................................
            assert( pWin->pOwner==pExpr );
            return WRC_Prune;
          }
        }
      }
      /* Fall through.  */

    case TK_AGG_FUNCTION:
    case TK_COLUMN: {
      Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0);
      p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup);
      if( p->pSub ){
        assert( ExprHasProperty(pExpr, EP_Static)==0 );
        ExprSetProperty(pExpr, EP_Static);
        sqlite3ExprDelete(pParse->db, pExpr);
................................................................................
      ExprList *pList = 0;
      p->pSrc->a[0].pSelect = pSub;
      sqlite3SrcListAssignCursors(pParse, p->pSrc);
      if( sqlite3ExpandSubquery(pParse, &p->pSrc->a[0]) ){
        rc = SQLITE_NOMEM;
      }else{
        pSub->selFlags |= SF_Expanded;
        p->selFlags &= ~SF_Aggregate;
        sqlite3SelectPrep(pParse, pSub, 0);
      }
    }

    sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pMWin->iEphCsr, pSublist->nExpr);
  }

  return rc;

Changes to test/window4.tcl.

123
124
125
126
127
128
129





















130
131
132
133
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING)
  FROM t5
}
execsql_test 3.6.3 {
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
  FROM t5
}























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING)
  FROM t5
}
execsql_test 3.6.3 {
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
  FROM t5
}

#=========================================================================
execsql_test 4.0 {
  DROP TABLE IF EXISTS ttt;
  CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER);
  INSERT INTO ttt VALUES(1, 1, 1);
  INSERT INTO ttt VALUES(2, 2, 2);
  INSERT INTO ttt VALUES(3, 3, 3);

  INSERT INTO ttt VALUES(4, 1, 2);
  INSERT INTO ttt VALUES(5, 2, 3);
  INSERT INTO ttt VALUES(6, 3, 4);

  INSERT INTO ttt VALUES(7, 1, 3);
  INSERT INTO ttt VALUES(8, 2, 4);
  INSERT INTO ttt VALUES(9, 3, 5);
}

execsql_test 4.1 {
  SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b;
}


finish_test

Changes to test/window4.test.

206
207
208
209
210
211
212
213




















214
  FROM t5
} {1 two   2 three   3 four   4 five   5 {}}

do_execsql_test 3.6.3 {
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
  FROM t5
} {1 one   2 two   3 three   4 four   5 five}





















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  FROM t5
} {1 two   2 three   3 four   4 five   5 {}}

do_execsql_test 3.6.3 {
  SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
  FROM t5
} {1 one   2 two   3 three   4 four   5 five}

do_execsql_test 4.0 {
  DROP TABLE IF EXISTS ttt;
  CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER);
  INSERT INTO ttt VALUES(1, 1, 1);
  INSERT INTO ttt VALUES(2, 2, 2);
  INSERT INTO ttt VALUES(3, 3, 3);

  INSERT INTO ttt VALUES(4, 1, 2);
  INSERT INTO ttt VALUES(5, 2, 3);
  INSERT INTO ttt VALUES(6, 3, 4);

  INSERT INTO ttt VALUES(7, 1, 3);
  INSERT INTO ttt VALUES(8, 2, 4);
  INSERT INTO ttt VALUES(9, 3, 5);
} {}

do_execsql_test 4.1 {
  SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b;
} {3 1   4 2   5 3}

finish_test