SQLite

Check-in [2ea4a9f75f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The "x IN (?)" optimization in check-ins [2ff3b25f40] and [e68b427afb] is incorrect, as demonstrated by the in4-5.1 test case in this check-in. The "COLLATE binary" that was being added to the RHS of IN was overriding the implicit collating sequence of the LHS. This change defines the EP_Generic expression node property that blocks all affinity or collating sequence information in the expression subtree and adds that property to the expression taken from RHS of the IN operator.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2ea4a9f75f46eaa928ba17e9e91bc0432750d46d
User & Date: drh 2014-03-20 17:03:30.667
References
2014-06-26
14:46 New ticket [9a8b09f8e6] Affinity problem on "x IN (?)". (artifact: 7a0caf42b8 user: drh)
Context
2014-03-20
18:45
Fix the EXPLAIN indenter in the command-line shell to correctly handle NextIfOpen and PrevIfOpen opcodes. (check-in: 01944c53f5 user: drh tags: trunk)
17:03
The "x IN (?)" optimization in check-ins [2ff3b25f40] and [e68b427afb] is incorrect, as demonstrated by the in4-5.1 test case in this check-in. The "COLLATE binary" that was being added to the RHS of IN was overriding the implicit collating sequence of the LHS. This change defines the EP_Generic expression node property that blocks all affinity or collating sequence information in the expression subtree and adds that property to the expression taken from RHS of the IN operator. (check-in: 2ea4a9f75f user: drh tags: trunk)
15:14
Fix harmless compiler warnings. (check-in: b1435f26b0 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/expr.c.
29
30
31
32
33
34
35

36
37
38
39
40
41
42
** SELECT * FROM t1 WHERE a;
** SELECT a AS b FROM t1 WHERE b;
** SELECT * FROM t1 WHERE (select a from t1);
*/
char sqlite3ExprAffinity(Expr *pExpr){
  int op;
  pExpr = sqlite3ExprSkipCollate(pExpr);

  op = pExpr->op;
  if( op==TK_SELECT ){
    assert( pExpr->flags&EP_xIsSelect );
    return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr);
  }
#ifndef SQLITE_OMIT_CAST
  if( op==TK_CAST ){







>







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
** SELECT * FROM t1 WHERE a;
** SELECT a AS b FROM t1 WHERE b;
** SELECT * FROM t1 WHERE (select a from t1);
*/
char sqlite3ExprAffinity(Expr *pExpr){
  int op;
  pExpr = sqlite3ExprSkipCollate(pExpr);
  if( pExpr->flags & EP_Generic ) return SQLITE_AFF_NONE;
  op = pExpr->op;
  if( op==TK_SELECT ){
    assert( pExpr->flags&EP_xIsSelect );
    return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr);
  }
#ifndef SQLITE_OMIT_CAST
  if( op==TK_CAST ){
118
119
120
121
122
123
124

125
126
127
128
129
130
131
*/
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr){
  sqlite3 *db = pParse->db;
  CollSeq *pColl = 0;
  Expr *p = pExpr;
  while( p ){
    int op = p->op;

    if( op==TK_CAST || op==TK_UPLUS ){
      p = p->pLeft;
      continue;
    }
    if( op==TK_COLLATE || (op==TK_REGISTER && p->op2==TK_COLLATE) ){
      pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken);
      break;







>







119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
*/
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr){
  sqlite3 *db = pParse->db;
  CollSeq *pColl = 0;
  Expr *p = pExpr;
  while( p ){
    int op = p->op;
    if( p->flags & EP_Generic ) break;
    if( op==TK_CAST || op==TK_UPLUS ){
      p = p->pLeft;
      continue;
    }
    if( op==TK_COLLATE || (op==TK_REGISTER && p->op2==TK_COLLATE) ){
      pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken);
      break;
Changes to src/parse.y.
1022
1023
1024
1025
1026
1027
1028
1029

1030
1031
1032





1033
1034
1035
1036
1037



1038
1039
1040
1041
1042
1043
1044
      sqlite3ExprDelete(pParse->db, X.pExpr);
    }else if( Y->nExpr==1 ){
      /* Expressions of the form:
      **
      **      expr1 IN (?1)
      **      expr1 NOT IN (?2)
      **
      ** with exactly one value on the RHS can be simplified to:

      **
      **      expr1 == (+?1 COLLATE binary)
      **      expr1 <> (+?2 COLLATE binary)





      */
      Expr *pRHS = sqlite3ExprAddCollateString(pParse, Y->a[0].pExpr, "binary");
      Y->a[0].pExpr = 0;
      sqlite3ExprListDelete(pParse->db, Y);
      pRHS = sqlite3PExpr(pParse, TK_UPLUS, pRHS, 0, 0);



      A.pExpr = sqlite3PExpr(pParse, N ? TK_NE : TK_EQ, X.pExpr, pRHS, 0);
    }else{
      A.pExpr = sqlite3PExpr(pParse, TK_IN, X.pExpr, 0, 0);
      if( A.pExpr ){
        A.pExpr->x.pList = Y;
        sqlite3ExprSetHeight(pParse, A.pExpr);
      }else{







|
>

|
|
>
>
>
>
>

|


|
>
>
>







1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
      sqlite3ExprDelete(pParse->db, X.pExpr);
    }else if( Y->nExpr==1 ){
      /* Expressions of the form:
      **
      **      expr1 IN (?1)
      **      expr1 NOT IN (?2)
      **
      ** with exactly one value on the RHS can be simplified to something
      ** like this:
      **
      **      expr1 == ?1
      **      expr1 <> ?2
      **
      ** But, the RHS of the == or <> is marked with the EP_Generic flag
      ** so that it may not contribute to the computation of comparison
      ** affinity or the collating sequence to use for comparison.  Otherwise,
      ** the semantics would be subtly different from IN or NOT IN.
      */
      Expr *pRHS = Y->a[0].pExpr;
      Y->a[0].pExpr = 0;
      sqlite3ExprListDelete(pParse->db, Y);
      if( pRHS ){
        pRHS->flags &= ~EP_Collate;
        pRHS->flags |= EP_Generic;
      }
      A.pExpr = sqlite3PExpr(pParse, N ? TK_NE : TK_EQ, X.pExpr, pRHS, 0);
    }else{
      A.pExpr = sqlite3PExpr(pParse, TK_IN, X.pExpr, 0, 0);
      if( A.pExpr ){
        A.pExpr->x.pList = Y;
        sqlite3ExprSetHeight(pParse, A.pExpr);
      }else{
Changes to src/sqliteInt.h.
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
#define EP_Agg       0x000002 /* Contains one or more aggregate functions */
#define EP_Resolved  0x000004 /* IDs have been resolved to COLUMNs */
#define EP_Error     0x000008 /* Expression contains one or more errors */
#define EP_Distinct  0x000010 /* Aggregate function with DISTINCT keyword */
#define EP_VarSelect 0x000020 /* pSelect is correlated, not constant */
#define EP_DblQuoted 0x000040 /* token.z was originally in "..." */
#define EP_InfixFunc 0x000080 /* True for an infix function: LIKE, GLOB, etc */
#define EP_Collate   0x000100 /* Tree contains a TK_COLLATE opeartor */
      /* unused      0x000200 */
#define EP_IntValue  0x000400 /* Integer value contained in u.iValue */
#define EP_xIsSelect 0x000800 /* x.pSelect is valid (otherwise x.pList is) */
#define EP_Skip      0x001000 /* COLLATE, AS, or UNLIKELY */
#define EP_Reduced   0x002000 /* Expr struct EXPR_REDUCEDSIZE bytes only */
#define EP_TokenOnly 0x004000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */
#define EP_Static    0x008000 /* Held in memory not obtained from malloc() */
#define EP_MemToken  0x010000 /* Need to sqlite3DbFree() Expr.zToken */







|
|







1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
#define EP_Agg       0x000002 /* Contains one or more aggregate functions */
#define EP_Resolved  0x000004 /* IDs have been resolved to COLUMNs */
#define EP_Error     0x000008 /* Expression contains one or more errors */
#define EP_Distinct  0x000010 /* Aggregate function with DISTINCT keyword */
#define EP_VarSelect 0x000020 /* pSelect is correlated, not constant */
#define EP_DblQuoted 0x000040 /* token.z was originally in "..." */
#define EP_InfixFunc 0x000080 /* True for an infix function: LIKE, GLOB, etc */
#define EP_Collate   0x000100 /* Tree contains a TK_COLLATE operator */
#define EP_Generic   0x000200 /* Ignore COLLATE or affinity on this tree */
#define EP_IntValue  0x000400 /* Integer value contained in u.iValue */
#define EP_xIsSelect 0x000800 /* x.pSelect is valid (otherwise x.pList is) */
#define EP_Skip      0x001000 /* COLLATE, AS, or UNLIKELY */
#define EP_Reduced   0x002000 /* Expr struct EXPR_REDUCEDSIZE bytes only */
#define EP_TokenOnly 0x004000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */
#define EP_Static    0x008000 /* Held in memory not obtained from malloc() */
#define EP_MemToken  0x010000 /* Need to sqlite3DbFree() Expr.zToken */
Changes to test/in4.test.
298
299
300
301
302
303
304








305



306



















307
308
309
do_execsql_test in4-4.18 {
  SELECT c FROM t4b WHERE b IN (a);
} {4}
do_execsql_test in4-4.19 {
  SELECT c FROM t4b WHERE +b IN (a);
} {}



































finish_test







>
>
>
>
>
>
>
>

>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
do_execsql_test in4-4.18 {
  SELECT c FROM t4b WHERE b IN (a);
} {4}
do_execsql_test in4-4.19 {
  SELECT c FROM t4b WHERE +b IN (a);
} {}

do_execsql_test in4-5.1 {
  CREATE TABLE t5(c INTEGER PRIMARY KEY, d TEXT COLLATE nocase);
  INSERT INTO t5 VALUES(17, 'fuzz');
  SELECT 1 FROM t5 WHERE 'fuzz' IN (d);  -- match
  SELECT 2 FROM t5 WHERE 'FUZZ' IN (d);  -- no match
  SELECT 3 FROM t5 WHERE d IN ('fuzz');  -- match
  SELECT 4 FROM t5 WHERE d IN ('FUZZ');  -- match
} {1 3 4}

# An expression of the form "x IN (y)" can be used as "x=y" by the
# query planner when computing transitive constraints or to run the
# query using an index on y.
#
do_execsql_test in4-6.1 {
  CREATE TABLE t6a(a INTEGER PRIMARY KEY, b);
  INSERT INTO t6a VALUES(1,2),(3,4),(5,6);
  CREATE TABLE t6b(c INTEGER PRIMARY KEY, d);
  INSERT INTO t6b VALUES(4,44),(5,55),(6,66);

  SELECT * FROM t6a, t6b WHERE a=3 AND b IN (c);
} {3 4 4 44}
do_execsql_test in4-6.1-eqp {
  EXPLAIN QUERY PLAN
  SELECT * FROM t6a, t6b WHERE a=3 AND b IN (c);
} {~/SCAN/}
do_execsql_test in4-6.2 {
  SELECT * FROM t6a, t6b WHERE a=3 AND c IN (b);
} {3 4 4 44}
do_execsql_test in4-6.2-eqp {
  EXPLAIN QUERY PLAN
  SELECT * FROM t6a, t6b WHERE a=3 AND c IN (b);
} {~/SCAN/}


finish_test