Index: ext/rtree/rtree.c ================================================================== --- ext/rtree/rtree.c +++ ext/rtree/rtree.c @@ -10,10 +10,49 @@ ** ************************************************************************* ** This file contains code for implementations of the r-tree and r*-tree ** algorithms packaged as an SQLite virtual table module. */ + +/* +** Database Format of R-Tree Tables +** -------------------------------- +** +** The data structure for a single virtual r-tree table is stored in three +** native SQLite tables declared as follows. In each case, the '%' character +** in the table name is replaced with the user-supplied name of the r-tree +** table. +** +** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) +** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) +** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) +** +** The data for each node of the r-tree structure is stored in the %_node +** table. For each node that is not the root node of the r-tree, there is +** an entry in the %_parent table associating the node with its parent. +** And for each row of data in the table, there is an entry in the %_rowid +** table that maps from the entries rowid to the id of the node that it +** is stored on. +** +** The root node of an r-tree always exists, even if the r-tree table is +** empty. The nodeno of the root node is always 1. All other nodes in the +** table must be the same size as the root node. The content of each node +** is formatted as follows: +** +** 1. If the node is the root node (node 1), then the first 2 bytes +** of the node contain the tree depth as a big-endian integer. +** For non-root nodes, the first 2 bytes are left unused. +** +** 2. The next 2 bytes contain the number of entries currently +** stored in the node. +** +** 3. The remainder of the node contains the node entries. Each entry +** consists of a single 8-byte integer followed by an even number +** of 4-byte coordinates. For leaf nodes the integer is the rowid +** of a record. For internal nodes it is the node number of a +** child page. +*/ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) /* ** This file contains an implementation of a couple of different variants @@ -51,10 +90,13 @@ #endif #if VARIANT_RSTARTREE_SPLIT #define AssignCells splitNodeStartree #endif +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 +#endif #ifndef SQLITE_CORE #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #else @@ -61,13 +103,13 @@ #include "sqlite3.h" #endif #include #include -#include #ifndef SQLITE_AMALGAMATION +#include "sqlite3rtree.h" typedef sqlite3_int64 i64; typedef unsigned char u8; typedef unsigned int u32; #endif @@ -74,10 +116,12 @@ typedef struct Rtree Rtree; typedef struct RtreeCursor RtreeCursor; typedef struct RtreeNode RtreeNode; typedef struct RtreeCell RtreeCell; typedef struct RtreeConstraint RtreeConstraint; +typedef struct RtreeMatchArg RtreeMatchArg; +typedef struct RtreeGeomCallback RtreeGeomCallback; typedef union RtreeCoord RtreeCoord; /* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ #define RTREE_MAX_DIMENSIONS 5 @@ -143,10 +187,19 @@ */ #define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) #define RTREE_REINSERT(p) RTREE_MINCELLS(p) #define RTREE_MAXCELLS 51 +/* +** The smallest possible node-size is (512-64)==448 bytes. And the largest +** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). +** Therefore all non-root nodes must contain at least 3 entries. Since +** 2^40 is greater than 2^64, an r-tree structure always has a depth of +** 40 or less. +*/ +#define RTREE_MAX_DEPTH 40 + /* ** An rtree cursor object. */ struct RtreeCursor { sqlite3_vtab_cursor base; @@ -175,39 +228,27 @@ /* ** A search constraint. */ struct RtreeConstraint { - int iCoord; /* Index of constrained coordinate */ - int op; /* Constraining operation */ - double rValue; /* Constraint value. */ + int iCoord; /* Index of constrained coordinate */ + int op; /* Constraining operation */ + double rValue; /* Constraint value. */ + int (*xGeom)(sqlite3_rtree_geometry *, int, double *, int *); + sqlite3_rtree_geometry *pGeom; /* Constraint callback argument for a MATCH */ }; /* Possible values for RtreeConstraint.op */ -#define RTREE_EQ 0x41 -#define RTREE_LE 0x42 -#define RTREE_LT 0x43 -#define RTREE_GE 0x44 -#define RTREE_GT 0x45 +#define RTREE_EQ 0x41 +#define RTREE_LE 0x42 +#define RTREE_LT 0x43 +#define RTREE_GE 0x44 +#define RTREE_GT 0x45 +#define RTREE_MATCH 0x46 /* ** An rtree structure node. -** -** Data format (RtreeNode.zData): -** -** 1. If the node is the root node (node 1), then the first 2 bytes -** of the node contain the tree depth as a big-endian integer. -** For non-root nodes, the first 2 bytes are left unused. -** -** 2. The next 2 bytes contain the number of entries currently -** stored in the node. -** -** 3. The remainder of the node contains the node entries. Each entry -** consists of a single 8-byte integer followed by an even number -** of 4-byte coordinates. For leaf nodes the integer is the rowid -** of a record. For internal nodes it is the node number of a -** child page. */ struct RtreeNode { RtreeNode *pParent; /* Parent node */ i64 iNode; int nRef; @@ -223,10 +264,44 @@ struct RtreeCell { i64 iRowid; RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; }; + +/* +** Value for the first field of every RtreeMatchArg object. The MATCH +** operator tests that the first field of a blob operand matches this +** value to avoid operating on invalid blobs (which could cause a segfault). +*/ +#define RTREE_GEOMETRY_MAGIC 0x891245AB + +/* +** An instance of this structure must be supplied as a blob argument to +** the right-hand-side of an SQL MATCH operator used to constrain an +** r-tree query. +*/ +struct RtreeMatchArg { + u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ + int (*xGeom)(sqlite3_rtree_geometry *, int, double *, int *); + void *pContext; + int nParam; + double aParam[1]; +}; + +/* +** When a geometry callback is created (see sqlite3_rtree_geometry_callback), +** a single instance of the following structure is allocated. It is used +** as the context for the user-function created by by s_r_g_c(). The object +** is eventually deleted by the destructor mechanism provided by +** sqlite3_create_function_v2() (which is called by s_r_g_c() to create +** the geometry callback function). +*/ +struct RtreeGeomCallback { + int (*xGeom)(sqlite3_rtree_geometry *, int, double *, int *); + void *pContext; +}; + #ifndef MAX # define MAX(x,y) ((x) < (y) ? (y) : (x)) #endif #ifndef MIN # define MIN(x,y) ((x) > (y) ? (y) : (x)) @@ -305,14 +380,12 @@ /* ** Clear the content of node p (set all bytes to 0x00). */ static void nodeZero(Rtree *pRtree, RtreeNode *p){ - if( p ){ - memset(&p->zData[2], 0, pRtree->iNodeSize-2); - p->isDirty = 1; - } + memset(&p->zData[2], 0, pRtree->iNodeSize-2); + p->isDirty = 1; } /* ** Given a node number iNode, return the corresponding key to use ** in the Rtree.aHash table. @@ -328,26 +401,23 @@ ** Search the node hash table for node iNode. If found, return a pointer ** to it. Otherwise, return 0. */ static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ RtreeNode *p; - assert( iNode!=0 ); for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); return p; } /* ** Add node pNode to the node hash table. */ static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ - if( pNode ){ - int iHash; - assert( pNode->pNext==0 ); - iHash = nodeHash(pNode->iNode); - pNode->pNext = pRtree->aHash[iHash]; - pRtree->aHash[iHash] = pNode; - } + int iHash; + assert( pNode->pNext==0 ); + iHash = nodeHash(pNode->iNode); + pNode->pNext = pRtree->aHash[iHash]; + pRtree->aHash[iHash] = pNode; } /* ** Remove node pNode from the node hash table. */ @@ -365,15 +435,15 @@ ** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), ** indicating that node has not yet been assigned a node number. It is ** assigned a node number when nodeWrite() is called to write the ** node contents out to the database. */ -static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent, int zero){ +static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ RtreeNode *pNode; pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); if( pNode ){ - memset(pNode, 0, sizeof(RtreeNode) + (zero?pRtree->iNodeSize:0)); + memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); pNode->zData = (u8 *)&pNode[1]; pNode->nRef = 1; pNode->pParent = pParent; pNode->isDirty = 1; nodeReference(pParent); @@ -390,10 +460,11 @@ i64 iNode, /* Node number to load */ RtreeNode *pParent, /* Either the parent node or NULL */ RtreeNode **ppNode /* OUT: Acquired node */ ){ int rc; + int rc2 = SQLITE_OK; RtreeNode *pNode; /* Check if the requested node is already in the hash table. If so, ** increase its reference count and return it. */ @@ -406,43 +477,67 @@ pNode->nRef++; *ppNode = pNode; return SQLITE_OK; } - pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); - if( !pNode ){ - *ppNode = 0; - return SQLITE_NOMEM; - } - pNode->pParent = pParent; - pNode->zData = (u8 *)&pNode[1]; - pNode->nRef = 1; - pNode->iNode = iNode; - pNode->isDirty = 0; - pNode->pNext = 0; - sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); rc = sqlite3_step(pRtree->pReadNode); if( rc==SQLITE_ROW ){ const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); - assert( sqlite3_column_bytes(pRtree->pReadNode, 0)==pRtree->iNodeSize ); - memcpy(pNode->zData, zBlob, pRtree->iNodeSize); - nodeReference(pParent); + if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ + pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); + if( !pNode ){ + rc2 = SQLITE_NOMEM; + }else{ + pNode->pParent = pParent; + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pNode->iNode = iNode; + pNode->isDirty = 0; + pNode->pNext = 0; + memcpy(pNode->zData, zBlob, pRtree->iNodeSize); + nodeReference(pParent); + } + } + } + rc = sqlite3_reset(pRtree->pReadNode); + if( rc==SQLITE_OK ) rc = rc2; + + /* If the root node was just loaded, set pRtree->iDepth to the height + ** of the r-tree structure. A height of zero means all data is stored on + ** the root node. A height of one means the children of the root node + ** are the leaves, and so on. If the depth as specified on the root node + ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. + */ + if( pNode && iNode==1 ){ + pRtree->iDepth = readInt16(pNode->zData); + if( pRtree->iDepth>RTREE_MAX_DEPTH ){ + rc = SQLITE_CORRUPT; + } + } + + /* If no error has occurred so far, check if the "number of entries" + ** field on the node is too large. If so, set the return code to + ** SQLITE_CORRUPT. + */ + if( pNode && rc==SQLITE_OK ){ + if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ + rc = SQLITE_CORRUPT; + } + } + + if( rc==SQLITE_OK ){ + if( pNode!=0 ){ + nodeHashInsert(pRtree, pNode); + }else{ + rc = SQLITE_CORRUPT; + } + *ppNode = pNode; }else{ sqlite3_free(pNode); - pNode = 0; - } - - *ppNode = pNode; - rc = sqlite3_reset(pRtree->pReadNode); - - if( rc==SQLITE_OK && iNode==1 ){ - pRtree->iDepth = readInt16(pNode->zData); - } - - assert( (rc==SQLITE_OK && pNode) || (pNode==0 && rc!=SQLITE_OK) ); - nodeHashInsert(pRtree, pNode); + *ppNode = 0; + } return rc; } /* @@ -491,12 +586,11 @@ int nMaxCell; /* Maximum number of cells for pNode */ nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; nCell = NCELL(pNode); - assert(nCell<=nMaxCell); - + assert( nCell<=nMaxCell ); if( nCellzData[2], nCell+1); pNode->isDirty = 1; } @@ -712,18 +806,37 @@ *ppCursor = (sqlite3_vtab_cursor *)pCsr; return rc; } + +/* +** Free the RtreeCursor.aConstraint[] array and its contents. +*/ +static void freeCursorConstraints(RtreeCursor *pCsr){ + if( pCsr->aConstraint ){ + int i; /* Used to iterate through constraint array */ + for(i=0; inConstraint; i++){ + sqlite3_rtree_geometry *pGeom = pCsr->aConstraint[i].pGeom; + if( pGeom ){ + if( pGeom->xDelUser ) pGeom->xDelUser(pGeom->pUser); + sqlite3_free(pGeom); + } + } + sqlite3_free(pCsr->aConstraint); + pCsr->aConstraint = 0; + } +} + /* ** Rtree virtual table module xClose method. */ static int rtreeClose(sqlite3_vtab_cursor *cur){ Rtree *pRtree = (Rtree *)(cur->pVtab); int rc; RtreeCursor *pCsr = (RtreeCursor *)cur; - sqlite3_free(pCsr->aConstraint); + freeCursorConstraints(pCsr); rc = nodeRelease(pRtree, pCsr->pNode); sqlite3_free(pCsr); return rc; } @@ -735,18 +848,44 @@ */ static int rtreeEof(sqlite3_vtab_cursor *cur){ RtreeCursor *pCsr = (RtreeCursor *)cur; return (pCsr->pNode==0); } + +/* +** The r-tree constraint passed as the second argument to this function is +** guaranteed to be a MATCH constraint. +*/ +static int testRtreeGeom( + Rtree *pRtree, /* R-Tree object */ + RtreeConstraint *pConstraint, /* MATCH constraint to test */ + RtreeCell *pCell, /* Cell to test */ + int *pbRes /* OUT: Test result */ +){ + int i; + double aCoord[RTREE_MAX_DIMENSIONS*2]; + int nCoord = pRtree->nDim*2; + + assert( pConstraint->op==RTREE_MATCH ); + assert( pConstraint->pGeom ); + + for(i=0; iaCoord[i]); + } + return pConstraint->xGeom(pConstraint->pGeom, nCoord, aCoord, pbRes); +} /* ** Cursor pCursor currently points to a cell in a non-leaf page. -** Return true if the sub-tree headed by the cell is filtered +** Set *pbEof to true if the sub-tree headed by the cell is filtered ** (excluded) by the constraints in the pCursor->aConstraint[] ** array, or false otherwise. +** +** Return SQLITE_OK if successful or an SQLite error code if an error +** occurs within a geometry callback. */ -static int testRtreeCell(Rtree *pRtree, RtreeCursor *pCursor){ +static int testRtreeCell(Rtree *pRtree, RtreeCursor *pCursor, int *pbEof){ RtreeCell cell; int ii; int bRes = 0; nodeGetCell(pRtree, pCursor->pNode, pCursor->iCell, &cell); @@ -754,56 +893,92 @@ RtreeConstraint *p = &pCursor->aConstraint[ii]; double cell_min = DCOORD(cell.aCoord[(p->iCoord>>1)*2]); double cell_max = DCOORD(cell.aCoord[(p->iCoord>>1)*2+1]); assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ + || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_MATCH ); switch( p->op ){ - case RTREE_LE: case RTREE_LT: bRes = p->rValuerValue>cell_max; break; - case RTREE_EQ: + case RTREE_LE: case RTREE_LT: + bRes = p->rValuerValue>cell_max; + break; + + case RTREE_EQ: bRes = (p->rValue>cell_max || p->rValueop==RTREE_MATCH ); + rc = testRtreeGeom(pRtree, p, &cell, &bRes); + if( rc!=SQLITE_OK ){ + return rc; + } + bRes = !bRes; + break; + } } } - return bRes; + *pbEof = bRes; + return SQLITE_OK; } /* -** Return true if the cell that cursor pCursor currently points to +** Test if the cell that cursor pCursor currently points to ** would be filtered (excluded) by the constraints in the -** pCursor->aConstraint[] array, or false otherwise. +** pCursor->aConstraint[] array. If so, set *pbEof to true before +** returning. If the cell is not filtered (excluded) by the constraints, +** set pbEof to zero. +** +** Return SQLITE_OK if successful or an SQLite error code if an error +** occurs within a geometry callback. ** ** This function assumes that the cell is part of a leaf node. */ -static int testRtreeEntry(Rtree *pRtree, RtreeCursor *pCursor){ +static int testRtreeEntry(Rtree *pRtree, RtreeCursor *pCursor, int *pbEof){ RtreeCell cell; int ii; + *pbEof = 0; nodeGetCell(pRtree, pCursor->pNode, pCursor->iCell, &cell); for(ii=0; iinConstraint; ii++){ RtreeConstraint *p = &pCursor->aConstraint[ii]; double coord = DCOORD(cell.aCoord[p->iCoord]); int res; assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ + || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_MATCH ); switch( p->op ){ case RTREE_LE: res = (coord<=p->rValue); break; case RTREE_LT: res = (coordrValue); break; case RTREE_GE: res = (coord>=p->rValue); break; case RTREE_GT: res = (coord>p->rValue); break; case RTREE_EQ: res = (coord==p->rValue); break; + default: { + int rc; + assert( p->op==RTREE_MATCH ); + rc = testRtreeGeom(pRtree, p, &cell, &res); + if( rc!=SQLITE_OK ){ + return rc; + } + break; + } } - if( !res ) return 1; + if( !res ){ + *pbEof = 1; + return SQLITE_OK; + } } - return 0; + return SQLITE_OK; } /* ** Cursor pCursor currently points at a node that heads a sub-tree of ** height iHeight (if iHeight==0, then the node is a leaf). Descend @@ -826,17 +1001,17 @@ int iSavedCell = pCursor->iCell; assert( iHeight>=0 ); if( iHeight==0 ){ - isEof = testRtreeEntry(pRtree, pCursor); + rc = testRtreeEntry(pRtree, pCursor, &isEof); }else{ - isEof = testRtreeCell(pRtree, pCursor); + rc = testRtreeCell(pRtree, pCursor, &isEof); } - if( isEof || iHeight==0 ){ + if( rc!=SQLITE_OK || isEof || iHeight==0 ){ *pEof = isEof; - return SQLITE_OK; + return rc; } iRowid = nodeGetRowid(pRtree, pCursor->pNode, pCursor->iCell); rc = nodeAcquire(pRtree, iRowid, pCursor->pNode, &pChild); if( rc!=SQLITE_OK ){ @@ -868,45 +1043,59 @@ /* ** One of the cells in node pNode is guaranteed to have a 64-bit ** integer value equal to iRowid. Return the index of this cell. */ -static int nodeRowidIndex(Rtree *pRtree, RtreeNode *pNode, i64 iRowid){ +static int nodeRowidIndex( + Rtree *pRtree, + RtreeNode *pNode, + i64 iRowid, + int *piIndex +){ int ii; - for(ii=0; nodeGetRowid(pRtree, pNode, ii)!=iRowid; ii++){ - assert( ii<(NCELL(pNode)-1) ); + int nCell = NCELL(pNode); + for(ii=0; iipParent; if( pParent ){ - return nodeRowidIndex(pRtree, pParent, pNode->iNode); + return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); } - return -1; + *piIndex = -1; + return SQLITE_OK; } /* ** Rtree virtual table module xNext method. */ static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ Rtree *pRtree = (Rtree *)(pVtabCursor->pVtab); RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; int rc = SQLITE_OK; + + /* RtreeCursor.pNode must not be NULL. If is is NULL, then this cursor is + ** already at EOF. It is against the rules to call the xNext() method of + ** a cursor that has already reached EOF. + */ + assert( pCsr->pNode ); if( pCsr->iStrategy==1 ){ /* This "scan" is a direct lookup by rowid. There is no next entry. */ nodeRelease(pRtree, pCsr->pNode); pCsr->pNode = 0; - } - - else if( pCsr->pNode ){ + }else{ /* Move to the next entry that matches the configured constraints. */ int iHeight = 0; while( pCsr->pNode ){ RtreeNode *pNode = pCsr->pNode; int nCell = NCELL(pNode); @@ -916,11 +1105,14 @@ if( rc!=SQLITE_OK || !isEof ){ return rc; } } pCsr->pNode = pNode->pParent; - pCsr->iCell = nodeParentIndex(pRtree, pNode); + rc = nodeParentIndex(pRtree, pNode, &pCsr->iCell); + if( rc!=SQLITE_OK ){ + return rc; + } nodeReference(pCsr->pNode); nodeRelease(pRtree, pNode); iHeight++; } } @@ -984,10 +1176,55 @@ rc = sqlite3_reset(pRtree->pReadRowid); } return rc; } +/* +** This function is called to configure the RtreeConstraint object passed +** as the second argument for a MATCH constraint. The value passed as the +** first argument to this function is the right-hand operand to the MATCH +** operator. +*/ +static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ + RtreeMatchArg *p; + sqlite3_rtree_geometry *pGeom; + int nBlob; + + /* Check that value is actually a blob. */ + if( !sqlite3_value_type(pValue)==SQLITE_BLOB ) return SQLITE_ERROR; + + /* Check that the blob is roughly the right size. */ + nBlob = sqlite3_value_bytes(pValue); + if( nBlobmagic!=RTREE_GEOMETRY_MAGIC + || nBlob!=(sizeof(RtreeMatchArg) + (p->nParam-1)*sizeof(double)) + ){ + sqlite3_free(pGeom); + return SQLITE_ERROR; + } + + pGeom->pContext = p->pContext; + pGeom->nParam = p->nParam; + pGeom->aParam = p->aParam; + + pCons->xGeom = p->xGeom; + pCons->pGeom = pGeom; + return SQLITE_OK; +} /* ** Rtree virtual table module xFilter method. */ static int rtreeFilter( @@ -1002,22 +1239,22 @@ int ii; int rc = SQLITE_OK; rtreeReference(pRtree); - sqlite3_free(pCsr->aConstraint); - pCsr->aConstraint = 0; + freeCursorConstraints(pCsr); pCsr->iStrategy = idxNum; if( idxNum==1 ){ /* Special case - lookup by rowid. */ RtreeNode *pLeaf; /* Leaf on which the required cell resides */ i64 iRowid = sqlite3_value_int64(argv[0]); rc = findLeafNode(pRtree, iRowid, &pLeaf); pCsr->pNode = pLeaf; - if( pLeaf && rc==SQLITE_OK ){ - pCsr->iCell = nodeRowidIndex(pRtree, pLeaf, iRowid); + if( pLeaf ){ + assert( rc==SQLITE_OK ); + rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &pCsr->iCell); } }else{ /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array ** with the configured constraints. */ @@ -1025,16 +1262,28 @@ pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); pCsr->nConstraint = argc; if( !pCsr->aConstraint ){ rc = SQLITE_NOMEM; }else{ + memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); assert( (idxStr==0 && argc==0) || strlen(idxStr)==argc*2 ); for(ii=0; iiaConstraint[ii]; p->op = idxStr[ii*2]; p->iCoord = idxStr[ii*2+1]-'a'; - p->rValue = sqlite3_value_double(argv[ii]); + if( p->op==RTREE_MATCH ){ + /* A MATCH operator. The right-hand-side must be a blob that + ** can be cast into an RtreeMatchArg object. One created using + ** an sqlite3_rtree_geometry_callback() SQL user function. + */ + rc = deserializeGeometry(argv[ii], p); + if( rc!=SQLITE_OK ){ + break; + } + }else{ + p->rValue = sqlite3_value_double(argv[ii]); + } } } } if( rc==SQLITE_OK ){ @@ -1071,15 +1320,14 @@ ** least desirable): ** ** idxNum idxStr Strategy ** ------------------------------------------------ ** 1 Unused Direct lookup by rowid. -** 2 See below R-tree query. -** 3 Unused Full table scan. +** 2 See below R-tree query or full-table scan. ** ------------------------------------------------ ** -** If strategy 1 or 3 is used, then idxStr is not meaningful. If strategy +** If strategy 1 is used, then idxStr is not meaningful. If strategy ** 2 is used, idxStr is formatted to contain 2 bytes for each ** constraint used. The first two bytes of idxStr correspond to ** the constraint in sqlite3_index_info.aConstraintUsage[] with ** (argvIndex==1) etc. ** @@ -1091,10 +1339,11 @@ ** = 0x41 ('A') ** <= 0x42 ('B') ** < 0x43 ('C') ** >= 0x44 ('D') ** > 0x45 ('E') +** MATCH 0x46 ('F') ** ---------------------- ** ** The second of each pair of bytes identifies the coordinate column ** to which the constraint applies. The leftmost coordinate column ** is 'a', the second from the left 'b' etc. @@ -1129,43 +1378,47 @@ */ pIdxInfo->estimatedCost = 10.0; return SQLITE_OK; } - if( p->usable && p->iColumn>0 ){ + if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ + int j, opmsk; + static const unsigned char compatible[] = { 0, 0, 1, 1, 2, 2 }; u8 op = 0; switch( p->op ){ case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; - } - if( op ){ - /* Make sure this particular constraint has not been used before. - ** If it has been used before, ignore it. - ** - ** A <= or < can be used if there is a prior >= or >. - ** A >= or > can be used if there is a prior < or <=. - ** A <= or < is disqualified if there is a prior <=, <, or ==. - ** A >= or > is disqualified if there is a prior >=, >, or ==. - ** A == is disqualifed if there is any prior constraint. - */ - int j, opmsk; - static const unsigned char compatible[] = { 0, 0, 1, 1, 2, 2 }; - assert( compatible[RTREE_EQ & 7]==0 ); - assert( compatible[RTREE_LT & 7]==1 ); - assert( compatible[RTREE_LE & 7]==1 ); - assert( compatible[RTREE_GT & 7]==2 ); - assert( compatible[RTREE_GE & 7]==2 ); - cCol = p->iColumn - 1 + 'a'; - opmsk = compatible[op & 7]; - for(j=0; jop==SQLITE_INDEX_CONSTRAINT_MATCH ); + op = RTREE_MATCH; + break; + } + assert( op!=0 ); + + /* Make sure this particular constraint has not been used before. + ** If it has been used before, ignore it. + ** + ** A <= or < can be used if there is a prior >= or >. + ** A >= or > can be used if there is a prior < or <=. + ** A <= or < is disqualified if there is a prior <=, <, or ==. + ** A >= or > is disqualified if there is a prior >=, >, or ==. + ** A == is disqualifed if there is any prior constraint. + */ + assert( compatible[RTREE_EQ & 7]==0 ); + assert( compatible[RTREE_LT & 7]==1 ); + assert( compatible[RTREE_LE & 7]==1 ); + assert( compatible[RTREE_GT & 7]==2 ); + assert( compatible[RTREE_GE & 7]==2 ); + cCol = p->iColumn - 1 + 'a'; + opmsk = compatible[op & 7]; + for(j=0; jnDim*2); jj+=2){ double x1; double x2; @@ -1362,26 +1620,35 @@ /* Select the child node which will be enlarged the least if pCell ** is inserted into it. Resolve ties by choosing the entry with ** the smallest area. */ for(iCell=0; iCelliDepth-1) ){ overlap = cellOverlapEnlargement(pRtree,&cell,pCell,aCell,nCell,iCell); } -#endif if( (iCell==0) || (overlappParent ){ - RtreeCell cell; RtreeNode *pParent = p->pParent; - int iCell = nodeParentIndex(pRtree, p); + RtreeCell cell; + int iCell; + + if( nodeParentIndex(pRtree, p, &iCell) ){ + return SQLITE_CORRUPT; + } nodeGetCell(pRtree, pParent, iCell, &cell); if( !cellContains(pRtree, &cell, pCell) ){ cellUnion(pRtree, &cell, pCell); nodeOverwriteCell(pRtree, pParent, &cell, iCell); } p = pParent; } + return SQLITE_OK; } /* ** Write mapping (iRowid->iNode) to the _rowid table. */ @@ -1947,18 +2219,18 @@ nodeZero(pRtree, pNode); memcpy(&aCell[nCell], pCell, sizeof(RtreeCell)); nCell++; if( pNode->iNode==1 ){ - pRight = nodeNew(pRtree, pNode, 1); - pLeft = nodeNew(pRtree, pNode, 1); + pRight = nodeNew(pRtree, pNode); + pLeft = nodeNew(pRtree, pNode); pRtree->iDepth++; pNode->isDirty = 1; writeInt16(pNode->zData, pRtree->iDepth); }else{ pLeft = pNode; - pRight = nodeNew(pRtree, pLeft->pParent, 1); + pRight = nodeNew(pRtree, pLeft->pParent); nodeReference(pLeft); } if( !pLeft || !pRight ){ rc = SQLITE_NOMEM; @@ -1971,12 +2243,16 @@ rc = AssignCells(pRtree, aCell, nCell, pLeft, pRight, &leftbbox, &rightbbox); if( rc!=SQLITE_OK ){ goto splitnode_out; } - /* Ensure both child nodes have node numbers assigned to them. */ - if( (0==pRight->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pRight))) + /* Ensure both child nodes have node numbers assigned to them by calling + ** nodeWrite(). Node pRight always needs a node number, as it was created + ** by nodeNew() above. But node pLeft sometimes already has a node number. + ** In this case avoid the all to nodeWrite(). + */ + if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) ){ goto splitnode_out; } @@ -1988,13 +2264,19 @@ if( rc!=SQLITE_OK ){ goto splitnode_out; } }else{ RtreeNode *pParent = pLeft->pParent; - int iCell = nodeParentIndex(pRtree, pLeft); - nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); - AdjustTree(pRtree, pParent, &leftbbox); + int iCell; + rc = nodeParentIndex(pRtree, pLeft, &iCell); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); + rc = AdjustTree(pRtree, pParent, &leftbbox); + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } } if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ goto splitnode_out; } @@ -2034,44 +2316,73 @@ nodeRelease(pRtree, pLeft); sqlite3_free(aCell); return rc; } +/* +** If node pLeaf is not the root of the r-tree and its pParent pointer is +** still NULL, load all ancestor nodes of pLeaf into memory and populate +** the pLeaf->pParent chain all the way up to the root node. +** +** This operation is required when a row is deleted (or updated - an update +** is implemented as a delete followed by an insert). SQLite provides the +** rowid of the row to delete, which can be used to find the leaf on which +** the entry resides (argument pLeaf). Once the leaf is located, this +** function is called to determine its ancestry. +*/ static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ int rc = SQLITE_OK; - if( pLeaf->iNode!=1 && pLeaf->pParent==0 ){ - sqlite3_bind_int64(pRtree->pReadParent, 1, pLeaf->iNode); - if( sqlite3_step(pRtree->pReadParent)==SQLITE_ROW ){ - i64 iNode = sqlite3_column_int64(pRtree->pReadParent, 0); - rc = nodeAcquire(pRtree, iNode, 0, &pLeaf->pParent); - }else{ - rc = SQLITE_ERROR; - } - sqlite3_reset(pRtree->pReadParent); - if( rc==SQLITE_OK ){ - rc = fixLeafParent(pRtree, pLeaf->pParent); - } + RtreeNode *pChild = pLeaf; + while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ + int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); + rc = sqlite3_step(pRtree->pReadParent); + if( rc==SQLITE_ROW ){ + RtreeNode *pTest; /* Used to test for reference loops */ + i64 iNode; /* Node number of parent node */ + + /* Before setting pChild->pParent, test that we are not creating a + ** loop of references (as we would if, say, pChild==pParent). We don't + ** want to do this as it leads to a memory leak when trying to delete + ** the referenced counted node structures. + */ + iNode = sqlite3_column_int64(pRtree->pReadParent, 0); + for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); + if( !pTest ){ + rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); + } + } + rc = sqlite3_reset(pRtree->pReadParent); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT; + pChild = pChild->pParent; } return rc; } static int deleteCell(Rtree *, RtreeNode *, int, int); static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ int rc; + int rc2; RtreeNode *pParent; int iCell; assert( pNode->nRef==1 ); /* Remove the entry in the parent cell. */ - iCell = nodeParentIndex(pRtree, pNode); - pParent = pNode->pParent; - pNode->pParent = 0; - if( SQLITE_OK!=(rc = deleteCell(pRtree, pParent, iCell, iHeight+1)) - || SQLITE_OK!=(rc = nodeRelease(pRtree, pParent)) - ){ + rc = nodeParentIndex(pRtree, pNode, &iCell); + if( rc==SQLITE_OK ){ + pParent = pNode->pParent; + pNode->pParent = 0; + rc = deleteCell(pRtree, pParent, iCell, iHeight+1); + } + rc2 = nodeRelease(pRtree, pParent); + if( rc==SQLITE_OK ){ + rc = rc2; + } + if( rc!=SQLITE_OK ){ return rc; } /* Remove the xxx_node entry. */ sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); @@ -2097,12 +2408,13 @@ pRtree->pDeleted = pNode; return SQLITE_OK; } -static void fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ +static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ RtreeNode *pParent = pNode->pParent; + int rc = SQLITE_OK; if( pParent ){ int ii; int nCell = NCELL(pNode); RtreeCell box; /* Bounding box for pNode */ nodeGetCell(pRtree, pNode, 0, &box); @@ -2110,21 +2422,25 @@ RtreeCell cell; nodeGetCell(pRtree, pNode, ii, &cell); cellUnion(pRtree, &box, &cell); } box.iRowid = pNode->iNode; - ii = nodeParentIndex(pRtree, pNode); - nodeOverwriteCell(pRtree, pParent, &box, ii); - fixBoundingBox(pRtree, pParent); + rc = nodeParentIndex(pRtree, pNode, &ii); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &box, ii); + rc = fixBoundingBox(pRtree, pParent); + } } + return rc; } /* ** Delete the cell at index iCell of node pNode. After removing the ** cell, adjust the r-tree data structure if required. */ static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ + RtreeNode *pParent; int rc; if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ return rc; } @@ -2137,18 +2453,17 @@ /* If the node is not the tree root and now has less than the minimum ** number of cells, remove it from the tree. Otherwise, update the ** cell in the parent node so that it tightly contains the updated ** node. */ - if( pNode->iNode!=1 ){ - RtreeNode *pParent = pNode->pParent; - if( (pParent->iNode!=1 || NCELL(pParent)!=1) - && (NCELL(pNode)pParent; + assert( pParent || pNode->iNode==1 ); + if( pParent ){ + if( NCELL(pNode)iRowid, pNode->iNode); } } } if( rc==SQLITE_OK ){ - fixBoundingBox(pRtree, pNode); + rc = fixBoundingBox(pRtree, pNode); } for(; rc==SQLITE_OK && iiiNode currently contains ** the height of the sub-tree headed by the cell. */ @@ -2281,15 +2596,17 @@ } #else rc = SplitNode(pRtree, pNode, pCell, iHeight); #endif }else{ - AdjustTree(pRtree, pNode, pCell); - if( iHeight==0 ){ - rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); - }else{ - rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + rc = AdjustTree(pRtree, pNode, pCell); + if( rc==SQLITE_OK ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + } } } return rc; } @@ -2355,11 +2672,10 @@ int rc = SQLITE_OK; rtreeReference(pRtree); assert(nData>=1); - assert(hashIsEmpty(pRtree)); /* If azData[0] is not an SQL NULL value, it is the rowid of a ** record to delete from the r-tree table. The following block does ** just that. */ @@ -2381,12 +2697,14 @@ } /* Delete the cell in question from the leaf node. */ if( rc==SQLITE_OK ){ int rc2; - iCell = nodeRowidIndex(pRtree, pLeaf, iDelete); - rc = deleteCell(pRtree, pLeaf, iCell, 0); + rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); + if( rc==SQLITE_OK ){ + rc = deleteCell(pRtree, pLeaf, iCell, 0); + } rc2 = nodeRelease(pRtree, pLeaf); if( rc==SQLITE_OK ){ rc = rc2; } } @@ -2404,23 +2722,24 @@ ** ** This is equivalent to copying the contents of the child into ** the root node (the operation that Gutman's paper says to perform ** in this scenario). */ - if( rc==SQLITE_OK && pRtree->iDepth>0 ){ - if( rc==SQLITE_OK && NCELL(pRoot)==1 ){ - RtreeNode *pChild; - i64 iChild = nodeGetRowid(pRtree, pRoot, 0); - rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); - if( rc==SQLITE_OK ){ - rc = removeNode(pRtree, pChild, pRtree->iDepth-1); - } - if( rc==SQLITE_OK ){ - pRtree->iDepth--; - writeInt16(pRoot->zData, pRtree->iDepth); - pRoot->isDirty = 1; - } + if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ + int rc2; + RtreeNode *pChild; + i64 iChild = nodeGetRowid(pRtree, pRoot, 0); + rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); + if( rc==SQLITE_OK ){ + rc = removeNode(pRtree, pChild, pRtree->iDepth-1); + } + rc2 = nodeRelease(pRtree, pChild); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK ){ + pRtree->iDepth--; + writeInt16(pRoot->zData, pRtree->iDepth); + pRoot->isDirty = 1; } } /* Re-insert the contents of any underfull nodes removed from the tree. */ for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ @@ -2482,10 +2801,11 @@ rc = SQLITE_CONSTRAINT; goto constraint; } rc = sqlite3_reset(pRtree->pReadRowid); } + *pRowid = cell.iRowid; if( rc==SQLITE_OK ){ rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); } if( rc==SQLITE_OK ){ @@ -2705,11 +3025,11 @@ ){ int rc = SQLITE_OK; Rtree *pRtree; int nDb; /* Length of string argv[1] */ int nName; /* Length of string argv[2] */ - int eCoordType = (int)(intptr_t)pAux; + int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); const char *aErrMsg[] = { 0, /* 0 */ "Wrong number of columns for an rtree table", /* 1 */ "Too few columns for an rtree table", /* 2 */ @@ -2851,16 +3171,14 @@ ** Register the r-tree module with database handle db. This creates the ** virtual table module "rtree" and the debugging/analysis scalar ** function "rtreenode". */ int sqlite3RtreeInit(sqlite3 *db){ - int rc = SQLITE_OK; + const int utf8 = SQLITE_UTF8; + int rc; - if( rc==SQLITE_OK ){ - int utf8 = SQLITE_UTF8; - rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); - } + rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); if( rc==SQLITE_OK ){ int utf8 = SQLITE_UTF8; rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); } if( rc==SQLITE_OK ){ @@ -2872,10 +3190,77 @@ rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); } return rc; } + +/* +** A version of sqlite3_free() that can be used as a callback. This is used +** in two places - as the destructor for the blob value returned by the +** invocation of a geometry function, and as the destructor for the geometry +** functions themselves. +*/ +static void doSqlite3Free(void *p){ + sqlite3_free(p); +} + +/* +** Each call to sqlite3_rtree_geometry_callback() creates an ordinary SQLite +** scalar user function. This C function is the callback used for all such +** registered SQL functions. +** +** The scalar user functions return a blob that is interpreted by r-tree +** table MATCH operators. +*/ +static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ + RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); + RtreeMatchArg *pBlob; + int nBlob; + + nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(double); + pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); + if( !pBlob ){ + sqlite3_result_error_nomem(ctx); + }else{ + int i; + pBlob->magic = RTREE_GEOMETRY_MAGIC; + pBlob->xGeom = pGeomCtx->xGeom; + pBlob->pContext = pGeomCtx->pContext; + pBlob->nParam = nArg; + for(i=0; iaParam[i] = sqlite3_value_double(aArg[i]); + } + sqlite3_result_blob(ctx, pBlob, nBlob, doSqlite3Free); + } +} + +/* +** Register a new geometry function for use with the r-tree MATCH operator. +*/ +int sqlite3_rtree_geometry_callback( + sqlite3 *db, + const char *zGeom, + int (*xGeom)(sqlite3_rtree_geometry *, int, double *, int *), + void *pContext +){ +#if 0 + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = xGeom; + pGeomCtx->pContext = pContext; + + /* Create the new user-function. Register a destructor function to delete + ** the context object when it is no longer required. */ + return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, doSqlite3Free + ); +#endif + return SQLITE_MISUSE; +} #if !SQLITE_CORE int sqlite3_extension_init( sqlite3 *db, char **pzErrMsg, ADDED ext/rtree/sqlite3rtree.h Index: ext/rtree/sqlite3rtree.h ================================================================== --- /dev/null +++ ext/rtree/sqlite3rtree.h @@ -0,0 +1,56 @@ +/* +** 2010 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#ifndef _SQLITE3RTREE_H_ +#define _SQLITE3RTREE_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry; + +/* +** Register a geometry callback named zGeom that can be used as part of an +** R-Tree geometry query as follows: +** +** SELECT ... FROM WHERE MATCH $zGeom(... params ...) +*/ +int sqlite3_rtree_geometry_callback( + sqlite3 *db, + const char *zGeom, + int (*xGeom)(sqlite3_rtree_geometry *, int nCoord, double *aCoord, int *pRes), + void *pContext +); + + +/* +** A pointer to a structure of the following type is passed as the first +** argument to callbacks registered using rtree_geometry_callback(). +*/ +struct sqlite3_rtree_geometry { + void *pContext; /* Copy of pContext passed to s_r_g_c() */ + int nParam; /* Size of array aParam[] */ + double *aParam; /* Parameters passed to SQL geom function */ + void *pUser; /* Callback implementation user data */ + void (*xDelUser)(void *); /* Called by SQLite to clean up pUser */ +}; + + +#ifdef __cplusplus +} /* end of the 'extern "C"' block */ +#endif + +#endif /* ifndef _SQLITE3RTREE_H_ */ Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -4728,10 +4728,14 @@ } if( k==0 ){ if( !pPrevTrunk ){ memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); }else{ + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + if( rc!=SQLITE_OK ){ + goto end_allocate_page; + } memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); } }else{ /* The trunk page is required by the caller but it contains ** pointers to free-list leaves. The first leaf becomes a trunk Index: src/expr.c ================================================================== --- src/expr.c +++ src/expr.c @@ -52,30 +52,37 @@ assert( pExpr->pTab && jpTab->nCol ); return pExpr->pTab->aCol[j].affinity; } return pExpr->affinity; } + +/* +** Set the explicit collating sequence for an expression to the +** collating sequence supplied in the second argument. +*/ +Expr *sqlite3ExprSetColl(Expr *pExpr, CollSeq *pColl){ + if( pExpr && pColl ){ + pExpr->pColl = pColl; + pExpr->flags |= EP_ExpCollate; + } + return pExpr; +} /* ** Set the collating sequence for expression pExpr to be the collating ** sequence named by pToken. Return a pointer to the revised expression. ** The collating sequence is marked as "explicit" using the EP_ExpCollate ** flag. An explicit collating sequence will override implicit ** collating sequences. */ -Expr *sqlite3ExprSetColl(Parse *pParse, Expr *pExpr, Token *pCollName){ +Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr *pExpr, Token *pCollName){ char *zColl = 0; /* Dequoted name of collation sequence */ CollSeq *pColl; sqlite3 *db = pParse->db; zColl = sqlite3NameFromToken(db, pCollName); - if( pExpr && zColl ){ - pColl = sqlite3LocateCollSeq(pParse, zColl); - if( pColl ){ - pExpr->pColl = pColl; - pExpr->flags |= EP_ExpCollate; - } - } + pColl = sqlite3LocateCollSeq(pParse, zColl); + sqlite3ExprSetColl(pExpr, pColl); sqlite3DbFree(db, zColl); return pExpr; } /* Index: src/parse.y ================================================================== --- src/parse.y +++ src/parse.y @@ -778,11 +778,11 @@ spanExpr(&A, pParse, TK_VARIABLE, &X); sqlite3ExprAssignVarNumber(pParse, A.pExpr); spanSet(&A, &X, &X); } expr(A) ::= expr(E) COLLATE ids(C). { - A.pExpr = sqlite3ExprSetColl(pParse, E.pExpr, &C); + A.pExpr = sqlite3ExprSetCollByToken(pParse, E.pExpr, &C); A.zStart = E.zStart; A.zEnd = &C.z[C.n]; } %ifndef SQLITE_OMIT_CAST expr(A) ::= CAST(X) LP expr(E) AS typetoken(T) RP(Y). { @@ -1089,11 +1089,11 @@ idxlist_opt(A) ::= LP idxlist(X) RP. {A = X;} idxlist(A) ::= idxlist(X) COMMA nm(Y) collate(C) sortorder(Z). { Expr *p = 0; if( C.n>0 ){ p = sqlite3Expr(pParse->db, TK_COLUMN, 0); - sqlite3ExprSetColl(pParse, p, &C); + sqlite3ExprSetCollByToken(pParse, p, &C); } A = sqlite3ExprListAppend(pParse,X, p); sqlite3ExprListSetName(pParse,A,&Y,1); sqlite3ExprListCheckLength(pParse, A, "index"); if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z; @@ -1100,11 +1100,11 @@ } idxlist(A) ::= nm(Y) collate(C) sortorder(Z). { Expr *p = 0; if( C.n>0 ){ p = sqlite3PExpr(pParse, TK_COLUMN, 0, 0, 0); - sqlite3ExprSetColl(pParse, p, &C); + sqlite3ExprSetCollByToken(pParse, p, &C); } A = sqlite3ExprListAppend(pParse,0, p); sqlite3ExprListSetName(pParse, A, &Y, 1); sqlite3ExprListCheckLength(pParse, A, "index"); if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z; Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -2845,11 +2845,12 @@ const char *sqlite3ErrStr(int); int sqlite3ReadSchema(Parse *pParse); CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int); CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName); CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr); -Expr *sqlite3ExprSetColl(Parse *pParse, Expr *, Token *); +Expr *sqlite3ExprSetColl(Expr*, CollSeq*); +Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr*, Token*); int sqlite3CheckCollSeq(Parse *, CollSeq *); int sqlite3CheckObjectName(Parse *, const char *); void sqlite3VdbeSetChanges(sqlite3 *, int); const void *sqlite3ValueText(sqlite3_value*, u8); Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -631,11 +631,10 @@ Expr *pRight, *pLeft; /* Right and left size of LIKE operator */ ExprList *pList; /* List of operands to the LIKE operator */ int c; /* One character in z[] */ int cnt; /* Number of non-wildcard prefix characters */ char wc[3]; /* Wildcard characters */ - CollSeq *pColl; /* Collating sequence for LHS */ sqlite3 *db = pParse->db; /* Database connection */ sqlite3_value *pVal = 0; int op; /* Opcode of pRight */ if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){ @@ -650,23 +649,10 @@ /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must ** be the name of an indexed column with TEXT affinity. */ return 0; } assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */ - pColl = sqlite3ExprCollSeq(pParse, pLeft); - if( pColl==0 ) return 0; /* Happens when LHS has an undefined collation */ - if( (pColl->type!=SQLITE_COLL_BINARY || *pnoCase) && - (pColl->type!=SQLITE_COLL_NOCASE || !*pnoCase) ){ - /* IMP: R-09003-32046 For the GLOB operator, the column must use the - ** default BINARY collating sequence. - ** IMP: R-41408-28306 For the LIKE operator, if case_sensitive_like mode - ** is enabled then the column must use the default BINARY collating - ** sequence, or if case_sensitive_like mode is disabled then the column - ** must use the built-in NOCASE collating sequence. - */ - return 0; - } pRight = pList->a[0].pExpr; op = pRight->op; if( op==TK_REGISTER ){ op = pRight->op2; @@ -685,13 +671,13 @@ if( z ){ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; } - if( cnt!=0 && c!=0 && 255!=(u8)z[cnt-1] ){ + if( cnt!=0 && 255!=(u8)z[cnt-1] ){ Expr *pPrefix; - *pisComplete = z[cnt]==wc[0] && z[cnt+1]==0; + *pisComplete = c==wc[0] && z[cnt+1]==0; pPrefix = sqlite3Expr(db, TK_STRING, z); if( pPrefix ) pPrefix->u.zToken[cnt] = 0; *ppPrefix = pPrefix; if( op==TK_VARIABLE ){ Vdbe *v = pParse->pVdbe; @@ -1242,10 +1228,11 @@ Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ Expr *pNewExpr1; Expr *pNewExpr2; int idxNew1; int idxNew2; + CollSeq *pColl; /* Collating sequence to use */ pLeft = pExpr->x.pList->a[1].pExpr; pStr2 = sqlite3ExprDup(db, pStr1, 0); if( !db->mallocFailed ){ u8 c, *pC; /* Last character before the first wildcard */ @@ -1262,15 +1249,20 @@ c = sqlite3UpperToLower[c]; } *pC = c + 1; } - pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft,0),pStr1,0); + pColl = sqlite3FindCollSeq(db, SQLITE_UTF8, noCase ? "NOCASE" : "BINARY",0); + pNewExpr1 = sqlite3PExpr(pParse, TK_GE, + sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl), + pStr1, 0); idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC); testcase( idxNew1==0 ); exprAnalyze(pSrc, pWC, idxNew1); - pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprDup(db,pLeft,0),pStr2,0); + pNewExpr2 = sqlite3PExpr(pParse, TK_LT, + sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl), + pStr2, 0); idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC); testcase( idxNew2==0 ); exprAnalyze(pSrc, pWC, idxNew2); pTerm = &pWC->a[idxTerm]; if( isComplete ){ Index: test/analyze3.test ================================================================== --- test/analyze3.test +++ test/analyze3.test @@ -266,10 +266,26 @@ } {101 0 100} do_test analyze3-2.5 { set like "%a" sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE $like } } {999 999 100} +do_test analyze3-2.6 { + set like "a" + sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE $like } +} {101 0 0} +do_test analyze3-2.7 { + set like "ab" + sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE $like } +} {11 0 0} +do_test analyze3-2.8 { + set like "abc" + sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE $like } +} {2 0 1} +do_test analyze3-2.9 { + set like "a_c" + sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE $like } +} {101 0 10} #------------------------------------------------------------------------- # This block of tests checks that statements are correctly marked as # expired when the values bound to any parameters that may affect the Index: test/like.test ================================================================== --- test/like.test +++ test/like.test @@ -191,10 +191,35 @@ } } {abc abcd nosort {} i1} do_test like-3.4 { set sqlite_like_count } 0 + +# The LIKE optimization still works when the RHS is a string with no +# wildcard. Ticket [e090183531fc2747] +# +do_test like-3.4.2 { + queryplan { + SELECT x FROM t1 WHERE x LIKE 'a' ORDER BY 1; + } +} {a nosort {} i1} +do_test like-3.4.3 { + queryplan { + SELECT x FROM t1 WHERE x LIKE 'ab' ORDER BY 1; + } +} {ab nosort {} i1} +do_test like-3.4.4 { + queryplan { + SELECT x FROM t1 WHERE x LIKE 'abcd' ORDER BY 1; + } +} {abcd nosort {} i1} +do_test like-3.4.5 { + queryplan { + SELECT x FROM t1 WHERE x LIKE 'abcde' ORDER BY 1; + } +} {nosort {} i1} + # Partial optimization when the pattern does not end in '%' # do_test like-3.5 { set sqlite_like_count 0 @@ -306,10 +331,30 @@ } } {abd acd nosort {} i1} do_test like-3.24 { set sqlite_like_count } 6 + +# GLOB optimization when there is no wildcard. Ticket [e090183531fc2747] +# +do_test like-3.25 { + queryplan { + SELECT x FROM t1 WHERE x GLOB 'a' ORDER BY 1; + } +} {a nosort {} i1} +do_test like-3.26 { + queryplan { + SELECT x FROM t1 WHERE x GLOB 'abcd' ORDER BY 1; + } +} {abcd nosort {} i1} +do_test like-3.27 { + queryplan { + SELECT x FROM t1 WHERE x GLOB 'abcde' ORDER BY 1; + } +} {nosort {} i1} + + # No optimization if the LHS of the LIKE is not a column name or # if the RHS is not a string. # do_test like-4.1 { @@ -730,8 +775,97 @@ do_test like-10.15 { count { SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY a; } } {12 123 scan 5 like 6} + +# LIKE and GLOB where the default collating sequence is not appropriate +# but an index with the appropriate collating sequence exists. +# +do_test like-11.0 { + execsql { + CREATE TABLE t11( + a INTEGER PRIMARY KEY, + b TEXT COLLATE nocase, + c TEXT COLLATE binary + ); + INSERT INTO t11 VALUES(1, 'a','a'); + INSERT INTO t11 VALUES(2, 'ab','ab'); + INSERT INTO t11 VALUES(3, 'abc','abc'); + INSERT INTO t11 VALUES(4, 'abcd','abcd'); + INSERT INTO t11 VALUES(5, 'A','A'); + INSERT INTO t11 VALUES(6, 'AB','AB'); + INSERT INTO t11 VALUES(7, 'ABC','ABC'); + INSERT INTO t11 VALUES(8, 'ABCD','ABCD'); + INSERT INTO t11 VALUES(9, 'x','x'); + INSERT INTO t11 VALUES(10, 'yz','yz'); + INSERT INTO t11 VALUES(11, 'X','X'); + INSERT INTO t11 VALUES(12, 'YZ','YZ'); + SELECT count(*) FROM t11; + } +} {12} +do_test like-11.1 { + queryplan { + PRAGMA case_sensitive_like=OFF; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD nosort t11 *} +do_test like-11.2 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd nosort t11 *} +do_test like-11.3 { + queryplan { + PRAGMA case_sensitive_like=OFF; + CREATE INDEX t11b ON t11(b); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11b} +do_test like-11.4 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd nosort t11 *} +do_test like-11.5 { + queryplan { + PRAGMA case_sensitive_like=OFF; + DROP INDEX t11b; + CREATE INDEX t11bnc ON t11(b COLLATE nocase); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11bnc} +do_test like-11.6 { + queryplan { + CREATE INDEX t11bb ON t11(b COLLATE binary); + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11bnc} +do_test like-11.7 { + queryplan { + PRAGMA case_sensitive_like=ON; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + } +} {abc abcd sort {} t11bb} +do_test like-11.8 { + queryplan { + PRAGMA case_sensitive_like=OFF; + SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY a; + } +} {abc abcd sort {} t11bb} +do_test like-11.9 { + queryplan { + CREATE INDEX t11cnc ON t11(c COLLATE nocase); + CREATE INDEX t11cb ON t11(c COLLATE binary); + SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY a; + } +} {abc abcd ABC ABCD sort {} t11cnc} +do_test like-11.10 { + queryplan { + SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY a; + } +} {abc abcd sort {} t11cb} finish_test ADDED test/tkt-5e10420e8d.test Index: test/tkt-5e10420e8d.test ================================================================== --- /dev/null +++ test/tkt-5e10420e8d.test @@ -0,0 +1,58 @@ +# 2010 August 23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_test tkt-5e10420e8d.1 { + db eval { + PRAGMA page_size = 1024; + PRAGMA auto_vacuum = incremental; + + CREATE TABLE t1(x); + CREATE TABLE t2(x); + CREATE TABLE t3(x); + } +} {} + +do_test tkt-5e10420e8d.2 { + db eval { + INSERT INTO t3 VALUES(randomblob(500 + 1024*248)); + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t2 VALUES(randomblob(500 + 1024*248)); + + DELETE FROM t3; + DELETE FROM t2; + DELETE FROM t1; + } +} {} + +do_test tkt-5e10420e8d.3 { + db eval { + PRAGMA incremental_vacuum(248) + } +} {} + +do_test tkt-5e10420e8d.4 { + db eval { + PRAGMA incremental_vacuum(1) + } +} {} + +db close +sqlite3 db test.db + +do_test tkt-5e10420e8d.5 { + db eval {PRAGMA integrity_check;} +} {ok} + +finish_test Index: tool/mksqlite3h.tcl ================================================================== --- tool/mksqlite3h.tcl +++ tool/mksqlite3h.tcl @@ -63,34 +63,41 @@ # Set up patterns for recognizing API declarations. # set varpattern {^[a-zA-Z][a-zA-Z_0-9 *]+sqlite3_[_a-zA-Z0-9]+(\[|;| =)} set declpattern {^ *[a-zA-Z][a-zA-Z_0-9 ]+ \**sqlite3_[_a-zA-Z0-9]+\(} -# Process the src/sqlite.h.in file. -# -set in [open $TOP/src/sqlite.h.in] -while {![eof $in]} { - - set line [gets $in] - - regsub -- --VERS-- $line $zVersion line - regsub -- --VERSION-NUMBER-- $line $nVersion line - regsub -- --SOURCE-ID-- $line "$zDate $zUuid" line - - if {[regexp {define SQLITE_EXTERN extern} $line]} { - puts $line - puts [gets $in] - puts "" - puts "#ifndef SQLITE_API" - puts "# define SQLITE_API" - puts "#endif" - set line "" - } - - if {([regexp $varpattern $line] && ![regexp {^ *typedef} $line]) - || ([regexp $declpattern $line]) - } { - set line "SQLITE_API $line" - } - puts $line -} -close $in +# Process the src/sqlite.h.in ext/rtree/sqlite3rtree.h files. +# +foreach file [list $TOP/src/sqlite.h.in $TOP/ext/rtree/sqlite3rtree.h] { + set in [open $file] + while {![eof $in]} { + + set line [gets $in] + + # File sqlite3rtree.h contains a line "#include ". Omit this + # line when copying sqlite3rtree.h into sqlite3.h. + # + if {[string match {*#include**} $line]} continue + + regsub -- --VERS-- $line $zVersion line + regsub -- --VERSION-NUMBER-- $line $nVersion line + regsub -- --SOURCE-ID-- $line "$zDate $zUuid" line + + if {[regexp {define SQLITE_EXTERN extern} $line]} { + puts $line + puts [gets $in] + puts "" + puts "#ifndef SQLITE_API" + puts "# define SQLITE_API" + puts "#endif" + set line "" + } + + if {([regexp $varpattern $line] && ![regexp {^ *typedef} $line]) + || ([regexp $declpattern $line]) + } { + set line "SQLITE_API $line" + } + puts $line + } + close $in +}