Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -453,32 +453,47 @@ int iStart, /* Begin with this column of pList */ int nExtra /* Add this many extra columns to the end */ ); /* -** Insert code into "v" that will push the record in register regData -** into the sorter. +** Generate code that will push the record in registers regData +** through regData+nData-1 onto the sorter. */ static void pushOntoSorter( Parse *pParse, /* Parser context */ SortCtx *pSort, /* Information about the ORDER BY clause */ Select *pSelect, /* The whole SELECT statement */ - int regData /* Register holding data to be sorted */ + int regData, /* First register holding data to be sorted */ + int nData, /* Number of elements in the data array */ + int nPrefixReg /* No. of reg prior to regData available for use */ ){ - Vdbe *v = pParse->pVdbe; - int nExpr = pSort->pOrderBy->nExpr; - int regRecord = ++pParse->nMem; - int regBase = pParse->nMem+1; - int nOBSat = pSort->nOBSat; - int op; - - pParse->nMem += nExpr+2; /* nExpr+2 registers allocated at regBase */ - sqlite3ExprCacheClear(pParse); - sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, 0); - sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr); - sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1); - sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nExpr+2-nOBSat,regRecord); + Vdbe *v = pParse->pVdbe; /* Stmt under construction */ + int bSeq = ((pSort->sortFlags & SORTFLAG_UseSorter)==0); + int nExpr = pSort->pOrderBy->nExpr; /* No. of ORDER BY terms */ + int nBase = nExpr + bSeq + nData; /* Fields in sorter record */ + int regBase; /* Regs for sorter record */ + int regRecord = ++pParse->nMem; /* Assembled sorter record */ + int nOBSat = pSort->nOBSat; /* ORDER BY terms to skip */ + int op; /* Opcode to add sorter record to sorter */ + + assert( bSeq==0 || bSeq==1 ); + if( nPrefixReg ){ + assert( nPrefixReg==nExpr+bSeq ); + regBase = regData - nExpr - bSeq; + }else{ + regBase = pParse->nMem + 1; + pParse->nMem += nBase; + } + sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, SQLITE_ECEL_DUP); + if( bSeq ){ + sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr); + } + if( nPrefixReg==0 ){ + sqlite3VdbeAddOp3(v, OP_Move, regData, regBase+nExpr+bSeq, nData); + } + + sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nBase-nOBSat, regRecord); if( nOBSat>0 ){ int regPrevKey; /* The first nOBSat columns of the previous row */ int addrFirst; /* Address of the OP_IfNot opcode */ int addrJmp; /* Address of the OP_Jump opcode */ VdbeOp *pOp; /* Opcode that opens the sorter */ @@ -485,16 +500,21 @@ int nKey; /* Number of sorting key columns, including OP_Sequence */ KeyInfo *pKI; /* Original KeyInfo on the sorter table */ regPrevKey = pParse->nMem+1; pParse->nMem += pSort->nOBSat; - nKey = nExpr - pSort->nOBSat + 1; - addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); VdbeCoverage(v); + nKey = nExpr - pSort->nOBSat + bSeq; + if( bSeq ){ + addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); + }else{ + addrFirst = sqlite3VdbeAddOp1(v, OP_SequenceTest, pSort->iECursor); + } + VdbeCoverage(v); sqlite3VdbeAddOp3(v, OP_Compare, regPrevKey, regBase, pSort->nOBSat); pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex); if( pParse->db->mallocFailed ) return; - pOp->p2 = nKey + 1; + pOp->p2 = nKey + nData; pKI = pOp->p4.pKeyInfo; memset(pKI->aSortOrder, 0, pKI->nField); /* Makes OP_Jump below testable */ sqlite3VdbeChangeP4(v, -1, (char*)pKI, P4_KEYINFO); pOp->p4.pKeyInfo = keyInfoFromExprList(pParse, pSort->pOrderBy, nOBSat, 1); addrJmp = sqlite3VdbeCurrentAddr(v); @@ -625,10 +645,11 @@ int hasDistinct; /* True if the DISTINCT keyword is present */ int regResult; /* Start of memory holding result set */ int eDest = pDest->eDest; /* How to dispose of results */ int iParm = pDest->iSDParm; /* First argument to disposal method */ int nResultCol; /* Number of result columns */ + int nPrefixReg = 0; /* Number of extra registers before regResult */ assert( v ); assert( pEList!=0 ); hasDistinct = pDistinct ? pDistinct->eTnctType : WHERE_DISTINCT_NOOP; if( pSort && pSort->pOrderBy==0 ) pSort = 0; @@ -640,10 +661,15 @@ /* Pull the requested columns. */ nResultCol = pEList->nExpr; if( pDest->iSdst==0 ){ + if( pSort ){ + nPrefixReg = pSort->pOrderBy->nExpr; + if( !(pSort->sortFlags & SORTFLAG_UseSorter) ) nPrefixReg++; + pParse->nMem += nPrefixReg; + } pDest->iSdst = pParse->nMem+1; pParse->nMem += nResultCol; }else if( pDest->iSdst+nResultCol > pParse->nMem ){ /* This is an error condition that can result, for example, when a SELECT ** on the right-hand side of an INSERT contains more result columns than @@ -756,14 +782,14 @@ */ case SRT_Fifo: case SRT_DistFifo: case SRT_Table: case SRT_EphemTab: { - int r1 = sqlite3GetTempReg(pParse); + int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1); testcase( eDest==SRT_Table ); testcase( eDest==SRT_EphemTab ); - sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1); + sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg); #ifndef SQLITE_OMIT_CTE if( eDest==SRT_DistFifo ){ /* If the destination is DistFifo, then cursor (iParm+1) is open ** on an ephemeral index. If the current row is already present ** in the index, do not write it to the output. If not, add the @@ -774,19 +800,19 @@ sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1); assert( pSort==0 ); } #endif if( pSort ){ - pushOntoSorter(pParse, pSort, p, r1); + pushOntoSorter(pParse, pSort, p, r1+nPrefixReg, 1, nPrefixReg); }else{ int r2 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, r2); sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, r2); sqlite3VdbeChangeP5(v, OPFLAG_APPEND); sqlite3ReleaseTempReg(pParse, r2); } - sqlite3ReleaseTempReg(pParse, r1); + sqlite3ReleaseTempRange(pParse, r1, nPrefixReg+1); break; } #ifndef SQLITE_OMIT_SUBQUERY /* If we are creating a set for an "expr IN (SELECT ...)" construct, @@ -800,11 +826,11 @@ if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ - pushOntoSorter(pParse, pSort, p, regResult); + pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg); }else{ int r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult,1,r1, &pDest->affSdst, 1); sqlite3ExprCacheAffinityChange(pParse, regResult, 1); sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1); @@ -826,11 +852,11 @@ ** of the scan loop. */ case SRT_Mem: { assert( nResultCol==1 ); if( pSort ){ - pushOntoSorter(pParse, pSort, p, regResult); + pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg); }else{ sqlite3ExprCodeMove(pParse, regResult, iParm, 1); /* The LIMIT clause will jump out of the loop for us */ } break; @@ -840,14 +866,11 @@ case SRT_Coroutine: /* Send data to a co-routine */ case SRT_Output: { /* Return the results */ testcase( eDest==SRT_Coroutine ); testcase( eDest==SRT_Output ); if( pSort ){ - int r1 = sqlite3GetTempReg(pParse); - sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1); - pushOntoSorter(pParse, pSort, p, r1); - sqlite3ReleaseTempReg(pParse, r1); + pushOntoSorter(pParse, pSort, p, regResult, nResultCol, nPrefixReg); }else if( eDest==SRT_Coroutine ){ sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); }else{ sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, nResultCol); sqlite3ExprCacheAffinityChange(pParse, regResult, nResultCol); @@ -1123,50 +1146,66 @@ int addrBreak = sqlite3VdbeMakeLabel(v); /* Jump here to exit loop */ int addrContinue = sqlite3VdbeMakeLabel(v); /* Jump here for next cycle */ int addr; int addrOnce = 0; int iTab; - int pseudoTab = 0; ExprList *pOrderBy = pSort->pOrderBy; int eDest = pDest->eDest; int iParm = pDest->iSDParm; int regRow; int regRowid; int nKey; + int iSortTab; /* Sorter cursor to read from */ + int nSortData; /* Trailing values to read from sorter */ + u8 p5; /* p5 parameter for 1st OP_Column */ + int i; + int bSeq; /* True if sorter record includes seq. no. */ +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + struct ExprList_item *aOutEx = p->pEList->a; +#endif if( pSort->labelBkOut ){ sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut); sqlite3VdbeAddOp2(v, OP_Goto, 0, addrBreak); sqlite3VdbeResolveLabel(v, pSort->labelBkOut); - addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v); } iTab = pSort->iECursor; - regRow = sqlite3GetTempReg(pParse); if( eDest==SRT_Output || eDest==SRT_Coroutine ){ - pseudoTab = pParse->nTab++; - sqlite3VdbeAddOp3(v, OP_OpenPseudo, pseudoTab, regRow, nColumn); regRowid = 0; + regRow = pDest->iSdst; + nSortData = nColumn; }else{ regRowid = sqlite3GetTempReg(pParse); + regRow = sqlite3GetTempReg(pParse); + nSortData = 1; } nKey = pOrderBy->nExpr - pSort->nOBSat; if( pSort->sortFlags & SORTFLAG_UseSorter ){ int regSortOut = ++pParse->nMem; - int ptab2 = pParse->nTab++; - sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, nKey+2); + iSortTab = pParse->nTab++; + if( pSort->labelBkOut ){ + addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v); + } + sqlite3VdbeAddOp3(v, OP_OpenPseudo, iSortTab, regSortOut, nKey+1+nSortData); if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce); addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak); VdbeCoverage(v); codeOffset(v, p->iOffset, addrContinue); sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut); - sqlite3VdbeAddOp3(v, OP_Column, ptab2, nKey+1, regRow); - sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE); + p5 = OPFLAG_CLEARCACHE; + bSeq = 0; }else{ - if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce); addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); VdbeCoverage(v); codeOffset(v, p->iOffset, addrContinue); - sqlite3VdbeAddOp3(v, OP_Column, iTab, nKey+1, regRow); + iSortTab = iTab; + p5 = 0; + bSeq = 1; + } + for(i=0; iiSdst+i ); - sqlite3VdbeAddOp3(v, OP_Column, pseudoTab, i, pDest->iSdst+i); - if( i==0 ){ - sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE); - } - } if( eDest==SRT_Output ){ sqlite3VdbeAddOp2(v, OP_ResultRow, pDest->iSdst, nColumn); sqlite3ExprCacheAffinityChange(pParse, pDest->iSdst, nColumn); }else{ sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); } break; } } - sqlite3ReleaseTempReg(pParse, regRow); - sqlite3ReleaseTempReg(pParse, regRowid); - + if( regRowid ){ + sqlite3ReleaseTempReg(pParse, regRow); + sqlite3ReleaseTempReg(pParse, regRowid); + } /* The bottom of the loop */ sqlite3VdbeResolveLabel(v, addrContinue); if( pSort->sortFlags & SORTFLAG_UseSorter ){ sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); VdbeCoverage(v); @@ -1224,13 +1256,10 @@ }else{ sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v); } if( pSort->regReturn ) sqlite3VdbeAddOp1(v, OP_Return, pSort->regReturn); sqlite3VdbeResolveLabel(v, addrBreak); - if( eDest==SRT_Output || eDest==SRT_Coroutine ){ - sqlite3VdbeAddOp2(v, OP_Close, pseudoTab, 0); - } } /* ** Return a pointer to a string containing the 'declaration type' of the ** expression pExpr. The string may be treated as static by the caller. @@ -4756,12 +4785,13 @@ KeyInfo *pKeyInfo; pKeyInfo = keyInfoFromExprList(pParse, sSort.pOrderBy, 0, 0); sSort.iECursor = pParse->nTab++; sSort.addrSortIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, - sSort.iECursor, sSort.pOrderBy->nExpr+2, 0, - (char*)pKeyInfo, P4_KEYINFO); + sSort.iECursor, sSort.pOrderBy->nExpr+1+pEList->nExpr, 0, + (char*)pKeyInfo, P4_KEYINFO + ); }else{ sSort.addrSortIndex = -1; } /* If the output is destined for a temporary table, open that table. @@ -4888,11 +4918,11 @@ memset(&sNC, 0, sizeof(sNC)); sNC.pParse = pParse; sNC.pSrcList = pTabList; sNC.pAggInfo = &sAggInfo; sAggInfo.mnReg = pParse->nMem+1; - sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr+1 : 0; + sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0; sAggInfo.pGroupBy = pGroupBy; sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); if( pHaving ){ sqlite3ExprAnalyzeAggregates(&sNC, pHaving); @@ -4981,23 +5011,22 @@ (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ? "DISTINCT" : "GROUP BY"); groupBySort = 1; nGroupBy = pGroupBy->nExpr; - nCol = nGroupBy + 1; - j = nGroupBy+1; + nCol = nGroupBy; + j = nGroupBy; for(i=0; i=j ){ nCol++; j++; } } regBase = sqlite3GetTempRange(pParse, nCol); sqlite3ExprCacheClear(pParse); sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0); - sqlite3VdbeAddOp2(v, OP_Sequence, sAggInfo.sortingIdx,regBase+nGroupBy); - j = nGroupBy+1; + j = nGroupBy; for(i=0; iiSorterColumn>=j ){ int r1 = j + regBase; int r2; Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -3407,10 +3407,28 @@ assert( pCx->pKeyInfo->db==db ); assert( pCx->pKeyInfo->enc==ENC(db) ); rc = sqlite3VdbeSorterInit(db, pCx); break; } + +/* Opcode: SequenceTest P1 P2 * * * +** Synopsis: if( cursor[P1].ctr++ ) pc = P2 +** +** P1 is a sorter cursor. If the sequence counter is currently zero, jump +** to P2. Regardless of whether or not the jump is taken, increment the +** the sequence value. +*/ +case OP_SequenceTest: { + VdbeCursor *pC; + assert( pOp->p1>=0 && pOp->p1nCursor ); + pC = p->apCsr[pOp->p1]; + assert( pC->pSorter ); + if( (pC->seqCount++)==0 ){ + pc = pOp->p2 - 1; + } + break; +} /* Opcode: OpenPseudo P1 P2 P3 * * ** Synopsis: P3 columns in r[P2] ** ** Open a new cursor that points to a fake table that contains a single Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -3135,14 +3135,18 @@ ** as the sqlite3VdbeRecordCompare() routine. Unlike VdbeRecordCompare(), ** this function deserializes and compares values using the ** sqlite3VdbeSerialGet() and sqlite3MemCompare() functions. It is used ** in assert() statements to ensure that the optimized code in ** sqlite3VdbeRecordCompare() returns results with these two primitives. +** +** Return true if the result of comparison is equivalent to desiredResult. +** Return false if there is a disagreement. */ static int vdbeRecordCompareDebug( int nKey1, const void *pKey1, /* Left key */ - const UnpackedRecord *pPKey2 /* Right key */ + const UnpackedRecord *pPKey2, /* Right key */ + int desiredResult /* Correct answer */ ){ u32 d1; /* Offset into aKey[] of next data element */ u32 idx1; /* Offset into aKey[] of next header element */ u32 szHdr1; /* Number of bytes in header */ int i = 0; @@ -3200,11 +3204,11 @@ if( rc!=0 ){ assert( mem1.zMalloc==0 ); /* See comment below */ if( pKeyInfo->aSortOrder[i] ){ rc = -rc; /* Invert the result for DESC sort order. */ } - return rc; + goto debugCompareEnd; } i++; }while( idx1nField ); /* No memory allocation is ever used on mem1. Prove this using @@ -3214,11 +3218,19 @@ assert( mem1.zMalloc==0 ); /* rc==0 here means that one of the keys ran out of fields and ** all the fields up to that point were equal. Return the the default_rc ** value. */ - return pPKey2->default_rc; + rc = pPKey2->default_rc; + +debugCompareEnd: + if( desiredResult==0 && rc==0 ) return 1; + if( desiredResult<0 && rc<0 ) return 1; + if( desiredResult>0 && rc>0 ) return 1; + if( CORRUPT_DB ) return 1; + if( pKeyInfo->db->mallocFailed ) return 1; + return 0; } #endif /* ** Both *pMem1 and *pMem2 contain string values. Compare the two values @@ -3562,15 +3574,11 @@ if( rc!=0 ){ if( pKeyInfo->aSortOrder[i] ){ rc = -rc; } - assert( CORRUPT_DB - || (rc<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0) - || (rc>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0) - || pKeyInfo->db->mallocFailed - ); + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) ); assert( mem1.zMalloc==0 ); /* See comment below */ return rc; } i++; @@ -3585,13 +3593,11 @@ assert( mem1.zMalloc==0 ); /* rc==0 here means that one or both of the keys ran out of fields and ** all the fields up to that point were equal. Return the the default_rc ** value. */ - assert( CORRUPT_DB - || pPKey2->default_rc==vdbeRecordCompareDebug(nKey1, pKey1, pPKey2) - ); + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) ); return pPKey2->default_rc; } /* ** This function is an optimized version of sqlite3VdbeRecordCompare() @@ -3684,15 +3690,11 @@ /* The first fields of the two keys are equal and there are no trailing ** fields. Return pPKey2->default_rc in this case. */ res = pPKey2->default_rc; } - assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0) - || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0) - || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0) - || CORRUPT_DB - ); + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) ); return res; } /* ** This function is an optimized version of sqlite3VdbeRecordCompare() @@ -3748,15 +3750,11 @@ }else{ res = pPKey2->r1; } } - assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0) - || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0) - || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0) - || CORRUPT_DB - ); + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) ); return res; } /* ** Return a pointer to an sqlite3VdbeRecordCompare() compatible function Index: src/vdbesort.c ================================================================== --- src/vdbesort.c +++ src/vdbesort.c @@ -1,7 +1,7 @@ /* -** 2011 July 9 +** 2011-07-09 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. @@ -8,13 +8,59 @@ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains code for the VdbeSorter object, used in concert with -** a VdbeCursor to sort large numbers of keys (as may be required, for -** example, by CREATE INDEX statements on tables too large to fit in main -** memory). +** a VdbeCursor to sort large numbers of keys for CREATE TABLE statements +** or by SELECT statements with ORDER BY clauses that cannot be satisfied +** using indexes and without LIMIT clauses. +** +** The VdbeSorter object implements a external merge sort +** algorithm that is efficient even if the aggregate size of +** the elements being sorted exceeds the available memory. +** +** Here is the (internal, non-API) interface between this module and the +** rest of the SQLite system: +** +** sqlite3VdbeSorterInit() Create a new VdbeSorter object. +** +** sqlite3VdbeSorterWrite() Add a single new row to the VdbeSorter +** object. The row is a binary blob in the +** OP_MakeRecord format that contains both +** the ORDER BY key columns and result columns +** in the case of a SELECT w/ ORDER BY, or +** the complete record for an index entry +** in the case of a CREATE INDEX. +** +** sqlite3VdbeSorterRewind() Sort all content previously added. +** Position the read cursor on the +** first sorted element. +** +** sqlite3VdbeSorterNext() Advance the read cursor to the next sorted +** element. +** +** sqlite3VdbeSorterRowkey() Return the complete binary blob for the +** row currently under the read cursor. +** +** sqlite3VdbeSorterCompare() Compare the binary blob for the row +** currently under the read cursor against +** another binary blob X and report if +** X is strictly less than the read cursor. +** Used to enforce uniqueness in a +** CREATE UNIQUE INDEX statement. +** +** sqlite3VdbeSorterClose() Close the VdbeSorter object and reclaim +** all resources. +** +** sqlite3VdbeSorterReset() Refurbish the VdbeSorter for reuse. This +** is like Close() followed by Init() only +** much faster. +** +** The interfaces above must be called in a particular order. Write() can +** only occur in between Init()/Reset() and Rewind(). Next(), Rowkey(), and +** Compare() can only occur in between Rewind() and Close()/Reset(). +** */ #include "sqliteInt.h" #include "vdbeInt.h" @@ -103,10 +149,13 @@ VdbeSorterIter *aIter; /* Array of iterators to merge */ int *aTree; /* Current state of incremental merge */ sqlite3_file *pTemp1; /* PMA file 1 */ SorterRecord *pRecord; /* Head of in-memory record list */ UnpackedRecord *pUnpacked; /* Used to unpack keys */ + u8* aMemory; /* Block to allocate records from */ + int iMemory; /* Offset of free space in aMemory */ + int nMemory; /* Current size of allocation at aMemory */ }; /* ** The following type is an iterator for a PMA. It caches the current key in ** variables nKey/aKey. If the iterator is at EOF, pFile==0. @@ -119,10 +168,11 @@ sqlite3_file *pFile; /* File iterator is reading from */ u8 *aAlloc; /* Allocated space */ u8 *aKey; /* Pointer to current key */ u8 *aBuffer; /* Current read buffer */ int nBuffer; /* Size of read buffer in bytes */ + u8 *aMap; /* Pointer to mapping of pFile */ }; /* ** An instance of this structure is used to organize the stream of records ** being written to files by the merge-sort code into aligned, page-sized @@ -139,18 +189,40 @@ sqlite3_file *pFile; /* File to write to */ }; /* ** A structure to store a single record. All in-memory records are connected -** together into a linked list headed at VdbeSorter.pRecord using the -** SorterRecord.pNext pointer. +** together into a linked list headed at VdbeSorter.pRecord. +** +** How the linked list is connected depends on how memory is being managed +** by this module. If using a separate allocation for each in-memory record +** (VdbeSorter.aMemory==0), then the list is always connected using the +** SorterRecord.u.pNext pointers. +** +** Or, if using the single large allocation method (VdbeSorter.aMemory!=0), +** then while records are being accumulated the list is linked using the +** SorterRecord.u.iNext offset. This is because the aMemory[] array may +** be sqlite3Realloc()ed while records are being accumulated. Once the VM +** has finished passing records to the sorter, or when the in-memory buffer +** is full, the list is sorted. As part of the sorting process, it is +** converted to use the SorterRecord.u.pNext pointers. See function +** vdbeSorterSort() for details. */ struct SorterRecord { - void *pVal; int nVal; - SorterRecord *pNext; + union { + SorterRecord *pNext; /* Pointer to next record in list */ + int iNext; /* Offset within aMemory of next record */ + } u; }; + +/* Return a pointer to the buffer containing the record data for SorterRecord +** object p. Should be used as if: +** +** void *SRVAL(SorterRecord *p) { return (void*)&p[1]; } +*/ +#define SRVAL(p) ((void*)((SorterRecord*)(p) + 1)) /* Minimum allowable value for the VdbeSorter.nWorking variable */ #define SORTER_MIN_WORKING 10 /* Maximum number of segments to merge in a single pass. */ @@ -161,10 +233,11 @@ ** argument. All structure fields are set to zero before returning. */ static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){ sqlite3DbFree(db, pIter->aAlloc); sqlite3DbFree(db, pIter->aBuffer); + if( pIter->aMap ) sqlite3OsUnfetch(pIter->pFile, 0, pIter->aMap); memset(pIter, 0, sizeof(VdbeSorterIter)); } /* ** Read nByte bytes of data from the stream of data iterated by object p. @@ -181,10 +254,17 @@ int nByte, /* Bytes of data to read */ u8 **ppOut /* OUT: Pointer to buffer containing data */ ){ int iBuf; /* Offset within buffer to read from */ int nAvail; /* Bytes of data available in buffer */ + + if( p->aMap ){ + *ppOut = &p->aMap[p->iReadOff]; + p->iReadOff += nByte; + return SQLITE_OK; + } + assert( p->aBuffer ); /* If there is no more data to be read from the buffer, read the next ** p->nBuffer bytes of data from the file into it. Or, if there are less ** than p->nBuffer bytes remaining in the PMA, read all remaining data. */ @@ -262,22 +342,26 @@ ** the value read. */ static int vdbeSorterIterVarint(sqlite3 *db, VdbeSorterIter *p, u64 *pnOut){ int iBuf; - iBuf = p->iReadOff % p->nBuffer; - if( iBuf && (p->nBuffer-iBuf)>=9 ){ - p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut); + if( p->aMap ){ + p->iReadOff += sqlite3GetVarint(&p->aMap[p->iReadOff], pnOut); }else{ - u8 aVarint[16], *a; - int i = 0, rc; - do{ - rc = vdbeSorterIterRead(db, p, 1, &a); - if( rc ) return rc; - aVarint[(i++)&0xf] = a[0]; - }while( (a[0]&0x80)!=0 ); - sqlite3GetVarint(aVarint, pnOut); + iBuf = p->iReadOff % p->nBuffer; + if( iBuf && (p->nBuffer-iBuf)>=9 ){ + p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut); + }else{ + u8 aVarint[16], *a; + int i = 0, rc; + do{ + rc = vdbeSorterIterRead(db, p, 1, &a); + if( rc ) return rc; + aVarint[(i++)&0xf] = a[0]; + }while( (a[0]&0x80)!=0 ); + sqlite3GetVarint(aVarint, pnOut); + } } return SQLITE_OK; } @@ -321,10 +405,11 @@ VdbeSorterIter *pIter, /* Iterator to populate */ i64 *pnByte /* IN/OUT: Increment this value by PMA size */ ){ int rc = SQLITE_OK; int nBuf; + void *pMap; nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt); assert( pSorter->iWriteOff>iStart ); assert( pIter->aAlloc==0 ); @@ -331,37 +416,45 @@ assert( pIter->aBuffer==0 ); pIter->pFile = pSorter->pTemp1; pIter->iReadOff = iStart; pIter->nAlloc = 128; pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc); - pIter->nBuffer = nBuf; - pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf); - if( !pIter->aBuffer ){ - rc = SQLITE_NOMEM; + /* See if this PMA can be read using xFetch. */ + rc = sqlite3OsFetch(pIter->pFile, 0, pSorter->iWriteOff, &pMap); + if( rc!=SQLITE_OK ) return rc; + if( pMap ){ + pIter->aMap = (u8*)pMap; }else{ - int iBuf; - - iBuf = iStart % nBuf; - if( iBuf ){ - int nRead = nBuf - iBuf; - if( (iStart + nRead) > pSorter->iWriteOff ){ - nRead = (int)(pSorter->iWriteOff - iStart); - } - rc = sqlite3OsRead( - pSorter->pTemp1, &pIter->aBuffer[iBuf], nRead, iStart - ); - assert( rc!=SQLITE_IOERR_SHORT_READ ); - } - - if( rc==SQLITE_OK ){ - u64 nByte; /* Size of PMA in bytes */ - pIter->iEof = pSorter->iWriteOff; - rc = vdbeSorterIterVarint(db, pIter, &nByte); - pIter->iEof = pIter->iReadOff + nByte; - *pnByte += nByte; - } + pIter->nBuffer = nBuf; + pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf); + + if( !pIter->aBuffer ){ + rc = SQLITE_NOMEM; + }else{ + int iBuf; + + iBuf = iStart % nBuf; + if( iBuf ){ + int nRead = nBuf - iBuf; + if( (iStart + nRead) > pSorter->iWriteOff ){ + nRead = (int)(pSorter->iWriteOff - iStart); + } + rc = sqlite3OsRead( + pSorter->pTemp1, &pIter->aBuffer[iBuf], nRead, iStart + ); + assert( rc!=SQLITE_IOERR_SHORT_READ ); + } + } + } + + if( rc==SQLITE_OK ){ + u64 nByte; /* Size of PMA in bytes */ + pIter->iEof = pSorter->iWriteOff; + rc = vdbeSorterIterVarint(db, pIter, &nByte); + pIter->iEof = pIter->iReadOff + nByte; + *pnByte += nByte; } if( rc==SQLITE_OK ){ rc = vdbeSorterIterNext(db, pIter); } @@ -478,17 +571,28 @@ } pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d); if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM; assert( pSorter->pUnpacked==(UnpackedRecord *)d ); + pSorter->pUnpacked->nField = pCsr->pKeyInfo->nField; if( !sqlite3TempInMemory(db) ){ pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt); pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz; mxCache = db->aDb[0].pSchema->cache_size; if( mxCachemxPmaSize = mxCache * pgsz; + + /* If the application is using memsys3 or memsys5, use a separate + ** allocation for each sort-key in memory. Otherwise, use a single big + ** allocation at pSorter->aMemory for all sort-keys. */ + if( sqlite3GlobalConfig.pHeap==0 ){ + assert( pSorter->iMemory==0 ); + pSorter->nMemory = pgsz; + pSorter->aMemory = (u8*)sqlite3Malloc(pSorter->nMemory); + if( !pSorter->aMemory ) return SQLITE_NOMEM; + } } return SQLITE_OK; } @@ -497,11 +601,11 @@ */ static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){ SorterRecord *p; SorterRecord *pNext; for(p=pRecord; p; p=pNext){ - pNext = p->pNext; + pNext = p->u.pNext; sqlite3DbFree(db, p); } } /* @@ -518,18 +622,21 @@ } if( pSorter->pTemp1 ){ sqlite3OsCloseFree(pSorter->pTemp1); pSorter->pTemp1 = 0; } - vdbeSorterRecordFree(db, pSorter->pRecord); + if( pSorter->aMemory==0 ){ + vdbeSorterRecordFree(db, pSorter->pRecord); + } pSorter->pRecord = 0; pSorter->iWriteOff = 0; pSorter->iReadOff = 0; pSorter->nInMemory = 0; pSorter->nTree = 0; pSorter->nPMA = 0; pSorter->aTree = 0; + pSorter->iMemory = 0; } /* ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines. @@ -537,10 +644,11 @@ void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){ VdbeSorter *pSorter = pCsr->pSorter; if( pSorter ){ sqlite3VdbeSorterReset(db, pSorter); sqlite3DbFree(db, pSorter->pUnpacked); + sqlite3DbFree(db, pSorter->aMemory); sqlite3DbFree(db, pSorter); pCsr->pSorter = 0; } } @@ -548,46 +656,54 @@ ** Allocate space for a file-handle and open a temporary file. If successful, ** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK. ** Otherwise, set *ppFile to 0 and return an SQLite error code. */ static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){ - int dummy; - return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile, + int rc; + rc = sqlite3OsOpenMalloc(db->pVfs, 0, ppFile, SQLITE_OPEN_TEMP_JOURNAL | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | - SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &dummy + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &rc ); + if( rc==SQLITE_OK ){ + i64 max = SQLITE_MAX_MMAP_SIZE; + sqlite3OsFileControlHint( *ppFile, SQLITE_FCNTL_MMAP_SIZE, (void*)&max); + } + return rc; } /* ** Merge the two sorted lists p1 and p2 into a single list. ** Set *ppOut to the head of the new list. +** +** In cases where key values are equal, keys from list p1 are considered +** to be smaller than list p2. */ static void vdbeSorterMerge( const VdbeCursor *pCsr, /* For pKeyInfo */ SorterRecord *p1, /* First list to merge */ SorterRecord *p2, /* Second list to merge */ SorterRecord **ppOut /* OUT: Head of merged list */ ){ SorterRecord *pFinal = 0; SorterRecord **pp = &pFinal; - void *pVal2 = p2 ? p2->pVal : 0; + void *pVal2 = p2 ? SRVAL(p2) : 0; while( p1 && p2 ){ int res; - vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res); + vdbeSorterCompare(pCsr, 0, SRVAL(p1), p1->nVal, pVal2, p2->nVal, &res); if( res<=0 ){ *pp = p1; - pp = &p1->pNext; - p1 = p1->pNext; + pp = &p1->u.pNext; + p1 = p1->u.pNext; pVal2 = 0; }else{ *pp = p2; - pp = &p2->pNext; - p2 = p2->pNext; + pp = &p2->u.pNext; + p2 = p2->u.pNext; if( p2==0 ) break; - pVal2 = p2->pVal; + pVal2 = SRVAL(p2); } } *pp = p1 ? p1 : p2; *ppOut = pFinal; } @@ -594,10 +710,15 @@ /* ** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK ** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error ** occurs. +** +** The sort is required to be stable - if two elements compare as equal +** then the one added to the sorter first is considered the smaller. +** Currently, the list is sorted from newest to oldest - pSorter->pRecord +** points to the most recently added sort key. */ static int vdbeSorterSort(const VdbeCursor *pCsr){ int i; SorterRecord **aSlot; SorterRecord *p; @@ -608,12 +729,22 @@ return SQLITE_NOMEM; } p = pSorter->pRecord; while( p ){ - SorterRecord *pNext = p->pNext; - p->pNext = 0; + SorterRecord *pNext; + if( pSorter->aMemory ){ + if( (u8*)p==pSorter->aMemory ){ + pNext = 0; + }else{ + assert( p->u.iNextnMemory ); + pNext = (SorterRecord*)&pSorter->aMemory[p->u.iNext]; + } + }else{ + pNext = p->u.pNext; + } + p->u.pNext = 0; for(i=0; aSlot[i]; i++){ vdbeSorterMerge(pCsr, p, aSlot[i], &p); aSlot[i] = 0; } aSlot[i] = p; @@ -714,10 +845,32 @@ u8 aByte[10]; nByte = sqlite3PutVarint(aByte, iVal); fileWriterWrite(p, aByte, nByte); } +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** The first argument is a file-handle open on a temporary file. The file +** is guaranteed to be nByte bytes or smaller in size. This function +** attempts to extend the file to nByte bytes in size and to ensure that +** the VFS has memory mapped it. +** +** Whether or not the file does end up memory mapped of course depends on +** the specific VFS implementation. +*/ +static void vdbeSorterExtendFile(sqlite3_file *pFile, i64 nByte){ + int rc = sqlite3OsTruncate(pFile, nByte); + if( rc==SQLITE_OK ){ + void *p = 0; + sqlite3OsFetch(pFile, 0, nByte, &p); + sqlite3OsUnfetch(pFile, 0, p); + } +} +#else +# define vdbeSorterExtendFile(x,y) +#endif + /* ** Write the current contents of the in-memory linked-list to a PMA. Return ** SQLITE_OK if successful, or an SQLite error code otherwise. ** ** The format of a PMA is: @@ -748,79 +901,131 @@ rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1); assert( rc!=SQLITE_OK || pSorter->pTemp1 ); assert( pSorter->iWriteOff==0 ); assert( pSorter->nPMA==0 ); } + + /* Try to get the file to memory map */ + if( rc==SQLITE_OK ){ + vdbeSorterExtendFile( + pSorter->pTemp1, pSorter->iWriteOff + pSorter->nInMemory + 9 + ); + } if( rc==SQLITE_OK ){ SorterRecord *p; SorterRecord *pNext = 0; fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff); pSorter->nPMA++; fileWriterWriteVarint(&writer, pSorter->nInMemory); for(p=pSorter->pRecord; p; p=pNext){ - pNext = p->pNext; + pNext = p->u.pNext; fileWriterWriteVarint(&writer, p->nVal); - fileWriterWrite(&writer, p->pVal, p->nVal); - sqlite3DbFree(db, p); + fileWriterWrite(&writer, SRVAL(p), p->nVal); + if( pSorter->aMemory==0 ) sqlite3DbFree(db, p); } pSorter->pRecord = p; rc = fileWriterFinish(db, &writer, &pSorter->iWriteOff); } + if( pSorter->aMemory ) pSorter->pRecord = 0; + assert( pSorter->pRecord==0 || rc!=SQLITE_OK ); return rc; } /* ** Add a record to the sorter. */ int sqlite3VdbeSorterWrite( sqlite3 *db, /* Database handle */ - const VdbeCursor *pCsr, /* Sorter cursor */ + const VdbeCursor *pCsr, /* Sorter cursor */ Mem *pVal /* Memory cell containing record */ ){ VdbeSorter *pSorter = pCsr->pSorter; int rc = SQLITE_OK; /* Return Code */ SorterRecord *pNew; /* New list element */ + int bFlush; /* True to flush contents of memory to PMA */ + int nReq; /* Bytes of memory required */ + int nPMA; /* Bytes of PMA space required */ + assert( pSorter ); - pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n; - - pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord)); - if( pNew==0 ){ - rc = SQLITE_NOMEM; - }else{ - pNew->pVal = (void *)&pNew[1]; - memcpy(pNew->pVal, pVal->z, pVal->n); - pNew->nVal = pVal->n; - pNew->pNext = pSorter->pRecord; - pSorter->pRecord = pNew; - } - - /* See if the contents of the sorter should now be written out. They - ** are written out when either of the following are true: + + /* Figure out whether or not the current contents of memory should be + ** flushed to a PMA before continuing. If so, do so. + ** + ** If using the single large allocation mode (pSorter->aMemory!=0), then + ** flush the contents of memory to a new PMA if (a) at least one value is + ** already in memory and (b) the new value will not fit in memory. + ** + ** Or, if using separate allocations for each record, flush the contents + ** of memory to a PMA if either of the following are true: ** ** * The total memory allocated for the in-memory list is greater ** than (page-size * cache-size), or ** ** * The total memory allocated for the in-memory list is greater ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true. */ - if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && ( - (pSorter->nInMemory>pSorter->mxPmaSize) - || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull()) - )){ + nReq = pVal->n + sizeof(SorterRecord); + nPMA = pVal->n + sqlite3VarintLen(pVal->n); + if( pSorter->aMemory ){ + bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize; + }else{ + bFlush = ( + (pSorter->nInMemory > pSorter->mxPmaSize) + || (pSorter->nInMemory > pSorter->mnPmaSize && sqlite3HeapNearlyFull()) + ); + } + if( bFlush ){ #ifdef SQLITE_DEBUG i64 nExpect = pSorter->iWriteOff - + sqlite3VarintLen(pSorter->nInMemory) - + pSorter->nInMemory; + + sqlite3VarintLen(pSorter->nInMemory) + + pSorter->nInMemory; #endif rc = vdbeSorterListToPMA(db, pCsr); pSorter->nInMemory = 0; + pSorter->iMemory = 0; assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) ); + assert( rc!=SQLITE_OK || pSorter->pRecord==0 ); + } + + pSorter->nInMemory += nPMA; + + if( pSorter->aMemory ){ + int nMin = pSorter->iMemory + nReq; + + if( nMin>pSorter->nMemory ){ + u8 *aNew; + int nNew = pSorter->nMemory * 2; + while( nNew < nMin ) nNew = nNew*2; + if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize; + if( nNew < nMin ) nNew = nMin; + + aNew = sqlite3Realloc(pSorter->aMemory, nNew); + if( !aNew ) return SQLITE_NOMEM; + pSorter->pRecord = (SorterRecord*) + (aNew + ((u8*)pSorter->pRecord - pSorter->aMemory)); + pSorter->aMemory = aNew; + pSorter->nMemory = nNew; + } + + pNew = (SorterRecord*)&pSorter->aMemory[pSorter->iMemory]; + pSorter->iMemory += ROUND8(nReq); + pNew->u.iNext = (u8*)(pSorter->pRecord) - pSorter->aMemory; + }else{ + pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n+sizeof(SorterRecord)); + if( pNew==0 ){ + return SQLITE_NOMEM; + } + pNew->u.pNext = pSorter->pRecord; } + + memcpy(SRVAL(pNew), pVal->z, pVal->n); + pNew->nVal = pVal->n; + pSorter->pRecord = pNew; return rc; } /* @@ -834,11 +1039,11 @@ VdbeSorter *pSorter = pCsr->pSorter; int rc = SQLITE_OK; /* Return code */ int i; /* Used to iterator through aIter[] */ i64 nByte = 0; /* Total bytes in all opened PMAs */ - /* Initialize the iterators. */ + /* Initialize the iterators. Iterator 0 contains the oldest data. */ for(i=0; iaIter[i]; rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte); pSorter->iReadOff = pIter->iEof; assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff ); @@ -891,11 +1096,11 @@ pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte); if( !pSorter->aIter ) return SQLITE_NOMEM; pSorter->aTree = (int *)&pSorter->aIter[N]; pSorter->nTree = N; - do { + while(1){ int iNew; /* Index of new, merged, PMA */ for(iNew=0; rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNTnPMA; iNew++ @@ -923,10 +1128,13 @@ /* Open the second temp file, if it is not already open. */ if( pTemp2==0 ){ assert( iWrite2==0 ); rc = vdbeSorterOpenTempFile(db, &pTemp2); + if( rc==SQLITE_OK ){ + vdbeSorterExtendFile(pTemp2, pSorter->iWriteOff); + } } if( rc==SQLITE_OK ){ int bEof = 0; fileWriterInit(db, pTemp2, &writer, iWrite2); @@ -941,10 +1149,11 @@ } rc2 = fileWriterFinish(db, &writer, &iWrite2); if( rc==SQLITE_OK ) rc = rc2; } } + if( rc ) break; if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){ break; }else{ sqlite3_file *pTmp = pSorter->pTemp1; @@ -953,11 +1162,11 @@ pTemp2 = pTmp; pSorter->iWriteOff = iWrite2; pSorter->iReadOff = 0; iWrite2 = 0; } - }while( rc==SQLITE_OK ); + } if( pTemp2 ){ sqlite3OsCloseFree(pTemp2); } *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); @@ -1006,29 +1215,35 @@ ** ** Alternatively, if pIter2 contains the smaller of the two values, ** set aTree[i] to its index and update pIter1. If vdbeSorterCompare() ** was actually called above, then pSorter->pUnpacked now contains ** a value equivalent to pIter2. So set pKey2 to NULL to prevent - ** vdbeSorterCompare() from decoding pIter2 again. */ - if( iRes<=0 ){ + ** vdbeSorterCompare() from decoding pIter2 again. + ** + ** If the two values were equal, then the value from the oldest + ** PMA should be considered smaller. The VdbeSorter.aIter[] array + ** is sorted from oldest to newest, so pIter1 contains older values + ** than pIter2 iff (pIter1aTree[i] = (int)(pIter1 - pSorter->aIter); pIter2 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ]; pKey2 = pIter2->aKey; }else{ if( pIter1->pFile ) pKey2 = 0; pSorter->aTree[i] = (int)(pIter2 - pSorter->aIter); pIter1 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ]; } - } *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); } }else{ SorterRecord *pFree = pSorter->pRecord; - pSorter->pRecord = pFree->pNext; - pFree->pNext = 0; - vdbeSorterRecordFree(db, pFree); + pSorter->pRecord = pFree->u.pNext; + pFree->u.pNext = 0; + if( pSorter->aMemory==0 ){ + vdbeSorterRecordFree(db, pFree); + } *pbEof = !pSorter->pRecord; rc = SQLITE_OK; } return rc; } @@ -1047,11 +1262,11 @@ pIter = &pSorter->aIter[ pSorter->aTree[1] ]; *pnKey = pIter->nKey; pKey = pIter->aKey; }else{ *pnKey = pSorter->pRecord->nVal; - pKey = pSorter->pRecord->pVal; + pKey = SRVAL(pSorter->pRecord); } return pKey; } /* Index: test/sort.test ================================================================== --- test/sort.test +++ test/sort.test @@ -461,7 +461,30 @@ insert into b values (3, 1, 'yyy'); select a.id, b.id, b.text from a join b on (a.id = b.aId) order by a.id, b.text; } } {1 2 xxx 1 3 yyy 1 1 zzz} + + +#------------------------------------------------------------------------- +# Check that the sorter in vdbesort.c sorts in a stable fashion. +# +do_execsql_test sort-13.0 { + CREATE TABLE t10(a, b); +} +do_test sort-13.1 { + db transaction { + for {set i 0} {$i < 100000} {incr i} { + execsql { INSERT INTO t10 VALUES( $i/10, $i%10 ) } + } + } +} {} +do_execsql_test sort-13.2 { + SELECT a, b FROM t10 ORDER BY a; +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}] +do_execsql_test sort-13.3 { + PRAGMA cache_size = 5; + SELECT a, b FROM t10 ORDER BY a; +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}] + finish_test Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -1074,10 +1074,11 @@ set D "" } foreach opcode { Seek SeekGe SeekGt SeekLe SeekLt NotFound Last Rewind NoConflict Next Prev VNext VPrev VFilter + SorterSort SorterNext } { set color($opcode) $B } foreach opcode {ResultRow} { set color($opcode) $G @@ -1096,10 +1097,11 @@ set bSeenGoto 1 } if {$opcode=="Next" || $opcode=="Prev" || $opcode=="VNext" || $opcode=="VPrev" + || $opcode=="SorterNext" } { for {set i $p2} {$i<$addr} {incr i} { incr x($i) 2 } }