/ Changes On Branch server-edition
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch server-edition Excluding Merge-Ins

This is equivalent to a diff from b9a58daca8 to 093b9108ea

2017-06-28
20:21
Merge tserver fixes with this branch. (check-in: 58a0aab8fd user: dan tags: server-process-edition)
20:12
Fix bugs in test program tserver.c. (Leaf check-in: 093b9108ea user: dan tags: server-edition)
2017-06-20
19:20
Ensure that test tool "tserver" finalizes all statements before attempting to close a database handle. (check-in: d8568aacf0 user: dan tags: server-edition)
2017-05-06
17:12
Fix requirements marks and harmless compiler warnings. (check-in: 198ff4c01d user: drh tags: trunk)
16:04
Update this branch with latest trunk changes. (check-in: ed6bad67f5 user: dan tags: server-edition)
2017-05-04
11:13
Fix a collision of the "B0" identifier name between the termios.h header file and the SHA3 implementation in the shell. (check-in: b9a58daca8 user: drh tags: trunk)
2017-05-03
19:36
Remove the unused "sqlite3_stack_used" TCL command from the test harness. (check-in: e24b73820c user: drh tags: trunk)

Changes to main.mk.

65
66
67
68
69
70
71

72
73
74
75
76
77
78
         icu.o insert.o json1.o legacy.o loadext.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
         notify.o opcodes.o os.o os_unix.o os_win.o \
         pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \
         random.o resolve.o rowset.o rtree.o select.o sqlite3rbu.o status.o \

         table.o threads.o tokenize.o treeview.o trigger.o \
         update.o userauth.o util.o vacuum.o \
         vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
	 vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \
         utf.o vtab.o

LIBOBJ += sqlite3session.o







>







65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
         icu.o insert.o json1.o legacy.o loadext.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
         notify.o opcodes.o os.o os_unix.o os_win.o \
         pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \
         random.o resolve.o rowset.o rtree.o select.o sqlite3rbu.o status.o \
	 server.o \
         table.o threads.o tokenize.o treeview.o trigger.o \
         update.o userauth.o util.o vacuum.o \
         vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
	 vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \
         utf.o vtab.o

LIBOBJ += sqlite3session.o
140
141
142
143
144
145
146


147
148
149
150
151
152
153
  $(TOP)/src/pragma.h \
  $(TOP)/src/prepare.c \
  $(TOP)/src/printf.c \
  $(TOP)/src/random.c \
  $(TOP)/src/resolve.c \
  $(TOP)/src/rowset.c \
  $(TOP)/src/select.c \


  $(TOP)/src/status.c \
  $(TOP)/src/shell.c \
  $(TOP)/src/sqlite.h.in \
  $(TOP)/src/sqlite3ext.h \
  $(TOP)/src/sqliteInt.h \
  $(TOP)/src/sqliteLimit.h \
  $(TOP)/src/table.c \







>
>







141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  $(TOP)/src/pragma.h \
  $(TOP)/src/prepare.c \
  $(TOP)/src/printf.c \
  $(TOP)/src/random.c \
  $(TOP)/src/resolve.c \
  $(TOP)/src/rowset.c \
  $(TOP)/src/select.c \
  $(TOP)/src/server.c \
  $(TOP)/src/server.h \
  $(TOP)/src/status.c \
  $(TOP)/src/shell.c \
  $(TOP)/src/sqlite.h.in \
  $(TOP)/src/sqlite3ext.h \
  $(TOP)/src/sqliteInt.h \
  $(TOP)/src/sqliteLimit.h \
  $(TOP)/src/table.c \

Changes to src/btree.c.

5612
5613
5614
5615
5616
5617
5618

























































































































































































































































5619
5620
5621
5622
5623
5624
5625
  ){
    return btreePrevious(pCur, pRes);
  }
  pCur->ix--;
  return SQLITE_OK;
}


























































































































































































































































/*
** Allocate a new page from the database file.
**
** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
** has already been called on the new page.)  The new page has also
** been referenced and the calling routine is responsible for calling
** sqlite3PagerUnref() on the new page when it is done.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
5626
5627
5628
5629
5630
5631
5632
5633
5634
5635
5636
5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682
5683
5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
  ){
    return btreePrevious(pCur, pRes);
  }
  pCur->ix--;
  return SQLITE_OK;
}

#ifdef SQLITE_SERVER_EDITION

#define SERVER_DEFAULT_FREELISTS      16
#define SERVER_DEFAULT_FREELIST_SIZE 128

/*
** Allocate the free-node and the first SERVER_DEFAULT_FREELISTS 
** trunk pages.
**
** All pages are taken from the end of the file by advancing pBt->nPage.
** The page number of the new node page is stored at byte offset 32 of
** page 1. The node page is laid out as follows:
**
**   bytes 0..3:   zero
**   bytes 4..7:   number of free-lists (SERVER_DEFAULT_FREELISTS)
**   bytes 8..:    one 4-byte page number per free-list, identifying the
**                 first trunk page of that list
**
** The first 8 bytes of each new trunk page are zeroed (no next trunk and
** no leaf entries).
**
** Return SQLITE_OK if successful, or the error code returned by the
** pager layer otherwise.
*/
static int allocateServerFreenode(BtShared *pBt){
  int rc;
  MemPage *pPage1 = pBt->pPage1;

  /* Page 1 must be writable, as the node page number is stored on it. */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  if( rc==SQLITE_OK ){
    Pgno pgnoNode;
    MemPage *pNode = 0;
    int i;

    /* Never place the node page on the pending-byte page. This is the
    ** same rule applied to the trunk pages allocated below.  */
    if( ++pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
    pgnoNode = pBt->nPage;

    put4byte(&pPage1->aData[32], pgnoNode);
    rc = btreeGetUnusedPage(pBt, pgnoNode, &pNode, PAGER_GET_NOCONTENT);
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerWrite(pNode->pDbPage);
    }
    if( rc==SQLITE_OK ){
      put4byte(&pNode->aData[0], 0);
      put4byte(&pNode->aData[4], SERVER_DEFAULT_FREELISTS);
    }

    /* Allocate one empty trunk page for each free-list and record its
    ** page number in the node page.  */
    for(i=0; rc==SQLITE_OK && i<SERVER_DEFAULT_FREELISTS; i++){
      MemPage *pTrunk = 0;
      Pgno pgnoTrunk;
      if( ++pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
      pgnoTrunk = pBt->nPage;

      rc = btreeGetUnusedPage(pBt, pgnoTrunk, &pTrunk, PAGER_GET_NOCONTENT);
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pTrunk->pDbPage);
      }
      if( rc==SQLITE_OK ){
        memset(pTrunk->aData, 0, 8);
        put4byte(&pNode->aData[8+i*4], pgnoTrunk);
      }
      releasePage(pTrunk);
    }
    releasePage(pNode);
  }

  return rc;
}

/*
** Return a reference to the first trunk page in one of the database free-lists.
** Allocate the database free-lists if required.
**
** Parameters:
**
**   pBt      The shared b-tree. Page 1 (pBt->pPage1) holds the node page
**            number at byte offset 32.
**   bAlloc   True if the caller intends to allocate a page. In this case
**            a trunk is only acceptable if it has either a next-trunk
**            pointer (bytes 0..3) or a non-zero leaf count (bytes 4..7).
**   ppTrunk  OUT: the selected trunk page, already made writable, or 0 if
**            an error occurs.
**
** Each first-trunk page listed in the node page is tried in turn, using
** sqlite3PagerPagelock() to lock it. If no acceptable trunk can be locked,
** new pages from the end of the file are appended to the first trunk of
** every list, and the first trunk of the first list is returned.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
**
** NOTE(review): if the node page reports zero free-lists, pTrunk is still
** NULL when the final "rc==SQLITE_OK" block is reached - confirm whether
** that can happen for a damaged database file.
*/
static int findServerTrunk(BtShared *pBt, int bAlloc, MemPage **ppTrunk){
  MemPage *pPage1 = pBt->pPage1;
  MemPage *pNode = 0;             /* The node page */
  MemPage *pTrunk = 0;            /* The returned page */
  Pgno iNode;                     /* Page number of node page */
  int rc = SQLITE_OK;

  /* If the node page and free-list trunks have not yet been allocated, allocate
  ** them now.  */
  iNode = get4byte(&pPage1->aData[32]);
  if( iNode==0 ){
    rc = allocateServerFreenode(pBt);
    iNode = get4byte(&pPage1->aData[32]);
  }

  /* Grab the node page */
  if( rc==SQLITE_OK ){
    rc = btreeGetUnusedPage(pBt, iNode, &pNode, 0);
  }
  if( rc==SQLITE_OK ){
    int nList;                    /* Number of free-lists in this db */
    int i;

    /* Try to lock a free-list trunk. If bAlloc is true, it has to be a
    ** free-list trunk with at least one entry in the free-list. */
    nList = (int)get4byte(&pNode->aData[4]);
    for(i=0; i<nList; i++){
      Pgno iTrunk = get4byte(&pNode->aData[8+i*4]);
      if( SQLITE_OK==sqlite3PagerPagelock(pBt->pPager, iTrunk, 1) ){
        rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0);
        if( rc==SQLITE_OK && bAlloc ){
          if( !get4byte(&pTrunk->aData[0]) && !get4byte(&pTrunk->aData[4]) ){
            releasePage(pTrunk);
            pTrunk = 0;
          }
        }
        if( rc!=SQLITE_OK || pTrunk ) break;
      }
    }

    /* No free pages in any free-list. Or perhaps we were locked out. In 
    ** either case, try to allocate more from the end of the file now.  */
    if( i==nList ){
      /* A trunk page holds at most (usableSize/4 - 2) leaf entries after
      ** its 8-byte header. Never top a trunk up beyond that, even when
      ** SERVER_DEFAULT_FREELIST_SIZE is larger (small page sizes), as
      ** doing so would write past the end of the page.  */
      int nMax = (int)(pBt->usableSize / 4) - 2;
      if( nMax>SERVER_DEFAULT_FREELIST_SIZE ){
        nMax = SERVER_DEFAULT_FREELIST_SIZE;
      }

      assert( rc==SQLITE_OK && pTrunk==0 );
      rc = sqlite3PagerWrite(pPage1->pDbPage);
      for(i=0; rc==SQLITE_OK && i<nList; i++){
        /* Add some free pages to each free-list. No server-locks are required
        ** to do this as we have a write-lock on page 1 - guaranteeing
        ** exclusive access to the db file.  */
        MemPage *pT = 0;
        Pgno iTrunk = get4byte(&pNode->aData[8+i*4]);
        rc = btreeGetUnusedPage(pBt, iTrunk, &pT, 0);
        if( rc==SQLITE_OK ){
          rc = sqlite3PagerWrite(pT->pDbPage);
        }
        if( rc==SQLITE_OK ){
          int iPg = get4byte(&pT->aData[4]);
          for(/*no-op*/; iPg<nMax; iPg++){
            if( ++pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
            put4byte(&pT->aData[8+iPg*4], pBt->nPage);
          }
          put4byte(&pT->aData[4], iPg);

          /* The first trunk populated is the one handed to the caller. */
          if( pTrunk==0 ){
            pTrunk = pT;
            pT = 0;
          }
        }
        releasePage(pT);
      }
      if( rc==SQLITE_OK ){
        /* Make the new last page of the file writable and record the new
        ** database size at byte offset 28 of page 1.  */
        MemPage *pLast = 0;
        rc = btreeGetUnusedPage(pBt, pBt->nPage, &pLast, 0);
        if( rc==SQLITE_OK ){
          rc = sqlite3PagerWrite(pLast->pDbPage);
          releasePage(pLast);
          put4byte(28 + (u8*)pPage1->aData, pBt->nPage);
        }
      }
    }
  }

  releasePage(pNode);
  if( rc==SQLITE_OK ){
    assert( pTrunk );
    rc = sqlite3PagerWrite(pTrunk->pDbPage);
  }
  if( rc!=SQLITE_OK ){
    releasePage(pTrunk);
    pTrunk = 0;
  }
  *ppTrunk = pTrunk;
  return rc;
}

/*
** Allocate a page from one of the server-mode free-lists.
**
** A trunk page is obtained from findServerTrunk(). If the trunk has at
** least one leaf entry, the last leaf is removed from the trunk and
** becomes the new page. Otherwise the next trunk page in the chain
** (bytes 0..3 of the trunk) becomes the new page, and its content is
** first copied over the current trunk so that the chain stays intact.
**
** On success *ppPage is set to the new page (already made writable) and
** *pPgno to its page number, and SQLITE_OK is returned. On failure
** *ppPage is set to 0 and an error code is returned.
**
** Only eMode==BTALLOC_ANY is supported. Parameter "nearby" is not used.
*/
static int allocateServerPage(
  BtShared *pBt,         /* The btree */
  MemPage **ppPage,      /* Store pointer to the allocated page here */
  Pgno *pPgno,           /* Store the page number here */
  Pgno nearby,           /* Search for a page near this one */
  u8 eMode               /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */
){
  int rc;                         /* Return code */
  int nLeaf;                      /* Number of leaves on the trunk page */
  int getFlags;                   /* Flags for btreeGetUnusedPage() */
  MemPage *pFirst = 0;            /* Trunk page from findServerTrunk() */
  MemPage *pAlloc = 0;            /* The newly allocated page */
  Pgno iAlloc = 0;                /* Page number of pAlloc */

#ifdef SQLITE_DEBUG
  int nRef = sqlite3PagerRefcount(pBt->pPager);
#endif

  assert( eMode==BTALLOC_ANY );
  assert( sqlite3_mutex_held(pBt->mutex) );

  *ppPage = 0;
  rc = findServerTrunk(pBt, 1, &pFirst);
  if( rc!=SQLITE_OK ) goto allocate_server_out;

  nLeaf = (int)get4byte(&pFirst->aData[4]);
  if( nLeaf!=0 ){
    /* Pop the last leaf off the trunk. The trunk is no longer needed. */
    nLeaf--;
    iAlloc = get4byte(&pFirst->aData[8+4*nLeaf]);
    put4byte(&pFirst->aData[4], (u32)nLeaf);
    releasePage(pFirst);
    pFirst = 0;
  }else{
    /* No leaves. Use the next trunk page in the chain instead. */
    iAlloc = get4byte(&pFirst->aData[0]);
    assert( iAlloc );
  }

  /* A leaf page has no content worth reading. A trunk page does, as it
  ** is about to be copied over pFirst.  */
  if( pFirst ){
    getFlags = 0;
  }else{
    getFlags = PAGER_GET_NOCONTENT;
  }
  rc = btreeGetUnusedPage(pBt, iAlloc, &pAlloc, getFlags);
  if( rc==SQLITE_OK ){
    rc = sqlite3PagerWrite(pAlloc->pDbPage);
    if( rc==SQLITE_OK ){
      if( pFirst ){
        memcpy(pFirst->aData, pAlloc->aData, pBt->usableSize);
      }
    }else{
      releasePage(pAlloc);
      pAlloc = 0;
    }
  }
  *ppPage = pAlloc;
  *pPgno = iAlloc;

allocate_server_out:
  releasePage(pFirst);
  assert( (rc==SQLITE_OK)==(*ppPage!=0) );
  assert( sqlite3PagerRefcount(pBt->pPager)==(nRef+(*ppPage!=0)) );
  return rc;
}

/*
** Add page iPage to one of the server-mode free-lists.
**
** pPage is the in-memory page for iPage, or 0 if the caller does not
** have one. A trunk page is obtained from findServerTrunk(). Then:
**
**   * If the trunk already holds (usableSize/4 - 2) or more leaf entries,
**     the content of the trunk is copied into page iPage, and the trunk
**     is reset to an empty trunk whose next-trunk pointer is iPage.
**
**   * Otherwise, iPage is appended to the trunk as a new leaf entry.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. The
** pager reference count is the same on return as it was on entry.
*/
static int freeServerPage2(BtShared *pBt, MemPage *pPage, Pgno iPage){
  int rc;                         /* Return code */
  MemPage *pTrunk = 0;            /* The node page */
#ifdef SQLITE_DEBUG
  int nRef = sqlite3PagerRefcount(pBt->pPager);
#endif

  assert( sqlite3_mutex_held(pBt->mutex) );
  rc = findServerTrunk(pBt, 0, &pTrunk);
  if( rc==SQLITE_OK ){
    int nFree;              /* Number of free pages on this trunk page */
    nFree = (int)get4byte(&pTrunk->aData[4]);
    if( nFree>=((pBt->usableSize / 4) - 2) ){
      /* The trunk is full. Take a reference to the page being freed so
      ** that it can be released unconditionally below.  */
      if( pPage==0 ){
        rc = btreeGetUnusedPage(pBt, iPage, &pPage, 0);
      }else{
        sqlite3PagerRef(pPage->pDbPage);
      }

      /* Only touch pPage if it was actually obtained. If the call to
      ** btreeGetUnusedPage() above failed, pPage may still be 0 and the
      ** error code in rc must be returned to the caller.  */
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pPage->pDbPage);
      }
      if( rc==SQLITE_OK ){
        memcpy(pPage->aData, pTrunk->aData, pBt->usableSize);
        put4byte(&pTrunk->aData[0], iPage);
        put4byte(&pTrunk->aData[4], 0);
      }
      releasePage(pPage);
    }else{
      put4byte(&pTrunk->aData[8+nFree*4], iPage);
      put4byte(&pTrunk->aData[4], (u32)nFree+1);
    }
    releasePage(pTrunk);
  }

  assert( nRef==sqlite3PagerRefcount(pBt->pPager) );
  return rc;
}

#else
# define allocateServerPage(v, w, x, y, z) SQLITE_OK
# define freeServerPage2(x, y, z) SQLITE_OK
#endif /* SQLITE_SERVER_EDITION */

/*
** Allocate a new page from the database file.
**
** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
** has already been called on the new page.)  The new page has also
** been referenced and the calling routine is responsible for calling
** sqlite3PagerUnref() on the new page when it is done.
5648
5649
5650
5651
5652
5653
5654




5655
5656
5657
5658
5659
5660
5661
  MemPage *pPage1;
  int rc;
  u32 n;     /* Number of pages on the freelist */
  u32 k;     /* Number of leaves on the trunk of the freelist */
  MemPage *pTrunk = 0;
  MemPage *pPrevTrunk = 0;
  Pgno mxPage;     /* Total size of the database file */





  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
  pPage1 = pBt->pPage1;
  mxPage = btreePagecount(pBt);
  /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36
  ** stores stores the total number of pages on the freelist. */







>
>
>
>







5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912
5913
5914
  MemPage *pPage1;
  int rc;
  u32 n;     /* Number of pages on the freelist */
  u32 k;     /* Number of leaves on the trunk of the freelist */
  MemPage *pTrunk = 0;
  MemPage *pPrevTrunk = 0;
  Pgno mxPage;     /* Total size of the database file */

  if( sqlite3PagerIsServer(pBt->pPager) ){
    return allocateServerPage(pBt, ppPage, pPgno, nearby, eMode); 
  }

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
  pPage1 = pBt->pPage1;
  mxPage = btreePagecount(pBt);
  /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36
  ** stores stores the total number of pages on the freelist. */
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999











6000
6001
6002
6003
6004
6005
6006
  if( pMemPage ){
    pPage = pMemPage;
    sqlite3PagerRef(pPage->pDbPage);
  }else{
    pPage = btreePageLookup(pBt, iPage);
  }

  /* Increment the free page count on pPage1 */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  if( rc ) goto freepage_out;
  nFree = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], nFree+1);

  if( pBt->btsFlags & BTS_SECURE_DELETE ){
    /* If the secure_delete option is enabled, then
    ** always fully overwrite deleted information with zeros.
    */
    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
     ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
    ){
      goto freepage_out;
    }
    memset(pPage->aData, 0, pPage->pBt->pageSize);
  }












  /* If the database supports auto-vacuum, write an entry in the pointer-map
  ** to indicate that the page is free.
  */
  if( ISAUTOVACUUM ){
    ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc);
    if( rc ) goto freepage_out;







<
<
<
<
<
<











>
>
>
>
>
>
>
>
>
>
>







6229
6230
6231
6232
6233
6234
6235






6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259
6260
6261
6262
6263
6264
  if( pMemPage ){
    pPage = pMemPage;
    sqlite3PagerRef(pPage->pDbPage);
  }else{
    pPage = btreePageLookup(pBt, iPage);
  }







  if( pBt->btsFlags & BTS_SECURE_DELETE ){
    /* If the secure_delete option is enabled, then
    ** always fully overwrite deleted information with zeros.
    */
    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
     ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
    ){
      goto freepage_out;
    }
    memset(pPage->aData, 0, pPage->pBt->pageSize);
  }
  
  if( sqlite3PagerIsServer(pBt->pPager) ){
    rc = freeServerPage2(pBt, pPage, iPage);
    goto freepage_out;
  }

  /* Increment the free page count on pPage1 */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  if( rc ) goto freepage_out;
  nFree = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], nFree+1);

  /* If the database supports auto-vacuum, write an entry in the pointer-map
  ** to indicate that the page is free.
  */
  if( ISAUTOVACUUM ){
    ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc);
    if( rc ) goto freepage_out;
9440
9441
9442
9443
9444
9445
9446











































9447
9448
9449
9450
9451
9452
9453
  pCheck->v1 = saved_v1;
  pCheck->v2 = saved_v2;
  return depth+1;
}
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */

#ifndef SQLITE_OMIT_INTEGRITY_CHECK











































/*
** This routine does a complete check of the given BTree file.  aRoot[] is
** an array of page numbers where each page number is the root page of
** a table.  nRoot is the number of entries in aRoot.
**
** A read-only or read-write transaction must be opened before calling
** this function.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9698
9699
9700
9701
9702
9703
9704
9705
9706
9707
9708
9709
9710
9711
9712
9713
9714
9715
9716
9717
9718
9719
9720
9721
9722
9723
9724
9725
9726
9727
9728
9729
9730
9731
9732
9733
9734
9735
9736
9737
9738
9739
9740
9741
9742
9743
9744
9745
9746
9747
9748
9749
9750
9751
9752
9753
9754
  pCheck->v1 = saved_v1;
  pCheck->v2 = saved_v2;
  return depth+1;
}
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */

#ifndef SQLITE_OMIT_INTEGRITY_CHECK

#if !defined(SQLITE_OMIT_INTEGRITY_CHECK) && defined(SQLITE_SERVER_EDITION)
/*
** Check the integrity of the server-mode free-lists.
**
** The page number of the node page is read from byte offset 32 of page 1.
** If it is zero there is nothing to check. Otherwise, the node page and
** every trunk and leaf page reachable from it are marked as referenced
** using checkRef(). Problems are reported via checkAppendMsg().
**
** Counts read from the database file are checked against the number of
** 4-byte entries that fit on a page after its 8-byte header, so that a
** damaged file cannot cause reads beyond the end of a page buffer.
*/
static void checkServerList(IntegrityCk *pCheck){
  u32 pgnoNode = get4byte(&pCheck->pBt->pPage1->aData[32]);
  if( pgnoNode ){
    DbPage *pNode = 0;
    u8 *aNodeData;
    u32 nList;                    /* Number of free-lists */
    u32 nMaxEntry;                /* Max 4-byte entries after 8-byte header */
    u32 i;

    nMaxEntry = (pCheck->pBt->usableSize / 4) - 2;

    checkRef(pCheck, pgnoNode);
    if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoNode, &pNode, 0) ){
      checkAppendMsg(pCheck, "failed to get node page %d", pgnoNode);
      return;
    }
    aNodeData = sqlite3PagerGetData(pNode);
    nList = get4byte(&aNodeData[4]);
    if( nList>nMaxEntry ){
      checkAppendMsg(pCheck, "free-list count too big on node page %d",
          pgnoNode);
      nList = 0;
    }
    for(i=0; i<nList; i++){
      u32 pgnoTrunk = get4byte(&aNodeData[8+4*i]);
      while( pgnoTrunk && pCheck->mxErr ){
        DbPage *pTrunk = 0;

        /* checkRef() is expected to return non-zero if the page number is
        ** invalid or the page has been referenced before. Stop following
        ** the chain in that case, so that a loop of trunk pages cannot
        ** cause this function to spin forever.  */
        if( checkRef(pCheck, pgnoTrunk) ) break;

        if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoTrunk, &pTrunk, 0) ){
          checkAppendMsg(pCheck, "failed to get page %d", pgnoTrunk);
          pgnoTrunk = 0;
        }else{
          u8 *aTrunkData = sqlite3PagerGetData(pTrunk);
          u32 nLeaf = get4byte(&aTrunkData[4]);
          if( nLeaf>nMaxEntry ){
            checkAppendMsg(pCheck,
                "freelist leaf count too big on page %d", pgnoTrunk);
          }else{
            u32 iLeaf;
            for(iLeaf=0; iLeaf<nLeaf; iLeaf++){
              u32 pgnoLeaf = get4byte(&aTrunkData[8+iLeaf*4]);
              checkRef(pCheck, pgnoLeaf);
            }
          }
          pgnoTrunk = get4byte(&aTrunkData[0]);
          sqlite3PagerUnref(pTrunk);
        }
      }
    }

    sqlite3PagerUnref(pNode);
  }
}
#endif
/*
** This routine does a complete check of the given BTree file.  aRoot[] is
** an array of page numbers where each page number is the root page of
** a table.  nRoot is the number of entries in aRoot.
**
** A read-only or read-write transaction must be opened before calling
** this function.
9505
9506
9507
9508
9509
9510
9511






9512
9513

9514
9515
9516
9517
9518
9519
9520

  i = PENDING_BYTE_PAGE(pBt);
  if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);

  /* Check the integrity of the freelist
  */
  sCheck.zPfx = "Main freelist: ";






  checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
            get4byte(&pBt->pPage1->aData[36]));

  sCheck.zPfx = 0;

  /* Check all the tables.
  */
  testcase( pBt->db->flags & SQLITE_CellSizeCk );
  pBt->db->flags &= ~SQLITE_CellSizeCk;
  for(i=0; (int)i<nRoot && sCheck.mxErr; i++){







>
>
>
>
>
>
|
|
>







9806
9807
9808
9809
9810
9811
9812
9813
9814
9815
9816
9817
9818
9819
9820
9821
9822
9823
9824
9825
9826
9827
9828

  i = PENDING_BYTE_PAGE(pBt);
  if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);

  /* Check the integrity of the freelist
  */
  sCheck.zPfx = "Main freelist: ";
#ifdef SQLITE_SERVER_EDITION
  if( sqlite3PagerIsServer(pBt->pPager) ){
    checkServerList(&sCheck);
  }else
#endif
  {
    checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
        get4byte(&pBt->pPage1->aData[36]));
  }
  sCheck.zPfx = 0;

  /* Check all the tables.
  */
  testcase( pBt->db->flags & SQLITE_CellSizeCk );
  pBt->db->flags &= ~SQLITE_CellSizeCk;
  for(i=0; (int)i<nRoot && sCheck.mxErr; i++){

Changes to src/pager.c.

702
703
704
705
706
707
708



709
710
711
712
713
714
715
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  PCache *pPCache;            /* Pointer to page cache object */
#ifndef SQLITE_OMIT_WAL
  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
  char *zWal;                 /* File name for write-ahead log */
#endif



};

/*
** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains
** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS 
** or CACHE_WRITE to sqlite3_db_status().
*/







>
>
>







702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  PCache *pPCache;            /* Pointer to page cache object */
#ifndef SQLITE_OMIT_WAL
  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
  char *zWal;                 /* File name for write-ahead log */
#endif
#ifdef SQLITE_SERVER_EDITION
  Server *pServer;
#endif
};

/*
** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains
** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS 
** or CACHE_WRITE to sqlite3_db_status().
*/
831
832
833
834
835
836
837






838
839
840
841
842
843
844
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif







#ifndef NDEBUG 
/*
** Usage:
**
**   assert( assert_pager_state(pPager) );
**







>
>
>
>
>
>







834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

#ifdef SQLITE_SERVER_EDITION
# define pagerIsServer(x) ((x)->pServer!=0)
#else
# define pagerIsServer(x) 0
#endif

#ifndef NDEBUG 
/*
** Usage:
**
**   assert( assert_pager_state(pPager) );
**
1128
1129
1130
1131
1132
1133
1134

1135
1136
1137
1138
1139
1140
1141
*/
static int pagerUnlockDb(Pager *pPager, int eLock){
  int rc = SQLITE_OK;

  assert( !pPager->exclusiveMode || pPager->eLock==eLock );
  assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
  assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );

  if( isOpen(pPager->fd) ){
    assert( pPager->eLock>=eLock );
    rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock);
    if( pPager->eLock!=UNKNOWN_LOCK ){
      pPager->eLock = (u8)eLock;
    }
    IOTRACE(("UNLOCK %p %d\n", pPager, eLock))







>







1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
*/
static int pagerUnlockDb(Pager *pPager, int eLock){
  int rc = SQLITE_OK;

  assert( !pPager->exclusiveMode || pPager->eLock==eLock );
  assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
  assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );
  assert( eLock!=NO_LOCK || pagerIsServer(pPager)==0 );
  if( isOpen(pPager->fd) ){
    assert( pPager->eLock>=eLock );
    rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock);
    if( pPager->eLock!=UNKNOWN_LOCK ){
      pPager->eLock = (u8)eLock;
    }
    IOTRACE(("UNLOCK %p %d\n", pPager, eLock))
1803
1804
1805
1806
1807
1808
1809






1810
1811
1812
1813
1814
1815
1816
       || pPager->eState==PAGER_ERROR 
  );

  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  releaseAllSavepoints(pPager);







  if( pagerUseWal(pPager) ){
    assert( !isOpen(pPager->jfd) );
    sqlite3WalEndReadTransaction(pPager->pWal);
    pPager->eState = PAGER_OPEN;
  }else if( !pPager->exclusiveMode ){
    int rc;                       /* Error code returned by pagerUnlockDb() */
    int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;







>
>
>
>
>
>







1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
       || pPager->eState==PAGER_ERROR 
  );

  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  releaseAllSavepoints(pPager);

#ifdef SQLITE_SERVER_EDITION
  if( pagerIsServer(pPager) ){
    sqlite3ServerEnd(pPager->pServer);
    pPager->eState = PAGER_OPEN;
  }else 
#endif
  if( pagerUseWal(pPager) ){
    assert( !isOpen(pPager->jfd) );
    sqlite3WalEndReadTransaction(pPager->pWal);
    pPager->eState = PAGER_OPEN;
  }else if( !pPager->exclusiveMode ){
    int rc;                       /* Error code returned by pagerUnlockDb() */
    int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;
2101
2102
2103
2104
2105
2106
2107





2108
2109
2110
2111
2112
2113
2114
  }

  if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){
    rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0);
    if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
  }






  if( !pPager->exclusiveMode 
   && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
  ){
    rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
    pPager->changeCountDone = 0;
  }
  pPager->eState = PAGER_READER;







>
>
>
>
>







2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
  }

  if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){
    rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0);
    if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
  }

#ifdef SQLITE_SERVER_EDITION
  if( pagerIsServer(pPager) ){
    rc2 = sqlite3ServerReleaseWriteLocks(pPager->pServer);
  }else
#endif
  if( !pPager->exclusiveMode 
   && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
  ){
    rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
    pPager->changeCountDone = 0;
  }
  pPager->eState = PAGER_READER;
4091
4092
4093
4094
4095
4096
4097







4098
4099
4100
4101







4102
4103
4104
4105
4106
4107
4108
    ** If an error occurs while trying to sync the journal, shift the pager
    ** into the ERROR state. This causes UnlockAndRollback to unlock the
    ** database and close the journal file without attempting to roll it
    ** back or finalize it. The next database user will have to do hot-journal
    ** rollback before accessing the database file.
    */
    if( isOpen(pPager->jfd) ){







      pager_error(pPager, pagerSyncHotJournal(pPager));
    }
    pagerUnlockAndRollback(pPager);
  }







  sqlite3EndBenignMalloc();
  enable_simulated_io_errors();
  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
  IOTRACE(("CLOSE %p\n", pPager))
  sqlite3OsClose(pPager->jfd);
  sqlite3OsClose(pPager->fd);
  sqlite3PageFree(pTmp);







>
>
>
>
>
>
>




>
>
>
>
>
>
>







4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
    ** If an error occurs while trying to sync the journal, shift the pager
    ** into the ERROR state. This causes UnlockAndRollback to unlock the
    ** database and close the journal file without attempting to roll it
    ** back or finalize it. The next database user will have to do hot-journal
    ** rollback before accessing the database file.
    */
    if( isOpen(pPager->jfd) ){
      if( pagerIsServer(pPager) ){
        assert( pPager->journalMode==PAGER_JOURNALMODE_PERSIST );
        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
        /* If necessary, change the pager state so that the journal file 
        ** is deleted by the call to pagerUnlockAndRollback() below.  */
        if( pPager->eState==PAGER_OPEN ) pPager->eState = PAGER_READER;
      }
      pager_error(pPager, pagerSyncHotJournal(pPager));
    }
    pagerUnlockAndRollback(pPager);
  }
#ifdef SQLITE_SERVER_EDITION
  if( pagerIsServer(pPager) ){
    sqlite3ServerDisconnect(pPager->pServer, pPager->fd);
    pPager->pServer = 0;
    sqlite3_free(pPager->zJournal);
  }
#endif
  sqlite3EndBenignMalloc();
  enable_simulated_io_errors();
  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
  IOTRACE(("CLOSE %p\n", pPager))
  sqlite3OsClose(pPager->jfd);
  sqlite3OsClose(pPager->fd);
  sqlite3PageFree(pTmp);
5046
5047
5048
5049
5050
5051
5052










































































5053
5054
5055
5056
5057
5058
5059
        }
      }
    }
  }

  return rc;
}











































































/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerGet() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
        }
      }
    }
  }

  return rc;
}

#ifdef SQLITE_SERVER_EDITION
/*
** Connect pager pPager to the server-edition locking system. This is a
** no-op that returns SQLITE_OK for temp files. Otherwise the pager is
** switched to noLock mode with a PERSIST journal, a connection is
** opened with sqlite3ServerConnect(), and pPager->zJournal is replaced
** by a name of the form "<zFilename>-journal<client-id>".
**
** Return SQLITE_OK if successful, the error returned by
** sqlite3ServerConnect(), or SQLITE_NOMEM if the journal name cannot
** be allocated.
*/
static int pagerServerConnect(Pager *pPager){
  int iSlot = 0;                  /* Client id assigned by the server */
  int rc;

  /* Temp files do not take part in the server protocol. */
  if( pPager->tempFile ) return SQLITE_OK;

  pPager->noLock = 1;
  pPager->journalMode = PAGER_JOURNALMODE_PERSIST;
  rc = sqlite3ServerConnect(pPager, &pPager->pServer, &iSlot);
  if( rc!=SQLITE_OK ) return rc;

  /* Each client uses its own journal file, named after its client id. */
  pPager->zJournal = sqlite3_mprintf(
      "%s-journal%d", pPager->zFilename, iSlot
  );
  if( pPager->zJournal==0 ){
    return SQLITE_NOMEM_BKPT;
  }
  return SQLITE_OK;
}

/*
** Roll back the journal file belonging to client iClient, if there is
** one. The journal is named "<zFilename>-journal<iClient>". If no such
** file exists this function is a no-op.
**
** Otherwise the journal is opened and temporarily installed as the
** journal of pPager (with eState and eLock forced to
** PAGER_WRITER_DBMOD / EXCLUSIVE_LOCK) so that pagerSyncHotJournal() and
** pager_playback() can be run against it. The pager fields overwritten
** for this purpose are restored afterwards. If playback succeeds the
** journal file is deleted.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
*/
int sqlite3PagerRollbackJournal(Pager *pPager, int iClient){
  sqlite3_vfs * const pVfs = pPager->pVfs;
  sqlite3_file *pJrnl = 0;        /* Handle open on the client journal */
  int bFound = 0;                 /* True if the journal file exists */
  int rc;
  char *zName = sqlite3_mprintf("%s-journal%d", pPager->zFilename, iClient);

  if( zName==0 ) return SQLITE_NOMEM_BKPT;

  rc = sqlite3OsAccess(pVfs, zName, SQLITE_ACCESS_EXISTS, &bFound);
  if( rc==SQLITE_OK && bFound ){
    int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
    rc = sqlite3OsOpenMalloc(pVfs, zName, &pJrnl, flags, &flags);
  }
  assert( rc==SQLITE_OK || pJrnl==0 );

  if( pJrnl ){
    /* Snapshot the pager fields that are restored once playback is done. */
    sqlite3_file * const jfdOrig = pPager->jfd;
    char * const zJournalOrig = pPager->zJournal;
    const i64 hdrOrig = pPager->journalHdr;
    const i64 offOrig = pPager->journalOff;
    const u8 eLockOrig = pPager->eLock;
    const u8 eStateOrig = pPager->eState;

    /* Make the pager look like a writer using pJrnl as its journal. */
    pPager->eLock = EXCLUSIVE_LOCK;
    pPager->eState = PAGER_WRITER_DBMOD;
    pPager->jfd = pJrnl;
    rc = pagerSyncHotJournal(pPager);
    if( rc==SQLITE_OK ){
      rc = pager_playback(pPager, 1);
    }

    /* Put the pager back the way it was found. */
    pPager->jfd = jfdOrig;
    pPager->eState = eStateOrig;
    pPager->eLock = eLockOrig;
    pPager->journalOff = offOrig;
    pPager->journalHdr = hdrOrig;
    pPager->zJournal = zJournalOrig;

    sqlite3OsCloseFree(pJrnl);
    if( rc==SQLITE_OK ){
      rc = sqlite3OsDelete(pVfs, zName, 0);
    }
  }

  sqlite3_free(zName);
  return rc;
}

#else
# define pagerServerConnect(pPager) SQLITE_OK
#endif


/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerGet() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**
5086
5087
5088
5089
5090
5091
5092
5093


5094
5095
5096
5097
5098
5099
5100
  ** be OPEN or READER. READER is only possible if the pager is or was in 
  ** exclusive access mode.  */
  assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
  assert( assert_pager_state(pPager) );
  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
  assert( pPager->errCode==SQLITE_OK );

  if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){


    int bHotJournal = 1;          /* True if there exists a hot journal-file */

    assert( !MEMDB );
    assert( pPager->tempFile==0 || pPager->eLock==EXCLUSIVE_LOCK );

    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
    if( rc!=SQLITE_OK ){







|
>
>







5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
  ** be OPEN or READER. READER is only possible if the pager is or was in 
  ** exclusive access mode.  */
  assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
  assert( assert_pager_state(pPager) );
  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
  assert( pPager->errCode==SQLITE_OK );

  if( !pagerUseWal(pPager) 
   && !pagerIsServer(pPager) 
   && pPager->eState==PAGER_OPEN ){
    int bHotJournal = 1;          /* True if there exists a hot journal-file */

    assert( !MEMDB );
    assert( pPager->tempFile==0 || pPager->eLock==EXCLUSIVE_LOCK );

    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
5257
5258
5259
5260
5261
5262
5263


5264
5265
5266
5267

5268

5269
5270
5271
5272
5273







5274
5275
5276
5277
5278
5279
5280
5281
        ** to be the right size but is not actually valid. Avoid this
        ** possibility by unmapping the db here. */
        if( USEFETCH(pPager) ){
          sqlite3OsUnfetch(pPager->fd, 0, 0);
        }
      }
    }



    /* If there is a WAL file in the file-system, open this database in WAL
    ** mode. Otherwise, the following function call is a no-op.
    */

    rc = pagerOpenWalIfPresent(pPager);

#ifndef SQLITE_OMIT_WAL
    assert( pPager->pWal==0 || rc==SQLITE_OK );
#endif
  }








  if( pagerUseWal(pPager) ){
    assert( rc==SQLITE_OK );
    rc = pagerBeginReadTransaction(pPager);
  }

  if( pPager->tempFile==0 && pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
    rc = pagerPagecount(pPager, &pPager->dbSize);
  }







>
>




>
|
>





>
>
>
>
>
>
>
|







5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
5403
        ** to be the right size but is not actually valid. Avoid this
        ** possibility by unmapping the db here. */
        if( USEFETCH(pPager) ){
          sqlite3OsUnfetch(pPager->fd, 0, 0);
        }
      }
    }

    rc = pagerServerConnect(pPager);

    /* If there is a WAL file in the file-system, open this database in WAL
    ** mode. Otherwise, the following function call is a no-op.
    */
    if( rc==SQLITE_OK ){
      rc = pagerOpenWalIfPresent(pPager);
    }
#ifndef SQLITE_OMIT_WAL
    assert( pPager->pWal==0 || rc==SQLITE_OK );
#endif
  }

#ifdef SQLITE_SERVER_EDITION
  if( pagerIsServer(pPager) ){
    assert( rc==SQLITE_OK );
    pager_reset(pPager);
    rc = sqlite3ServerBegin(pPager->pServer);
  }
#endif
  if( rc==SQLITE_OK && pagerUseWal(pPager) ){
    assert( rc==SQLITE_OK );
    rc = pagerBeginReadTransaction(pPager);
  }

  if( pPager->tempFile==0 && pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
    rc = pagerPagecount(pPager, &pPager->dbSize);
  }
5560
5561
5562
5563
5564
5565
5566






5567
5568
5569
5570
5571
5572
5573
*/
int sqlite3PagerGet(
  Pager *pPager,      /* The pager open on the database file */
  Pgno pgno,          /* Page number to fetch */
  DbPage **ppPage,    /* Write a pointer to the page here */
  int flags           /* PAGER_GET_XXX flags */
){
  int rc;

  /* Delegate to whichever page-fetch method is installed on the pager. */
  rc = pPager->xGet(pPager, pgno, ppPage, flags);
  return rc;
}

/*
** Acquire a page if it is already in the in-memory cache.  Do
** not read the page from disk.  Return a pointer to the page,
** or 0 if the page is not in cache. 







>
>
>
>
>
>







5682
5683
5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
*/
int sqlite3PagerGet(
  Pager *pPager,      /* The pager open on the database file */
  Pgno pgno,          /* Page number to fetch */
  DbPage **ppPage,    /* Write a pointer to the page here */
  int flags           /* PAGER_GET_XXX flags */
){
#ifdef SQLITE_SERVER_EDITION
  /* In server mode, take a non-blocking read lock (bWrite==0, bBlock==0)
  ** on the page before fetching it. If the lock cannot be obtained,
  ** return the error without touching *ppPage. */
  int rcLock = SQLITE_OK;
  if( pagerIsServer(pPager) ){
    rcLock = sqlite3ServerLock(pPager->pServer, pgno, 0, 0);
  }
  if( rcLock!=SQLITE_OK ){
    return rcLock;
  }
#endif

  /* Delegate to whichever page-fetch method is installed on the pager. */
  return pPager->xGet(pPager, pgno, ppPage, flags);
}

/*
** Acquire a page if it is already in the in-memory cache.  Do
** not read the page from disk.  Return a pointer to the page,
** or 0 if the page is not in cache. 
5861
5862
5863
5864
5865
5866
5867







5868
5869
5870
5871
5872
5873
5874
       || pPager->eState==PAGER_WRITER_CACHEMOD
       || pPager->eState==PAGER_WRITER_DBMOD
  );
  assert( assert_pager_state(pPager) );
  assert( pPager->errCode==0 );
  assert( pPager->readOnly==0 );
  CHECK_PAGE(pPg);








  /* The journal file needs to be opened. Higher level routines have already
  ** obtained the necessary locks to begin the write-transaction, but the
  ** rollback journal might not yet be open. Open it now if this is the case.
  **
  ** This is done before calling sqlite3PcacheMakeDirty() on the page. 
  ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then







>
>
>
>
>
>
>







5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
       || pPager->eState==PAGER_WRITER_CACHEMOD
       || pPager->eState==PAGER_WRITER_DBMOD
  );
  assert( assert_pager_state(pPager) );
  assert( pPager->errCode==0 );
  assert( pPager->readOnly==0 );
  CHECK_PAGE(pPg);

#ifdef SQLITE_SERVER_EDITION
  if( pagerIsServer(pPager) ){
    rc = sqlite3ServerLock(pPager->pServer, pPg->pgno, 1, 0);
    if( rc!=SQLITE_OK ) return rc;
  }
#endif

  /* The journal file needs to be opened. Higher level routines have already
  ** obtained the necessary locks to begin the write-transaction, but the
  ** rollback journal might not yet be open. Open it now if this is the case.
  **
  ** This is done before calling sqlite3PcacheMakeDirty() on the page. 
  ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
6140
6141
6142
6143
6144
6145
6146


6147

6148
6149
6150
6151
6152
6153
6154
# define DIRECT_MODE 0
  assert( isDirectMode==0 );
  UNUSED_PARAMETER(isDirectMode);
#else
# define DIRECT_MODE isDirectMode
#endif



  if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){

    PgHdr *pPgHdr;                /* Reference to page 1 */

    assert( !pPager->tempFile && isOpen(pPager->fd) );

    /* Open page 1 of the file for writing. */
    rc = sqlite3PagerGet(pPager, 1, &pPgHdr, 0);
    assert( pPgHdr==0 || rc==SQLITE_OK );







>
>
|
>







6275
6276
6277
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291
6292
# define DIRECT_MODE 0
  assert( isDirectMode==0 );
  UNUSED_PARAMETER(isDirectMode);
#else
# define DIRECT_MODE isDirectMode
#endif

  if( 0==pagerIsServer(pPager) 
   && !pPager->changeCountDone 
   && ALWAYS(pPager->dbSize>0) 
  ){
    PgHdr *pPgHdr;                /* Reference to page 1 */

    assert( !pPager->tempFile && isOpen(pPager->fd) );

    /* Open page 1 of the file for writing. */
    rc = sqlite3PagerGet(pPager, 1, &pPgHdr, 0);
    assert( pPgHdr==0 || rc==SQLITE_OK );
6299
6300
6301
6302
6303
6304
6305




6306
6307
6308
6309
6310
6311
6312
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{




    if( pagerUseWal(pPager) ){
      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
      PgHdr *pPageOne = 0;
      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);







>
>
>
>







6437
6438
6439
6440
6441
6442
6443
6444
6445
6446
6447
6448
6449
6450
6451
6452
6453
6454
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{
    /* If this connection is in server mode, ignore any master journal. */
    if( pagerIsServer(pPager) ){
      zMaster = 0;
    }
    if( pagerUseWal(pPager) ){
      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
      PgHdr *pPageOne = 0;
      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);
7309
7310
7311
7312
7313
7314
7315
7316
7317
7318
7319
7320
7321
7322
7323

/*
** Return non-zero if WAL mode may be used with this pager. This is
** never the case if the pager is in noLock mode. Otherwise WAL is
** available if the pager is in exclusive mode, or if the VFS file
** methods are version 2 or later and provide an xShmMap method.
*/
int sqlite3PagerWalSupported(Pager *pPager){
  const sqlite3_io_methods *pIo = pPager->fd->pMethods;

  if( pPager->noLock ){
    return 0;
  }
  if( pPager->exclusiveMode ){
    return 1;
  }
  return (pIo->iVersion>=2 && pIo->xShmMap);
}

/*
** Attempt to take an exclusive lock on the database file. If a PENDING lock
** is obtained instead, immediately release it.
*/







|







7451
7452
7453
7454
7455
7456
7457
7458
7459
7460
7461
7462
7463
7464
7465

/*
** Return non-zero if WAL mode may be used with this pager. A pager in
** noLock mode does not support WAL unless it is a server-mode pager.
** Otherwise WAL is available if the pager is in exclusive mode, or if
** the VFS file methods are version 2 or later and provide an xShmMap
** method.
*/
int sqlite3PagerWalSupported(Pager *pPager){
  const sqlite3_io_methods *pIo = pPager->fd->pMethods;

  if( pPager->noLock && !pagerIsServer(pPager) ){
    return 0;
  }
  if( pPager->exclusiveMode ){
    return 1;
  }
  return (pIo->iVersion>=2 && pIo->xShmMap);
}

/*
** Attempt to take an exclusive lock on the database file. If a PENDING lock
** is obtained instead, immediately release it.
*/
7404
7405
7406
7407
7408
7409
7410



7411
7412
7413
7414
7415
7416
7417
    /* Close any rollback journal previously open */
    sqlite3OsClose(pPager->jfd);

    rc = pagerOpenWal(pPager);
    if( rc==SQLITE_OK ){
      pPager->journalMode = PAGER_JOURNALMODE_WAL;
      pPager->eState = PAGER_OPEN;



    }
  }else{
    *pbOpen = 1;
  }

  return rc;
}







>
>
>







7546
7547
7548
7549
7550
7551
7552
7553
7554
7555
7556
7557
7558
7559
7560
7561
7562
    /* Close any rollback journal previously open */
    sqlite3OsClose(pPager->jfd);

    rc = pagerOpenWal(pPager);
    if( rc==SQLITE_OK ){
      pPager->journalMode = PAGER_JOURNALMODE_WAL;
      pPager->eState = PAGER_OPEN;
#ifdef SQLITE_SERVER_EDITION
      sqlite3WalServer(pPager->pWal, pPager->pServer);
#endif
    }
  }else{
    *pbOpen = 1;
  }

  return rc;
}
7516
7517
7518
7519
7520
7521
7522
7523









7524
** is empty, return 0.
*/
int sqlite3PagerWalFramesize(Pager *pPager){
  int szFrame;                    /* Value reported by the WAL module */

  /* May only be called while the pager is in READER state or higher. */
  assert( pPager->eState>=PAGER_READER );
  szFrame = sqlite3WalFramesize(pPager->pWal);
  return szFrame;
}
#endif










#endif /* SQLITE_OMIT_DISKIO */








>
>
>
>
>
>
>
>
>

7661
7662
7663
7664
7665
7666
7667
7668
7669
7670
7671
7672
7673
7674
7675
7676
7677
7678
** is empty, return 0.
*/
int sqlite3PagerWalFramesize(Pager *pPager){
  int szFrame;                    /* Value reported by the WAL module */

  /* May only be called while the pager is in READER state or higher. */
  assert( pPager->eState>=PAGER_READER );
  szFrame = sqlite3WalFramesize(pPager->pWal);
  return szFrame;
}
#endif

#ifdef SQLITE_SERVER_EDITION
/*
** Public wrapper around pagerIsServer(): return its result for pPager.
*/
int sqlite3PagerIsServer(Pager *pPager){
  int bServer = pagerIsServer(pPager);
  return bServer;
}
/*
** Take a non-blocking server page lock on page pgno - a write lock if
** bWrite is non-zero, otherwise a read lock. Returns whatever
** sqlite3ServerLock() returns.
**
** NOTE(review): pPager->pServer is passed on unchecked; presumably
** callers only use this when sqlite3PagerIsServer() is true.
*/
int sqlite3PagerPagelock(Pager *pPager, Pgno pgno, int bWrite){
  const int bBlock = 0;           /* Never block inside this wrapper */
  return sqlite3ServerLock(pPager->pServer, pgno, bWrite, bBlock);
}
#endif

#endif /* SQLITE_OMIT_DISKIO */

Changes to src/pager.h.

231
232
233
234
235
236
237






238
239
  void sqlite3PagerRefdump(Pager*);
  void disable_simulated_io_errors(void);
  void enable_simulated_io_errors(void);
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif







#endif /* SQLITE_PAGER_H */







>
>
>
>
>
>


231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
  void sqlite3PagerRefdump(Pager*);
  void disable_simulated_io_errors(void);
  void enable_simulated_io_errors(void);
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif

#ifdef SQLITE_SERVER_EDITION
  int sqlite3PagerRollbackJournal(Pager*, int);
  int sqlite3PagerIsServer(Pager *pPager);
  int sqlite3PagerPagelock(Pager *pPager, Pgno, int);
#endif

#endif /* SQLITE_PAGER_H */

Added src/server.c.



















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
/*
** 2017 April 24
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

#include "sqliteInt.h"

/*
** HMA file layout:
**
**      4 bytes - DMS slot. All connections read-lock this slot.
**
**   16*4 bytes - locking slots. Connections hold a read-lock on a locking slot
**                when they are connected, a write lock when they have an open
**                transaction.
**
**    N*4 bytes - Page locking slots. N is HMA_PAGELOCK_SLOTS.
**
** Page-locking slot format:
**
**   Each page-locking slot provides SHARED/RESERVED/EXCLUSIVE locks on a
**   single page. A RESERVED lock is similar to a RESERVED in SQLite's
**   rollback mode - existing SHARED locks may continue but new SHARED locks
**   may not be established. As in rollback mode, EXCLUSIVE and RESERVED 
**   locks are mutually exclusive.
**
**   Each 32-bit locking slot is divided into two sections - a bitmask for
**   read-locks and a single integer field for the write lock. The bitmask
**   occupies the least-significant 27 bits of the slot. The integer field
**   occupies the remaining 5 bits (so that it can store values from 0-31).
**
**   Each client has a unique integer client id. Currently these range from
**   0-15 (maximum of 16 concurrent connections). The page-locking slot format
**   allows this to be increased to 0-26 (maximum of 27 connections). To
**   take a SHARED lock, the corresponding bit is set in the locking slot
**   bitmask:
**
**     slot = slot | (1 << iClient);
**
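**   To take an EXCLUSIVE or RESERVED lock, the integer part of the locking
**   slot is set to the client-id of the locker plus one (a value of zero 
**   indicates that no connection holds a RESERVED or EXCLUSIVE lock):
**
**     slot = slot | ((iClient+1) << 27)
**
**   NOTE: the helper functions in this file currently extract the integer
**   field using (slot >> HMA_CLIENT_SLOTS), i.e. a shift of 16 bits rather
**   than 27. The description above and the code should be reconciled.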
*/

#ifdef SQLITE_SERVER_EDITION

#define HMA_CLIENT_SLOTS   16
#define HMA_PAGELOCK_SLOTS (256*1024)

#define HMA_FILE_SIZE (4 + 4*HMA_CLIENT_SLOTS + 4*HMA_PAGELOCK_SLOTS)

#include "unistd.h"
#include "fcntl.h"
#include "sys/mman.h"
#include "sys/types.h"
#include "sys/stat.h"
#include "errno.h"

typedef struct ServerHMA ServerHMA;

/*
** Process-wide state for the server-edition code. The single instance,
** g_server, holds the head of the list of ServerHMA objects. The list is
** modified by serverOpenHma() and serverDecrHmaRefcount(); the former
** asserts that the global server mutex is held.
*/
struct ServerGlobal {
  ServerHMA *pHma;                /* Linked list of all ServerHMA objects */
};
static struct ServerGlobal g_server;

/*
** There is one instance of the following structure for each distinct 
** HMA file opened by clients within this process. 
*/
struct ServerHMA {
  char *zName;                         /* hma file path */
  int fd;                              /* Fd open on hma file */
  int nClient;                         /* Current number of clients */
  Server *aClient[HMA_CLIENT_SLOTS];   /* Local (this process) clients */
  u32 *aMap;                           /* MMapped hma file */
  ServerHMA *pNext;                    /* Next HMA in this process */

  /* Device and inode of the file passed to serverOpenHma(), as reported
  ** by stat(). Used to find an existing ServerHMA for the same file. */
  dev_t st_dev;
  ino_t st_ino;
};

/*
** One instance of this structure is allocated by sqlite3ServerConnect()
** for each connection, and freed by sqlite3ServerDisconnect().
*/
struct Server {
  ServerHMA *pHma;                /* Hma file object */
  int iClient;                    /* Client id (-1 until a slot is claimed) */
  Pager *pPager;                  /* Associated pager object */
  i64 nUsWrite;                   /* Cumulative us holding WRITER lock */
  i64 iUsWrite;                   /* Time WRITER lock was taken */
  int nAlloc;                     /* Allocated size of aLock[] array */
  int nLock;                      /* Number of entries in aLock[] */
  u32 *aLock;                     /* Heap array of locked page numbers; each
                                  ** entry is released by sqlite3ServerEnd() */
};

#define SERVER_WRITE_LOCK 3
#define SERVER_READ_LOCK  2
#define SERVER_NO_LOCK    1

/*
** Global mutex functions used by code in this file.
*/
static void serverEnterMutex(void){
  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1));
}
static void serverLeaveMutex(void){
  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1));
}
/* Assert that the caller holds the static mutex used by this file. The
** whole check lives inside assert(), so it compiles to nothing when
** asserts are disabled. */
static void serverAssertMutexHeld(void){
  assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_APP1)) );
}

/*
** Take, change or release a POSIX advisory lock on a single byte of file
** descriptor fd. The byte locked is the first byte of 32-bit slot iSlot
** (offset iSlot*sizeof(u32)). Parameter eLock must be one of
** SERVER_WRITE_LOCK, SERVER_READ_LOCK or SERVER_NO_LOCK (unlock).
**
** If bBlock is non-zero the call blocks until the lock is available
** (F_SETLKW), otherwise it fails immediately (F_SETLK).
**
** Return SQLITE_OK if fcntl() succeeds, SQLITE_BUSY_DEADLOCK if a
** blocking request fails with EDEADLK, or SQLITE_BUSY for any other
** failure.
*/
static int posixLock(int fd, int iSlot, int eLock, int bBlock){
  struct flock lk;
  const int cmd = (bBlock ? F_SETLKW : F_SETLK);

  assert( eLock==SERVER_WRITE_LOCK 
       || eLock==SERVER_READ_LOCK 
       || eLock==SERVER_NO_LOCK 
  );

  memset(&lk, 0, sizeof(lk));
  switch( eLock ){
    case SERVER_WRITE_LOCK: lk.l_type = F_WRLCK; break;
    case SERVER_READ_LOCK:  lk.l_type = F_RDLCK; break;
    case SERVER_NO_LOCK:    lk.l_type = F_UNLCK; break;
    default:                lk.l_type = 0;       break;
  }
  lk.l_whence = SEEK_SET;
  lk.l_start = iSlot*sizeof(u32);
  lk.l_len = 1;

  if( fcntl(fd, cmd, &lk)==0 ){
    return SQLITE_OK;
  }
  if( bBlock && errno==EDEADLK ){
    return SQLITE_BUSY_DEADLOCK;
  }
  return SQLITE_BUSY;
}

/*
** Memory-map the first HMA_FILE_SIZE bytes of the hma file open on
** p->fd (shared, read/write) and store the mapping in p->aMap. It must
** not already be mapped.
**
** Return SQLITE_OK if successful. If mmap() fails, return SQLITE_ERROR
** and leave p->aMap set to 0, so that cleanup code which tests p->aMap
** before calling munmap() does not try to unmap an invalid address.
*/
static int serverMapFile(ServerHMA *p){
  void *pMap;
  assert( p->aMap==0 );
  pMap = mmap(0, HMA_FILE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0);
  /* mmap() signals failure with MAP_FAILED ((void*)-1), never with NULL. */
  if( pMap==MAP_FAILED ){
    return SQLITE_ERROR;
  }
  p->aMap = (u32*)pMap;
  return SQLITE_OK;
}


/*
** Decrement the client count of pHma. If it drops to zero (or below),
** unmap and close the hma file, unlink the object from the global
** g_server.pHma list and free it. A NULL argument is a no-op.
**
** The object must be part of the g_server.pHma list.
*/
static void serverDecrHmaRefcount(ServerHMA *pHma){
  ServerHMA **ppLink;

  if( pHma==0 ) return;
  if( --pHma->nClient>0 ) return;

  /* Last reference dropped. Release OS resources. */
  if( pHma->aMap ){
    munmap(pHma->aMap, HMA_FILE_SIZE);
  }
  if( pHma->fd>=0 ){
    close(pHma->fd);
  }

  /* Find the pointer that refers to pHma and splice pHma out. */
  ppLink = &g_server.pHma;
  while( *ppLink!=pHma ){
    ppLink = &(*ppLink)->pNext;
  }
  *ppLink = pHma->pNext;
  sqlite3_free(pHma);
}


/*
** Obtain a reference to the ServerHMA object for the file zPath, creating
** it if this process does not already have one. The global server mutex
** must be held by the caller.
**
** An existing object is located by comparing the device/inode pair
** reported by stat(zPath) with those of each object in the g_server.pHma
** list. If one is found its client count is incremented. Otherwise a new
** object is allocated, linked into the list, and the file "<zPath>-hma"
** is opened (created if necessary) and mapped.
**
** On success SQLITE_OK is returned and *ppHma is set to the object. On
** failure an SQLite error code is returned and *ppHma is set to 0.
*/
static int serverOpenHma(Pager *pPager, const char *zPath, ServerHMA **ppHma){
  struct stat sStat;              /* Structure populated by stat() */
  int res;                        /* result of stat() */
  int rc = SQLITE_OK;             /* Return code */
  ServerHMA *pHma = 0;

  serverAssertMutexHeld();

  res = stat(zPath, &sStat);
  if( res!=0 ){
    sqlite3_log(SQLITE_CANTOPEN, "Failed to stat(%s)", zPath);
    rc = SQLITE_ERROR;
  }else{
    /* Search for an existing object for the same device/inode. */
    for(pHma=g_server.pHma; pHma; pHma=pHma->pNext){
      if( sStat.st_dev==pHma->st_dev && sStat.st_ino==pHma->st_ino ) break;
    }
    if( pHma==0 ){
      /* The allocation holds the structure followed by the hma file name:
      ** zPath, the 4 byte "-hma" suffix and a nul-terminator. */
      int nPath = strlen(zPath);
      int nByte = sizeof(ServerHMA) + nPath+1 + 4;

      pHma = (ServerHMA*)sqlite3_malloc(nByte);
      if( pHma==0 ){
        rc = SQLITE_NOMEM;
      }else{
        int i;
        memset(pHma, 0, nByte);
        pHma->zName = (char*)&pHma[1];
        pHma->nClient = 1;
        pHma->st_dev = sStat.st_dev;
        pHma->st_ino = sStat.st_ino;
        /* Link the new object into the global list immediately. The error
        ** path below relies on serverDecrHmaRefcount() to unlink it. */
        pHma->pNext = g_server.pHma;
        g_server.pHma = pHma;

        /* Copying 5 bytes of "-hma" includes the nul-terminator. */
        memcpy(pHma->zName, zPath, nPath);
        memcpy(&pHma->zName[nPath], "-hma", 5);

        pHma->fd = open(pHma->zName, O_RDWR|O_CREAT, 0644);
        if( pHma->fd<0 ){
          sqlite3_log(SQLITE_CANTOPEN, "Failed to open(%s)", pHma->zName);
          rc = SQLITE_ERROR;
        }

        if( rc==SQLITE_OK ){
          /* Write-lock the DMS slot. If successful, initialize the hma file. */
          rc = posixLock(pHma->fd, 0, SERVER_WRITE_LOCK, 0);
          if( rc==SQLITE_OK ){
            /* This connection holds the DMS write-lock: size the file, map
            ** it, zero the mapping, then roll back the journal of every
            ** possible client id. */
            res = ftruncate(pHma->fd, HMA_FILE_SIZE);
            if( res!=0 ){
              sqlite3_log(SQLITE_CANTOPEN, 
                  "Failed to ftruncate(%s)", pHma->zName
              );
              rc = SQLITE_ERROR;
            }
            if( rc==SQLITE_OK ){
              rc = serverMapFile(pHma);
            }
            if( rc==SQLITE_OK ){
              memset(pHma->aMap, 0, HMA_FILE_SIZE);
            }else{
              rc = SQLITE_ERROR;
            }
            for(i=0; rc==SQLITE_OK && i<HMA_CLIENT_SLOTS; i++){
              rc = sqlite3PagerRollbackJournal(pPager, i);
            }
          }else{
            /* The DMS write-lock could not be taken. Just map the file;
            ** presumably another connection has initialized (or is
            ** initializing) it. */
            rc = serverMapFile(pHma);
          }
          if( rc==SQLITE_OK ){
            /* In both cases finish holding a read-lock on the DMS slot.
            ** This is a blocking request. */
            rc = posixLock(pHma->fd, 0, SERVER_READ_LOCK, 1);
          }
        }

        if( rc!=SQLITE_OK ){
          /* nClient is 1, so this unmaps, closes, unlinks and frees pHma. */
          serverDecrHmaRefcount(pHma);
          pHma = 0;
        }
      }
    }else{
      pHma->nClient++;
    }
  }

  *ppHma = pHma;
  return rc;
}

/*
** Return a pointer to the page-locking slot used for page pgno. The
** page-locking slots follow the DMS slot and the HMA_CLIENT_SLOTS client
** slots in the mapping; pages are assigned to slots modulo
** HMA_PAGELOCK_SLOTS.
*/
static u32 *serverPageLockSlot(Server *p, Pgno pgno){
  u32 *aMap = p->pHma->aMap;
  int iSlot = pgno % HMA_PAGELOCK_SLOTS;
  int iEntry = 1 + HMA_CLIENT_SLOTS + iSlot;
  return &aMap[iEntry];
}
/*
** Return a pointer to the client slot for client iClient. Client slots
** start at entry 1 of the mapping, directly after the DMS slot.
*/
static u32 *serverClientSlot(Server *p, int iClient){
  u32 *aMap = p->pHma->aMap;
  return &aMap[1 + iClient];
}

/*
** Close the "connection" and *-hma file. This deletes the object passed
** as the first argument.
**
** If the connection owns a client slot (p->iClient>=0), the slot in the
** hma file is zeroed, the entry in the in-process aClient[] array is
** cleared and the posix lock on the slot is released.
**
** If dbfd is not NULL, this is the last client of the hma file within
** this process and an EXCLUSIVE lock can be obtained on dbfd, the hma
** file is unlinked (best effort - the result of unlink() is ignored).
**
** Passing a NULL pointer as p is a harmless no-op. This matters because
** sqlite3ServerConnect() calls this function from its error path, where
** p may be NULL if the allocation of the Server object failed.
*/
void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd){
  if( p==0 ) return;
  if( p->pHma ){
    ServerHMA *pHma = p->pHma;
    serverEnterMutex();
    if( p->iClient>=0 ){
      u32 *pSlot = serverClientSlot(p, p->iClient);
      *pSlot = 0;
      assert( pHma->aClient[p->iClient]==p );
      pHma->aClient[p->iClient] = 0;
      posixLock(pHma->fd, p->iClient+1, SERVER_NO_LOCK, 0);
    }
    if( dbfd 
     && pHma->nClient==1 
     && SQLITE_OK==sqlite3OsLock(dbfd, SQLITE_LOCK_EXCLUSIVE)
    ){
      unlink(pHma->zName);
    }
    /* Must come after the nClient==1 test above, as it may free pHma. */
    serverDecrHmaRefcount(pHma);
    serverLeaveMutex();
  }
  sqlite3_free(p->aLock);
  sqlite3_free(p);
}

/*
** Clean up after client iBlock: log a notice, roll back its journal file
** (if any) using sqlite3PagerRollbackJournal() and, if that succeeds,
** remove every lock it holds from the page-locking slots in the hma
** file. For each slot this clears the client's read-lock bit and, if the
** write-locker field identifies iBlock, that field as well. Slots are
** updated with compare-and-swap so that concurrent updates are not lost.
**
** Return SQLITE_OK if successful, or the error code from the journal
** rollback otherwise (in which case no locks are cleared).
*/
static int serverRollbackClient(Server *p, int iBlock){
  int iSlot;
  int rc;

  sqlite3_log(SQLITE_NOTICE, "Rolling back failed client %d", iBlock);

  /* Roll back any journal file for client iBlock. */
  rc = sqlite3PagerRollbackJournal(p->pPager, iBlock);
  if( rc!=SQLITE_OK ) return rc;

  /* Clear any locks held by client iBlock from the HMA file.  */
  for(iSlot=0; iSlot<HMA_PAGELOCK_SLOTS; iSlot++){
    u32 *pSlot = serverPageLockSlot(p, (Pgno)iSlot);
    u32 vOld;
    u32 vNew;
    do{
      vOld = *pSlot;
      vNew = vOld & ~(1 << iBlock);
      if( (vOld>>HMA_CLIENT_SLOTS)==iBlock+1 ){
        vNew = vNew & ((1<<HMA_CLIENT_SLOTS)-1);
      }
    }while( __sync_val_compare_and_swap(pSlot, vOld, vNew)!=vOld );
  }

  return rc;
}


/*
** Open the *-hma file and "connect" to the system.
**
** A new Server object is allocated, the hma file associated with the
** database file of pPager is opened (or an existing in-process
** ServerHMA object is reused), and a free client slot is claimed.
**
** A slot is claimed by taking a non-blocking write lock on it. If the
** slot value in the hma file is non-zero at that point, the previous
** owner did not disconnect cleanly, so serverRollbackClient() is run for
** it. The lock is then downgraded to a read lock (or released, if the
** rollback failed).
**
** On success SQLITE_OK is returned, *ppOut is set to the new object and
** *piClient to the client id. Otherwise an SQLite error code is returned
** and *ppOut is set to 0. SQLITE_BUSY is returned if none of the
** HMA_CLIENT_SLOTS client slots can be claimed.
*/
int sqlite3ServerConnect(
  Pager *pPager, 
  Server **ppOut, 
  int *piClient
){
  const char *zPath = sqlite3PagerFilename(pPager, 0);
  int rc = SQLITE_OK;
  Server *p;

  p = (Server*)sqlite3_malloc(sizeof(Server));
  if( p==0 ){
    /* Nothing has been acquired yet, so there is nothing to clean up. */
    *ppOut = 0;
    return SQLITE_NOMEM;
  }
  memset(p, 0, sizeof(Server));
  p->iClient = -1;
  p->pPager = pPager;

  serverEnterMutex();
  rc = serverOpenHma(pPager, zPath, &p->pHma);

  /* File is now mapped. Find a free client slot. */
  if( rc==SQLITE_OK ){
    int i;
    Server **aClient = p->pHma->aClient;
    int fd = p->pHma->fd;
    for(i=0; i<HMA_CLIENT_SLOTS; i++){
      if( aClient[i]==0 ){
        int res = posixLock(fd, i+1, SERVER_WRITE_LOCK, 0);
        if( res==SQLITE_OK ){
          u32 *pSlot = serverClientSlot(p, i);
          if( *pSlot ){
            rc = serverRollbackClient(p, i);
          }
          posixLock(fd, i+1, (!rc ? SERVER_READ_LOCK : SERVER_NO_LOCK), 0);
          break;
        }
      }
    }

    if( rc==SQLITE_OK ){
      /* If the loop above ran to completion without claiming a slot then
      ** i==HMA_CLIENT_SLOTS, which is not a valid index into aClient[] or
      ** the client slots of the hma file. */
      if( i>=HMA_CLIENT_SLOTS ){
        rc = SQLITE_BUSY;
      }else{
        u32 *pSlot = serverClientSlot(p, i);
        *piClient = p->iClient = i;
        aClient[i] = p;
        *pSlot = 1;
      }
    }
  }

  serverLeaveMutex();

  if( rc!=SQLITE_OK ){
    /* p->iClient is still -1 on this path, so only the hma reference (if
    ** any) and the Server object itself are released. */
    sqlite3ServerDisconnect(p, 0);
    p = 0;
  }
  *ppOut = p;
  return rc;
}

/*
** This function is passed the value (v) of a page-locking slot that is
** preventing connection p from obtaining a lock. It identifies one
** client that holds a conflicting lock and attempts to get past it.
**
** The blocking client is the one named by the write-locker field of v
** or, if that field is empty or names this connection, the first other
** client with a read-lock bit set in v.
**
** Nothing is done if the blocking client belongs to this process (its
** aClient[] entry is set). Otherwise a non-blocking write lock is
** attempted on the blocker's client slot:
**
**   * If it is granted, the blocker is treated as a failed client. It is
**     rolled back with serverRollbackClient() and *pbRetry is set to 1
**     if that succeeds.
**
**   * If it is refused and bBlock is non-zero, a blocking read lock is
**     requested on the slot and released again as soon as it is granted
**     (sqlite3ServerBegin() and sqlite3ServerEnd() in this file take the
**     write lock on the slot and downgrade it to a read lock,
**     respectively). *pbRetry is then set to 1.
**
** SQLITE_BUSY from the lock primitives is converted to SQLITE_OK, with
** *pbRetry left unmodified. Other errors (e.g. SQLITE_BUSY_DEADLOCK) are
** returned to the caller. Parameter bWrite is currently unused.
*/
static int serverOvercomeLock(
  Server *p,                      /* Server connection */
  int bWrite,                     /* True for a write-lock */
  int bBlock,                     /* If true, block for this lock */
  u32 v,                          /* Value of blocking page locking slot */
  int *pbRetry                    /* OUT: True if caller should retry lock */
){
  int rc = SQLITE_OK;
  int iBlock = ((int)(v>>HMA_CLIENT_SLOTS))-1;

  /* No write-locker other than this connection. Look for a read-locker. */
  if( iBlock<0 || iBlock==p->iClient ){
    for(iBlock=0; iBlock<HMA_CLIENT_SLOTS; iBlock++){
      if( iBlock!=p->iClient && (v & (1<<iBlock)) ) break;
    }
  }
  /* NOTE(review): if no blocker is found, iBlock==HMA_CLIENT_SLOTS and the
  ** aClient[] access below is out of bounds when asserts are disabled. */
  assert( iBlock<HMA_CLIENT_SLOTS );

  serverEnterMutex();

  if( 0==p->pHma->aClient[iBlock] ){
    rc = posixLock(p->pHma->fd, iBlock+1, SERVER_WRITE_LOCK, 0);
    if( rc==SQLITE_OK ){
      rc = serverRollbackClient(p, iBlock);

      /* Release the lock on slot iBlock */
      posixLock(p->pHma->fd, iBlock+1, SERVER_NO_LOCK, 0);
      if( rc==SQLITE_OK ){
        *pbRetry = 1;
      }
    }else if( rc==SQLITE_BUSY ){
      if( bBlock ){
        /* Wait for the blocker to give up its write lock on the slot. */
        rc = posixLock(p->pHma->fd, iBlock+1, SERVER_READ_LOCK, 1);
        if( rc==SQLITE_OK ){
          posixLock(p->pHma->fd, iBlock+1, SERVER_NO_LOCK, 0);
          *pbRetry = 1;
        }
      }

      /* A busy lock is not an error here. */
      if( rc==SQLITE_BUSY ){
        rc = SQLITE_OK;
      }
    }
  }

  serverLeaveMutex();

  return rc;
}

/*
** Begin a transaction.
**
** This takes a blocking write lock on this client's own client slot in
** the hma file and then, if that succeeds, a blocking read lock on
** page 1. Returns SQLITE_OK or the error code from whichever lock
** failed.
*/
int sqlite3ServerBegin(Server *p){
  int rc;

  rc = posixLock(p->pHma->fd, p->iClient+1, SERVER_WRITE_LOCK, 1);
  if( rc==SQLITE_OK ){
    rc = sqlite3ServerLock(p, 1, 0, 1);
  }
  return rc;
}

/*
** End a transaction (and release all locks).
**
** For every page number recorded in p->aLock[], atomically update the
** page-locking slot so that this client's read bit is cleared and, if
** this client is the recorded writer (upper bits == iClient+1), the
** writer field is cleared as well. The compare-and-swap is retried until
** it succeeds.
**
** Slot 0 is the lock taken by WAL writers. When it is released, the time
** since it was obtained (p->iUsWrite, set by sqlite3ServerLock()) is added
** to p->nUsWrite, and a warning is logged each time the cumulative total
** crosses a whole-second boundary.
**
** Finally the lock on this client's own slot is changed to
** SERVER_READ_LOCK. The return value is the result of that posixLock()
** call.
*/
int sqlite3ServerEnd(Server *p){
  int i;
  for(i=0; i<p->nLock; i++){
    u32 *pSlot = serverPageLockSlot(p, p->aLock[i]);
    while( 1 ){
      u32 v = *pSlot;
      u32 n = v;
      if( (v>>HMA_CLIENT_SLOTS)==p->iClient+1 ){
        /* This client is the writer: keep only the read bits. */
        n = n & ((1 << HMA_CLIENT_SLOTS)-1);
      }
      n = n & ~(1 << p->iClient);
      if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break;
    }
    if( p->aLock[i]==0 ){
      struct timeval t2;
      i64 nUs;
      i64 nUsPrev = p->nUsWrite;  /* Cumulative total before this update */
      gettimeofday(&t2, 0);
      nUs = (i64)t2.tv_sec * 1000000 + t2.tv_usec - p->iUsWrite;
      p->nUsWrite += nUs;

      /* Compare the total before and after adding nUs. Testing the new
      ** total against (new total + nUs) would report boundary crossings
      ** that have not happened yet and miss those that just did. */
      if( (nUsPrev / 1000000)!=(p->nUsWrite / 1000000) ){
        sqlite3_log(SQLITE_WARNING, 
            "Cumulative WRITER time: %lldms\n", p->nUsWrite/1000
        );
      }
    }
  }
  p->nLock = 0;
#if 1
  return posixLock(p->pHma->fd, p->iClient+1, SERVER_READ_LOCK, 0);
#else
  return SQLITE_OK;
#endif
}

/*
** Release all write-locks.
**
** This is currently a placeholder: it does not touch p and always
** returns SQLITE_OK.
*/
int sqlite3ServerReleaseWriteLocks(Server *p){
  return SQLITE_OK;
}

/*
** Decode the writer field of page-locking slot value v.
**
** The bits of v above the low HMA_CLIENT_SLOTS bits hold (client id + 1)
** for the client holding the EXCLUSIVE or RESERVED lock, or zero if
** there is none. Return that client id, or -1 if no client holds such
** a lock.
*/
int serverWriteLocker(u32 v){
  int iOwnerPlusOne = (int)(v >> HMA_CLIENT_SLOTS);
  return iOwnerPlusOne - 1;
}

/*
** Lock page pgno for reading (bWrite==0) or writing (bWrite==1).
**
** If parameter bBlock is non-zero, then make this a blocking lock if
** possible.
**
** Each page has a u32 locking slot (see serverPageLockSlot()). The low
** HMA_CLIENT_SLOTS bits are one read bit per client; the bits above
** them hold (client id + 1) of the client with the write/reserved lock,
** or zero. All slot updates are made with compare-and-swap and retried
** if another client modified the slot in the meantime.
**
** wal.c passes pgno==0 to take the lock that serializes WAL writers. For
** that page this function also records the current time in p->iUsWrite,
** which sqlite3ServerEnd() uses to accumulate writer time.
**
** Returns SQLITE_OK if the lock is held on return, SQLITE_NOMEM_BKPT if
** the aLock[] array could not be grown, SQLITE_BUSY_DEADLOCK if a
** conflicting lock could not be overcome, or an error code propagated
** from serverOvercomeLock().
*/
int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock){
  int rc = SQLITE_OK;
  int bReserved = 0;              /* True once the RESERVED CAS below works */
  u32 *pSlot = serverPageLockSlot(p, pgno);

  /* Grow the aLock[] array, if required */
  if( p->nLock==p->nAlloc ){
    int nNew = p->nAlloc ? p->nAlloc*2 : 128;
    u32 *aNew;
    aNew = (u32*)sqlite3_realloc(p->aLock, sizeof(u32)*nNew);
    if( aNew==0 ){
      rc = SQLITE_NOMEM_BKPT;
    }else{
      p->aLock = aNew;
      p->nAlloc = nNew;
    }
  }
  if( rc==SQLITE_OK ){
    u32 v = *pSlot;

    /* Check if the required lock is already held. If so, exit this function
    ** early. Otherwise, add an entry to the aLock[] array to record the fact
    ** that the lock may need to be released.  */
    if( bWrite ){
      int iLock = ((int)(v>>HMA_CLIENT_SLOTS)) - 1;
      if( iLock==p->iClient ) goto server_lock_out;
    }else{
      if( v & (1<<p->iClient) ) goto server_lock_out;
    }
    p->aLock[p->nLock++] = pgno;

    while( 1 ){
      u32 n;
      int w;
      /* For a write lock, the read bits of every OTHER client conflict.
      ** For a read lock only a foreign writer conflicts, so mask is 0. */
      u32 mask = (bWrite ? (((1<<HMA_CLIENT_SLOTS)-1) & ~(1<<p->iClient)) : 0);

      /* Loop for as long as slot value v shows a conflicting lock: either
      ** another client is the writer, or (write requests only) another
      ** client holds a read lock. */
      while( ((w = serverWriteLocker(v))>=0 && w!=p->iClient) || (v & mask) ){
        int bRetry = 0;

        if( w<0 && bWrite && bBlock ){
          /* Attempt a RESERVED lock before anything else */
          n = v | ((p->iClient+1) << HMA_CLIENT_SLOTS);
          assert( serverWriteLocker(n)==p->iClient );
          if( __sync_val_compare_and_swap(pSlot, v, n)!=v ){
            /* Slot changed underneath us. Reload and re-evaluate. */
            v = *pSlot;
            continue;
          }
          v = n;
          bReserved = 1;
        }

        rc = serverOvercomeLock(p, bWrite, bBlock, v, &bRetry);
        if( rc!=SQLITE_OK ) goto server_lock_out;
        if( bRetry==0 ){
          /* There is a conflicting lock. Cannot obtain this lock. */
          sqlite3_log(SQLITE_BUSY_DEADLOCK, "Conflict at page %d", (int)pgno);
          rc = SQLITE_BUSY_DEADLOCK;
          goto server_lock_out;
        }

        v = *pSlot;
      }

      /* No conflict visible in v. Set this client's read bit and, for a
      ** write lock, the writer field. Retry from the top if the CAS fails. */
      n = v | (1 << p->iClient);
      if( bWrite ){
        n = n | ((p->iClient+1) << HMA_CLIENT_SLOTS);
      }
      if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break;
      v = *pSlot;
    }
  }

server_lock_out:
  /* On failure after the RESERVED lock was taken above, clear the writer
  ** field again while preserving whatever read bits are currently set. */
  if( rc!=SQLITE_OK && bReserved ){
    u32 n;
    u32 v;
    do{
      v = *pSlot;
      assert( serverWriteLocker(v)==p->iClient );
      n = v & ((1<<HMA_CLIENT_SLOTS)-1);
    }while( __sync_val_compare_and_swap(pSlot, v, n)!=v );
  }

  /* Record the time of this call for page 0. Note that this runs on every
  ** path through the function, including early exit and error returns. */
  if( pgno==0 ){
    struct timeval t1;
    gettimeofday(&t1, 0);
    p->iUsWrite = ((i64)t1.tv_sec * 1000000) + (i64)t1.tv_usec;
  }
  assert( rc!=SQLITE_OK || sqlite3ServerHasLock(p, pgno, bWrite) );
  return rc;
}

/*
** Return non-zero if connection p currently holds the indicated lock on
** page pgno, according to the page's locking slot.
**
** For bWrite!=0 the test is whether the writer field of the slot (the
** bits above the low HMA_CLIENT_SLOTS bits) equals this client's id plus
** one. For bWrite==0 the test is whether this client's read bit is set.
*/
int sqlite3ServerHasLock(Server *p, Pgno pgno, int bWrite){
  u32 slot = *serverPageLockSlot(p, pgno);
  return bWrite
       ? (slot>>HMA_CLIENT_SLOTS)==(p->iClient+1)
       : (slot & (1 << p->iClient))!=0;
}

#endif /* ifdef SQLITE_SERVER_EDITION */

Added src/server.h.









































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
** 2017 April 24
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

#ifdef SQLITE_SERVER_EDITION

#ifndef SQLITE_SERVER_H
#define SQLITE_SERVER_H


/* Opaque handle for one server-mode connection. The structure itself is
** not defined in this header. */
typedef struct Server Server;

/* Open a server connection for pPager. On success *ppOut is set to the
** new handle and *piClient to the client id assigned to it. On failure
** an SQLite error code is returned and *ppOut is set to NULL. */
int sqlite3ServerConnect(Pager *pPager, Server **ppOut, int *piClient);

/* Close server connection p. NOTE(review): the role of dbfd is not
** visible from this header - confirm against server.c. */
void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd);

/* Begin and end a transaction. sqlite3ServerEnd() releases every page
** lock taken since the matching sqlite3ServerBegin().
** sqlite3ServerReleaseWriteLocks() is currently a no-op that returns
** SQLITE_OK. */
int sqlite3ServerBegin(Server *p);
int sqlite3ServerEnd(Server *p);
int sqlite3ServerReleaseWriteLocks(Server *p);

/* Lock page pgno for reading (bWrite==0) or writing (bWrite!=0). If
** bBlock is non-zero the lock is made blocking where possible. */
int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite, int bBlock);

/* Return non-zero if p holds the read (bWrite==0) or write (bWrite!=0)
** lock on page pgno. */
int sqlite3ServerHasLock(Server *p, Pgno pgno, int bWrite);

#endif /* SQLITE_SERVER_H */

#endif /* SQLITE_SERVER_EDITION */

Changes to src/sqlite.h.in.

493
494
495
496
497
498
499

500
501
502
503
504
505
506
#define SQLITE_IOERR_GETTEMPPATH       (SQLITE_IOERR | (25<<8))
#define SQLITE_IOERR_CONVPATH          (SQLITE_IOERR | (26<<8))
#define SQLITE_IOERR_VNODE             (SQLITE_IOERR | (27<<8))
#define SQLITE_IOERR_AUTH              (SQLITE_IOERR | (28<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))
#define SQLITE_BUSY_SNAPSHOT           (SQLITE_BUSY   |  (2<<8))

#define SQLITE_CANTOPEN_NOTEMPDIR      (SQLITE_CANTOPEN | (1<<8))
#define SQLITE_CANTOPEN_ISDIR          (SQLITE_CANTOPEN | (2<<8))
#define SQLITE_CANTOPEN_FULLPATH       (SQLITE_CANTOPEN | (3<<8))
#define SQLITE_CANTOPEN_CONVPATH       (SQLITE_CANTOPEN | (4<<8))
#define SQLITE_CORRUPT_VTAB            (SQLITE_CORRUPT | (1<<8))
#define SQLITE_READONLY_RECOVERY       (SQLITE_READONLY | (1<<8))
#define SQLITE_READONLY_CANTLOCK       (SQLITE_READONLY | (2<<8))







>







493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
#define SQLITE_IOERR_GETTEMPPATH       (SQLITE_IOERR | (25<<8))
#define SQLITE_IOERR_CONVPATH          (SQLITE_IOERR | (26<<8))
#define SQLITE_IOERR_VNODE             (SQLITE_IOERR | (27<<8))
#define SQLITE_IOERR_AUTH              (SQLITE_IOERR | (28<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))
#define SQLITE_BUSY_SNAPSHOT           (SQLITE_BUSY   |  (2<<8))
#define SQLITE_BUSY_DEADLOCK           (SQLITE_BUSY   |  (3<<8))
#define SQLITE_CANTOPEN_NOTEMPDIR      (SQLITE_CANTOPEN | (1<<8))
#define SQLITE_CANTOPEN_ISDIR          (SQLITE_CANTOPEN | (2<<8))
#define SQLITE_CANTOPEN_FULLPATH       (SQLITE_CANTOPEN | (3<<8))
#define SQLITE_CANTOPEN_CONVPATH       (SQLITE_CANTOPEN | (4<<8))
#define SQLITE_CORRUPT_VTAB            (SQLITE_CORRUPT | (1<<8))
#define SQLITE_READONLY_RECOVERY       (SQLITE_READONLY | (1<<8))
#define SQLITE_READONLY_CANTLOCK       (SQLITE_READONLY | (2<<8))

Changes to src/sqliteInt.h.

1097
1098
1099
1100
1101
1102
1103

1104
1105
1106
1107
1108
1109
1110
*/
#include "btree.h"
#include "vdbe.h"
#include "pager.h"
#include "pcache.h"
#include "os.h"
#include "mutex.h"


/* The SQLITE_EXTRA_DURABLE compile-time option used to set the default
** synchronous setting to EXTRA.  It is no longer supported.
*/
#ifdef SQLITE_EXTRA_DURABLE
# warning Use SQLITE_DEFAULT_SYNCHRONOUS=3 instead of SQLITE_EXTRA_DURABLE
# define SQLITE_DEFAULT_SYNCHRONOUS 3







>







1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
*/
#include "btree.h"
#include "vdbe.h"
#include "pager.h"
#include "pcache.h"
#include "os.h"
#include "mutex.h"
#include "server.h"

/* The SQLITE_EXTRA_DURABLE compile-time option used to set the default
** synchronous setting to EXTRA.  It is no longer supported.
*/
#ifdef SQLITE_EXTRA_DURABLE
# warning Use SQLITE_DEFAULT_SYNCHRONOUS=3 instead of SQLITE_EXTRA_DURABLE
# define SQLITE_DEFAULT_SYNCHRONOUS 3

Changes to src/vdbeaux.c.

2635
2636
2637
2638
2639
2640
2641
2642

2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663

    /* Lock all btrees used by the statement */
    sqlite3VdbeEnter(p);

    /* Check for one of the special errors */
    mrc = p->rc & 0xff;
    isSpecialError = mrc==SQLITE_NOMEM || mrc==SQLITE_IOERR
                     || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL;

    if( isSpecialError ){
      /* If the query was read-only and the error code is SQLITE_INTERRUPT, 
      ** no rollback is necessary. Otherwise, at least a savepoint 
      ** transaction must be rolled back to restore the database to a 
      ** consistent state.
      **
      ** Even if the statement is read-only, it is important to perform
      ** a statement or transaction rollback operation. If the error 
      ** occurred while writing to the journal, sub-journal or database
      ** file as part of an effort to free up cache space (see function
      ** pagerStress() in pager.c), the rollback is required to restore 
      ** the pager to a consistent state.
      */
      if( !p->readOnly || mrc!=SQLITE_INTERRUPT ){
        if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){
          eStatementOp = SAVEPOINT_ROLLBACK;
        }else{
          /* We are forced to roll back the active transaction. Before doing
          ** so, abort any other statements this handle currently has active.
          */
          sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);







|
>

|
|
|
|








|







2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664

    /* Lock all btrees used by the statement */
    sqlite3VdbeEnter(p);

    /* Check for one of the special errors */
    mrc = p->rc & 0xff;
    isSpecialError = mrc==SQLITE_NOMEM || mrc==SQLITE_IOERR
                     || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL
                     || p->rc==SQLITE_BUSY_DEADLOCK;
    if( isSpecialError ){
      /* If the query was read-only and the error code is SQLITE_INTERRUPT
      ** or SQLITE_BUSY_DEADLOCK, no rollback is necessary. Otherwise, at 
      ** least a savepoint transaction must be rolled back to restore the
      ** database to a consistent state.
      **
      ** Even if the statement is read-only, it is important to perform
      ** a statement or transaction rollback operation. If the error 
      ** occurred while writing to the journal, sub-journal or database
      ** file as part of an effort to free up cache space (see function
      ** pagerStress() in pager.c), the rollback is required to restore 
      ** the pager to a consistent state.
      */
      if( !p->readOnly || (mrc!=SQLITE_INTERRUPT && mrc!=SQLITE_BUSY) ){
        if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){
          eStatementOp = SAVEPOINT_ROLLBACK;
        }else{
          /* We are forced to roll back the active transaction. Before doing
          ** so, abort any other statements this handle currently has active.
          */
          sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);

Changes to src/wal.c.

450
451
452
453
454
455
456



457






458
459
460
461
462
463
464
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
#ifdef SQLITE_ENABLE_SNAPSHOT
  WalIndexHdr *pSnapshot;    /* Start transaction here if not NULL */
#endif



};







/*
** Candidate values for Wal.exclusiveMode.
*/
#define WAL_NORMAL_MODE     0
#define WAL_EXCLUSIVE_MODE  1     
#define WAL_HEAPMEMORY_MODE 2







>
>
>

>
>
>
>
>
>







450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
#ifdef SQLITE_ENABLE_SNAPSHOT
  WalIndexHdr *pSnapshot;    /* Start transaction here if not NULL */
#endif
#ifdef SQLITE_SERVER_EDITION
  Server *pServer;
#endif
};

/*
** walIsServer(p) is true if Wal handle p has a Server object attached
** (Wal.pServer is set by sqlite3WalServer()). In builds without
** SQLITE_SERVER_EDITION it is the constant 0.
*/
#ifdef SQLITE_SERVER_EDITION
# define walIsServer(p) ((p)->pServer!=0)
#else
# define walIsServer(p) 0
#endif

/*
** Candidate values for Wal.exclusiveMode.
*/
#define WAL_NORMAL_MODE     0
#define WAL_EXCLUSIVE_MODE  1     
#define WAL_HEAPMEMORY_MODE 2
1257
1258
1259
1260
1261
1262
1263








1264
1265
1266
1267
1268
1269
1270
      pWal->apWiData[i] = 0;
    }
  }else{
    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
  }
}









/* 
** Open a connection to the WAL file zWalName. The database file must 
** already be opened on connection pDbFd. The buffer that zWalName points
** to must remain valid for the lifetime of the returned Wal* handle.
**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other







>
>
>
>
>
>
>
>







1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
      pWal->apWiData[i] = 0;
    }
  }else{
    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
  }
}

#ifdef SQLITE_SERVER_EDITION
/*
** Attach server object pServer to WAL handle pWal, after which
** walIsServer(pWal) is true whenever pServer is not NULL. The handle must
** not already have a server object attached. Always returns SQLITE_OK.
*/
int sqlite3WalServer(Wal *pWal, Server *pServer){
  assert( 0==pWal->pServer );
  pWal->pServer = pServer;
  return SQLITE_OK;
}
#endif

/* 
** Open a connection to the WAL file zWalName. The database file must 
** already be opened on connection pDbFd. The buffer that zWalName points
** to must remain valid for the lifetime of the returned Wal* handle.
**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other
1888
1889
1890
1891
1892
1893
1894



1895
1896
1897
1898
1899
1900
1901
          ** checkpointed and behave accordingly. This seems unsafe though,
          ** as it would leave the system in a state where the contents of
          ** the wal-index header do not match the contents of the 
          ** file-system. To avoid this, update the wal-index header to
          ** indicate that the log file contains zero valid frames.  */
          walRestartHdr(pWal, salt1);
          rc = sqlite3OsTruncate(pWal->pWalFd, 0);



        }
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }
    }
  }

 walcheckpoint_out:







>
>
>







1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
          ** checkpointed and behave accordingly. This seems unsafe though,
          ** as it would leave the system in a state where the contents of
          ** the wal-index header do not match the contents of the 
          ** file-system. To avoid this, update the wal-index header to
          ** indicate that the log file contains zero valid frames.  */
          walRestartHdr(pWal, salt1);
          rc = sqlite3OsTruncate(pWal->pWalFd, 0);
        }else if( walIsServer(pWal) ){
          assert( eMode==SQLITE_CHECKPOINT_RESTART );
          walRestartHdr(pWal, salt1);
        }
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }
    }
  }

 walcheckpoint_out:
2046
2047
2048
2049
2050
2051
2052








2053
2054
2055
2056
2057
2058
2059
    testcase( pWal->szPage<=32768 );
    testcase( pWal->szPage>=65536 );
  }

  /* The header was successfully read. Return zero. */
  return 0;
}









/*
** Read the wal-index header from the wal-index and into pWal->hdr.
** If the wal-header appears to be corrupt, try to reconstruct the
** wal-index from the WAL before returning.
**
** Set *pChanged to 1 if the wal-index header value in pWal->hdr is







>
>
>
>
>
>
>
>







2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
    testcase( pWal->szPage<=32768 );
    testcase( pWal->szPage>=65536 );
  }

  /* The header was successfully read. Return zero. */
  return 0;
}

/*
** Obtain the lock that serializes writers to the wal-index.
**
** An ordinary connection takes an exclusive lock on WAL_WRITE_LOCK. A
** server-mode connection instead requests a non-blocking write lock on
** page-locking slot 0 via sqlite3ServerLock(). Returns the result of
** whichever call is made.
*/
static int walIndexWriteLock(Wal *pWal){
  if( !walIsServer(pWal) ){
    return walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  }
  return sqlite3ServerLock(pWal->pServer, 0, 1, 0);
}

/*
** Read the wal-index header from the wal-index and into pWal->hdr.
** If the wal-header appears to be corrupt, try to reconstruct the
** wal-index from the WAL before returning.
**
** Set *pChanged to 1 if the wal-index header value in pWal->hdr is
2087
2088
2089
2090
2091
2092
2093

2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111

2112

2113
2114
2115
2116
2117
2118
2119

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->readOnly & WAL_SHM_RDONLY ){

      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
        badHdr = walIndexTryHdr(pWal, pChanged);
        if( badHdr ){
          /* If the wal-index header is still malformed even while holding
          ** a WRITE lock, it can only mean that the header is corrupted and
          ** needs to be reconstructed.  So run recovery to do exactly that.
          */
          rc = walIndexRecover(pWal);
          *pChanged = 1;
        }
      }
      pWal->writeLock = 0;

      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);

    }
  }

  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */







>




|













>
|
>







2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->readOnly & WAL_SHM_RDONLY ){
      assert( walIsServer(pWal)==0 );
      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walIndexWriteLock(pWal)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
        badHdr = walIndexTryHdr(pWal, pChanged);
        if( badHdr ){
          /* If the wal-index header is still malformed even while holding
          ** a WRITE lock, it can only mean that the header is corrupted and
          ** needs to be reconstructed.  So run recovery to do exactly that.
          */
          rc = walIndexRecover(pWal);
          *pChanged = 1;
        }
      }
      pWal->writeLock = 0;
      if( walIsServer(pWal)==0 ){
        walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
      }
    }
  }

  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */
2245
2246
2247
2248
2249
2250
2251



2252
2253
2254
2255
2256
2257
2258
      }
    }
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }




  pInfo = walCkptInfo(pWal);
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame 
#ifdef SQLITE_ENABLE_SNAPSHOT
   && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0
     || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr)))
#endif
  ){







>
>
>







2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
      }
    }
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  assert( rc==SQLITE_OK );
  if( walIsServer(pWal) ) return SQLITE_OK;

  pInfo = walCkptInfo(pWal);
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame 
#ifdef SQLITE_ENABLE_SNAPSHOT
   && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0
     || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr)))
#endif
  ){
2584
2585
2586
2587
2588
2589
2590
2591








2592
2593
2594
2595
2596
2597
2598
){
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */
  int iMinHash;

  /* This routine is only called from within a read transaction. */
  assert( pWal->readLock>=0 || pWal->lockError );









  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */







|
>
>
>
>
>
>
>
>







2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
){
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */
  int iMinHash;

  /* This routine is only called from within a read transaction. */
  assert( walIsServer(pWal) || pWal->readLock>=0 || pWal->lockError );

  assert( walIsServer(pWal)==0 || pWal->writeLock==0 
       || sqlite3ServerHasLock(pWal->pServer, 0, 1) 
  );
  if( walIsServer(pWal) && pWal->writeLock==0 ){
    /* A server mode connection must read from the most recent snapshot. */
    iLast = walIndexHdr(pWal)->mxFrame;
  }

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
  return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
}

/*
** Return the size of the database in pages, as recorded in the cached
** wal-index header (pWal->hdr.nPage). Return zero if the size is
** unknown: pWal is NULL or no read lock is held.
*/
Pgno sqlite3WalDbsize(Wal *pWal){
  if( pWal==0 ) return 0;
  if( ALWAYS(pWal->readLock>=0) ) return pWal->hdr.nPage;
  return 0;
}


/* 







|







2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
  return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
}

/*
** Return the size of the database in pages, as recorded in the cached
** wal-index header (pWal->hdr.nPage). Return zero if the size is
** unknown: pWal is NULL, or the connection is not in server mode and
** holds no read lock.
*/
Pgno sqlite3WalDbsize(Wal *pWal){
  if( pWal==0 ) return 0;
  if( walIsServer(pWal) || ALWAYS(pWal->readLock>=0) ){
    return pWal->hdr.nPage;
  }
  return 0;
}


/* 
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733





2734
2735
2736
2737
2738
2739
2740
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );
  assert( pWal->writeLock==0 && pWal->iReCksum==0 );

  if( pWal->readOnly ){
    return SQLITE_READONLY;
  }






  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
  */
  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc ){
    return rc;







|





>
>
>
>
>







2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( walIsServer(pWal) || pWal->readLock>=0 );
  assert( pWal->writeLock==0 && pWal->iReCksum==0 );

  if( pWal->readOnly ){
    return SQLITE_READONLY;
  }

  /* For a server connection, do nothing at this point. */
  if( walIsServer(pWal) ){
    return SQLITE_OK;
  }

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
  */
  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc ){
    return rc;
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  if( ALWAYS(pWal->writeLock) ){
    Pgno iMax = pWal->hdr.mxFrame;
    Pgno iFrame;
  
    /* Restore the client's cache of the wal-index header to the state it
    ** was in before the client began writing to the database. 
    */
    memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));







|







2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  if( pWal->writeLock ){
    Pgno iMax = pWal->hdr.mxFrame;
    Pgno iFrame;
  
    /* Restore the client's cache of the wal-index header to the state it
    ** was in before the client began writing to the database. 
    */
    memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
2868
2869
2870
2871
2872
2873
2874

2875
2876
2877
2878
2879

2880
2881
2882
2883
2884
2885
2886
** unchanged.
**
** SQLITE_OK is returned if no error is encountered (regardless of whether
** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
** if an error occurs.
*/
static int walRestartLog(Wal *pWal){

  int rc = SQLITE_OK;
  int cnt;

  if( pWal->readLock==0 ){
    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);

    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      u32 salt1;
      sqlite3_randomness(4, &salt1);
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        /* If all readers are using WAL_READ_LOCK(0) (in other words if no







>



|
|
>







2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
** unchanged.
**
** SQLITE_OK is returned if no error is encountered (regardless of whether
** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
** if an error occurs.
*/
static int walRestartLog(Wal *pWal){
  volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  int rc = SQLITE_OK;
  int cnt;

  if( pWal->readLock==0 
   || (walIsServer(pWal) && pInfo->nBackfill==pWal->hdr.mxFrame)
  ){
    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      u32 salt1;
      sqlite3_randomness(4, &salt1);
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
2894
2895
2896
2897
2898
2899
2900

2901
2902
2903
2904
2905
2906
2907
        ** to handle if this transaction is rolled back.  */
        walRestartHdr(pWal, salt1);
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }else if( rc!=SQLITE_BUSY ){
        return rc;
      }
    }

    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    cnt = 0;
    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
    }while( rc==WAL_RETRY );







>







2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
        ** to handle if this transaction is rolled back.  */
        walRestartHdr(pWal, salt1);
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }else if( rc!=SQLITE_BUSY ){
        return rc;
      }
    }
    if( walIsServer(pWal) ) return rc;
    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    cnt = 0;
    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
    }while( rc==WAL_RETRY );
3054
3055
3056
3057
3058
3059
3060
3061
















3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
  int szFrame;                    /* The size of a single frame */
  i64 iOffset;                    /* Next byte to write in WAL file */
  WalWriter w;                    /* The writer */
  u32 iFirst = 0;                 /* First frame that may be overwritten */
  WalIndexHdr *pLive;             /* Pointer to shared header */

  assert( pList );
  assert( pWal->writeLock );

















  /* If this frame set completes a transaction, then nTruncate>0.  If
  ** nTruncate==0 then this frame set does not complete the transaction. */
  assert( (isCommit!=0)==(nTruncate!=0) );

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  }
#endif

  pLive = (WalIndexHdr*)walIndexHdr(pWal);
  if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
    iFirst = pLive->mxFrame+1;
  }

  /* See if it is possible to write these frames into the start of the
  ** log file, instead of appending to it at pWal->hdr.mxFrame.
  */
  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
    return rc;
  }

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















|







3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
  int szFrame;                    /* The size of a single frame */
  i64 iOffset;                    /* Next byte to write in WAL file */
  WalWriter w;                    /* The writer */
  u32 iFirst = 0;                 /* First frame that may be overwritten */
  WalIndexHdr *pLive;             /* Pointer to shared header */

  assert( pList );
  assert( pWal->writeLock || walIsServer(pWal) );
  if( pWal->writeLock==0 ){
    int bDummy = 0;
#if 0
    rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
#endif
    rc = sqlite3ServerLock(pWal->pServer, 0, 1, 1);
    if( rc==SQLITE_OK ){
      pWal->writeLock = 1;
      rc = walIndexTryHdr(pWal, &bDummy);
    }
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( sqlite3ServerHasLock(pWal->pServer, 0, 1) );
  }
  assert( walIsServer(pWal)==0 || sqlite3ServerHasLock(pWal->pServer, 0, 1) );

  /* If this frame set completes a transaction, then nTruncate>0.  If
  ** nTruncate==0 then this frame set does not complete the transaction. */
  assert( (isCommit!=0)==(nTruncate!=0) );

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  }
#endif

  pLive = (WalIndexHdr*)walIndexHdr(pWal);
  if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
    iFirst = pLive->mxFrame+1;
  }

  /* See if it is possible to write these frames into the start of the
  ** log file, instead of appending to it at pWal->hdr.mxFrame.
  */
  if( walIsServer(pWal)==0 && SQLITE_OK!=(rc = walRestartLog(pWal)) ){
    return rc;
  }

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
3327
3328
3329
3330
3331
3332
3333











3334

3335
3336
3337
3338
3339
3340
3341
  **
  ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained
  ** immediately, and a busy-handler is configured, it is invoked and the
  ** writer lock retried until either the busy-handler returns 0 or the
  ** lock is successfully obtained.
  */
  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){











    rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);

    if( rc==SQLITE_OK ){
      pWal->writeLock = 1;
    }else if( rc==SQLITE_BUSY ){
      eMode2 = SQLITE_CHECKPOINT_PASSIVE;
      xBusy2 = 0;
      rc = SQLITE_OK;
    }







>
>
>
>
>
>
>
>
>
>
>
|
>







3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
  **
  ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained
  ** immediately, and a busy-handler is configured, it is invoked and the
  ** writer lock retried until either the busy-handler returns 0 or the
  ** lock is successfully obtained.
  */
  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
    if( walIsServer(pWal) ){
      rc = sqlite3ServerBegin(pWal->pServer);
      if( rc!=SQLITE_OK ) goto ckpt_out;
      if( eMode>=SQLITE_CHECKPOINT_RESTART ){
        /* Exclusive lock on page 1. This is exclusive access to the db. */
        rc = sqlite3ServerLock(pWal->pServer, 1, 1, 1);
      }else{
        /* Take the server write-lock ("page" 0) */
        rc = sqlite3ServerLock(pWal->pServer, 0, 1, 1);
      }
    }else{
      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);
    }
    if( rc==SQLITE_OK ){
      pWal->writeLock = 1;
    }else if( rc==SQLITE_BUSY ){
      eMode2 = SQLITE_CHECKPOINT_PASSIVE;
      xBusy2 = 0;
      rc = SQLITE_OK;
    }
3372
3373
3374
3375
3376
3377
3378

3379
3380
3381
3382

3383
3384
3385
3386
3387
3388
3389
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */

  sqlite3WalEndWriteTransaction(pWal);
  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  pWal->ckptLock = 0;
  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));

  return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc);
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.







>




>







3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
 ckpt_out:
  sqlite3WalEndWriteTransaction(pWal);
  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  pWal->ckptLock = 0;
  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
  if( walIsServer(pWal) ) sqlite3ServerEnd(pWal->pServer);
  return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc);
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.

Changes to src/wal.h.

139
140
141
142
143
144
145




146
147
148
** stored in each frame (i.e. the db page-size when the WAL was created).
*/
int sqlite3WalFramesize(Wal *pWal);
#endif

/* Return the sqlite3_file object for the WAL file */
sqlite3_file *sqlite3WalFile(Wal *pWal);





#endif /* ifndef SQLITE_OMIT_WAL */
#endif /* SQLITE_WAL_H */







>
>
>
>



139
140
141
142
143
144
145
146
147
148
149
150
151
152
** stored in each frame (i.e. the db page-size when the WAL was created).
*/
int sqlite3WalFramesize(Wal *pWal);
#endif

/* Return the sqlite3_file object for the WAL file */
sqlite3_file *sqlite3WalFile(Wal *pWal);

#ifdef SQLITE_SERVER_EDITION
int sqlite3WalServer(Wal *pWal, Server *pServer);
#endif

#endif /* ifndef SQLITE_OMIT_WAL */
#endif /* SQLITE_WAL_H */

Changes to test/permutations.test.

268
269
270
271
272
273
274









275
276
277
278
279
280
281
  fts3corrupt3.test
  fts3misc.test
}

test_suite "fts5" -prefix "" -description {
  All FTS5 tests.
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]










test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt* *fault* *big* *fts5aj*







>
>
>
>
>
>
>
>
>







268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
  fts3corrupt3.test
  fts3misc.test
}

test_suite "fts5" -prefix "" -description {
  All FTS5 tests.
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]

test_suite "server" -prefix "" -description {
  All server-edition tests.
} -files [
  test_set \
      select1.test \
      [glob -nocomplain $::testdir/server*.test] \
      -exclude *server1.test
]

test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt* *fault* *big* *fts5aj*

Added test/server2.test.











































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# 2017 April 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the server mode of SQLite.
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix server2

#-------------------------------------------------------------------------
# Check that the *-hma file is deleted correctly.
#
# The tests below track the existence of "test.db-hma" as connections to
# test.db are opened and closed: it is expected to exist while at least
# one connection is open and to be gone once the last one is closed.
#
do_execsql_test 1.0 {
  CREATE TABLE t1(a, b);
} {}

# With the single connection [db] open, the file exists.
do_test 1.1 {
  file exists test.db-hma
} {1}

# Closing the only connection removes it.
do_test 1.2 {
  db close
  file exists test.db-hma
} {0}

# Two connections open, both having written to the database.
do_test 1.3 {
  sqlite3 db test.db
  db eval { CREATE TABLE t2(a, b) }
  sqlite3 db2 test.db
  db2 eval { CREATE TABLE t3(a, b) }
  file exists test.db-hma
} {1}

# Closing one of the two connections must leave the file in place.
do_test 1.4 {
  db2 close
  file exists test.db-hma
} {1}
integrity_check 1.5

# Closing the last connection removes the file again.
do_test 1.6 {
  db close
  file exists test.db-hma
} {0}


#-------------------------------------------------------------------------
# Two connections ([db] and [db2]) with concurrently open write
# transactions on the same database file.
#
reset_db
sqlite3 db2 test.db

do_execsql_test 2.0 {
  CREATE TABLE t1(a, b);
  CREATE TABLE t2(c, d);
}

# Two concurrent transactions committed. Each connection writes to a
# different table, and both transactions are open at the same time.
#
do_test 2.1 {
  db eval {
    BEGIN;
      INSERT INTO t1 VALUES(1, 2);
  }
  db2 eval {
    BEGIN;
      INSERT INTO t2 VALUES(3, 4);
  }
} {}

# While both transactions are open, two numbered journal files exist
# alongside the database and the *-hma file.
do_test 2.2 {
  lsort [glob test.db*]
} {test.db test.db-hma test.db-journal0 test.db-journal1}
do_test 2.3.1 { db eval COMMIT  } {}
do_test 2.3.2 { db2 eval COMMIT } {}
do_execsql_test 2.4 {SELECT * FROM t1, t2} {1 2 3 4}

# The same set of files is still present after both commits.
do_test 2.5 {
  lsort [glob test.db*]
} {test.db test.db-hma test.db-journal0 test.db-journal1}

# Both connections try to write to the same table. The second writer
# fails with "database is locked".
do_test 2.6 {
  execsql {BEGIN}
  execsql {INSERT INTO t1 VALUES(5, 6)}

  execsql {BEGIN} db2
  catchsql {INSERT INTO t1 VALUES(7, 8)} db2
} {1 {database is locked}}
do_test 2.7 {
  # Transaction is automatically rolled back in this case.
  sqlite3_get_autocommit db2
} {1}

# Once the first writer commits, the second connection sees its row
# (5, 6) and not the rejected row (7, 8).
do_test 2.8 {
  execsql COMMIT
  execsql { SELECT * FROM t1 } db2
} {1 2 5 6}
db2 close

#-------------------------------------------------------------------------
# Check which files exist next to the database while a single connection
# is open, and that only the database file itself remains once that
# connection has been closed.
#
reset_db
do_execsql_test 3.0 {
  CREATE TABLE t1(a, b);
}

# [glob] makes no guarantee about the order of the names it returns, so
# sort the list before comparing it (as tests 2.2 and 2.5 do).
do_test 3.1 {
  lsort [glob test.db*]
} {test.db test.db-hma test.db-journal0}

do_test 3.2 {
  db close
  glob test.db*
} {test.db}

finish_test

Added test/server3.test.



























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# 2017 April 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the server mode of SQLite.
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/lock_common.tcl
set testprefix server3

# The connections used below are the ones created by do_multiclient_test,
# so close the default handle opened by tester.tcl first.
db close

# do_multiclient_test comes from lock_common.tcl (sourced above); it
# presumably runs this body once per client configuration with $tn set to
# a different prefix each time - confirm against lock_common.tcl.
do_multiclient_test tn {
  # Each client creates one table.
  do_test $tn.1 {
    sql1 { CREATE TABLE t1(a, b) }
    sql2 { CREATE TABLE t2(a, b) }
  } {}

  # Client 1 commits a row to t2 (the INSERT runs before BEGIN), then
  # leaves a transaction open that contains an uncommitted write to t1.
  do_test $tn.2 {
    sql1 {
      INSERT INTO t2 VALUES(1, 2);
      BEGIN;
        INSERT INTO t1 VALUES(1, 2);
    }
  } {}

  # Client 2 cannot read t1 while client 1 holds that transaction open.
  # The attempt is made twice and must fail the same way both times.
  # Reading t2, which the open transaction has not written, succeeds.
  do_test $tn.3 { csql2 { SELECT * FROM t1 } } {1 {database is locked}}
  do_test $tn.4 { csql2 { SELECT * FROM t1 } } {1 {database is locked}}
  do_test $tn.5 {  sql2 { SELECT * FROM t2 } } {1 2}


}

finish_test

Added test/servercrash.test.









































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# 2017 April 27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix servercrash

# The tests in this file rely on the crash-simulation support, so skip
# the whole script when it is not compiled in.
ifcapable !crashtest {
  finish_test
  return
}
do_not_use_codec

# Create the database that every crash test below starts from: two
# tables holding the rows (1,2) and (3,4) each.
#
# The page size is set with "PRAGMA page_size". (An earlier revision
# misspelled the pragma name; SQLite silently ignores unknown pragmas, so
# the setting never took effect.)
do_execsql_test 1.0 {
  PRAGMA page_size = 4096;
  PRAGMA auto_vacuum = OFF;
  CREATE TABLE t1(a, b);
  CREATE TABLE t2(c, d);

  INSERT INTO t1 VALUES(1, 2), (3, 4);
  INSERT INTO t2 VALUES(1, 2), (3, 4);
}

# Each of the three loops below repeats the same experiment ten times:
# [crashsql] (defined in tester.tcl) runs an INSERT in a child process
# that is expected to exit abnormally, after which t1 must still contain
# only the original rows (1,2) and (3,4). The loops differ in which
# connection is used to inspect the database afterwards.

# Variant 1: inspect using the connection [db] that was already open
# before the crash.
for {set i 0} {$i < 10} {incr i} {
  do_test 1.$i.1 {
    crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) }
  } {1 {child process exited abnormally}}

  do_execsql_test 1.$i.2 {
    SELECT * FROM t1
  } {1 2 3 4}
}

# Variant 2: inspect using a connection [dbX] opened after the crash,
# while [db] is still open.
for {set i 0} {$i < 10} {incr i} {
  do_test 2.$i.1 {
    crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) }
  } {1 {child process exited abnormally}}

  do_test 2.$i.2 {
    sqlite3 dbX test.db
    execsql { SELECT * FROM t1 } dbX
  } {1 2 3 4}
  dbX close
}

# Variant 3: no connection is open in this process when the crash
# happens; [db] is reopened after each crash and closed again.
db close
for {set i 0} {$i < 10} {incr i} {
  do_test 3.$i.1 {
    crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) }
  } {1 {child process exited abnormally}}

  sqlite3 db test.db
  do_execsql_test 3.$i.2 { SELECT * FROM t1 } {1 2 3 4}
  db close
}

finish_test

Added test/serverwal.test.





















































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# 2017 April 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the server mode of SQLite.
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix serverwal

# Check files are created and deleted as expected.
#
# After switching to wal mode and touching the database, the *-hma, *-shm
# and *-wal files must all exist next to test.db. After the connection is
# closed only test.db itself may remain.
#
do_execsql_test 1.0 {
  PRAGMA journal_mode = wal;
} {wal}
do_execsql_test 1.1 {
  CREATE TABLE t1(a, b);
}
do_execsql_test 1.2 {
  SELECT * FROM t1;
} {}
do_test 1.3 {
  lsort [glob test.db*]
} {test.db test.db-hma test.db-shm test.db-wal}
do_test 1.4 {
  db close
  glob test.db*
} {test.db}

#-------------------------------------------------------------------------
# Two concurrent transactions.
#
# [db] and [db2] each open a transaction and write to a different table
# (t1 was created by test 1.1 above); both transactions are open at the
# same time and both must commit without error.
#
do_test 2.0 {
  sqlite3 db  test.db
  sqlite3 db2 test.db
  db eval {
    CREATE TABLE t2(a, b);
  }
} {}
do_test 2.1 {
  execsql {
    BEGIN;
      INSERT INTO t1 VALUES(1, 2);
  } db
  execsql {
    BEGIN;
      INSERT INTO t2 VALUES(1, 2);
  } db2
} {}
do_test 2.2 {
  execsql COMMIT db
  execsql COMMIT db2
} {}
db close
db2 close

#-------------------------------------------------------------------------
# Check that the wal file can be wrapped around.
#
# Test 3.0 grows the wal file with a handful of transactions. Test 3.1
# records its size, runs a RESTART checkpoint and then writes two more
# transactions. The size of the wal file must be unchanged afterwards,
# i.e. the new writes did not extend the file.
#
reset_db
do_execsql_test 3.0 {
  PRAGMA journal_mode = wal;
  CREATE TABLE ttt(a, b);
  INSERT INTO ttt VALUES(1, 2);
  INSERT INTO ttt VALUES(3, 4);
  INSERT INTO ttt VALUES(5, 6);
  INSERT INTO ttt VALUES(7, 8);
  INSERT INTO ttt VALUES(9, 10);
} {wal}

do_test 3.1 {
  set N [file size test.db-wal]
  execsql {
    PRAGMA wal_checkpoint = restart;
    INSERT INTO ttt VALUES(11, 12);
    INSERT INTO ttt VALUES(13, 14);
  }
  expr {$N == [file size test.db-wal]}
} {1}

#-------------------------------------------------------------------------
# Check that ROLLBACK appears to work.
#
# The page cache is limited to 10 pages and the table (plus its index)
# is filled with 100 rows of 2x100-byte blobs, so the UPDATE in test 4.1
# touches more pages than the cache is configured to hold. The database
# must pass an integrity check both before the transaction and after it
# has been rolled back.
#
reset_db
do_execsql_test 4.0 {
  PRAGMA cache_size = 10;
  CREATE TABLE ttt(a, b);
  CREATE INDEX yyy ON ttt(b, a);
  PRAGMA journal_mode = wal;
  WITH s(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100
  )
  INSERT INTO ttt SELECT randomblob(100), randomblob(100) FROM s;
} {wal}

do_execsql_test 4.1 {
  PRAGMA integrity_check;
  BEGIN;
    UPDATE ttt SET b=a;
  ROLLBACK;
  PRAGMA integrity_check;
} {ok ok}

# Check that a second connection still reads the correct data after the
# first connection has run a RESTART checkpoint.
reset_db
do_execsql_test 5.1 {
  CREATE TABLE xyz(a);
  PRAGMA journal_mode = wal;
  INSERT INTO xyz VALUES(1);
  INSERT INTO xyz VALUES(2);
  INSERT INTO xyz VALUES(3);
} {wal}

# NOTE(review): this looks like a leftover debugging hook (a convenient
# place to stop in a debugger) rather than part of the test - confirm
# and consider removing it.
breakpoint

# [db2] reads the three rows before the checkpoint ...
do_test 5.2 {
  sqlite3 db2 test.db
  execsql { SELECT * FROM xyz } db2
} {1 2 3}

do_execsql_test 5.3 {
  PRAGMA wal_checkpoint = restart 
} {0 0 0}

# ... and must see exactly the same rows after it.
do_test 5.4 {
  execsql { SELECT * FROM xyz } db2
} {1 2 3}

finish_test

Changes to test/tester.tcl.

582
583
584
585
586
587
588




589
590
591
592
593
594
595
# Create a test database
#
proc reset_db {} {
  catch {db close}
  forcedelete test.db
  forcedelete test.db-journal
  forcedelete test.db-wal




  sqlite3 db ./test.db
  set ::DB [sqlite3_connection_pointer db]
  if {[info exists ::SETUP_SQL]} {
    db eval $::SETUP_SQL
  }
}
reset_db







>
>
>
>







582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
# Create a test database
#
# Close the [db] handle if one is open (errors are ignored), delete
# test.db together with its rollback journal, its wal file and the
# numbered journal files test.db-journal0 .. test.db-journal15, then
# reopen [db] on a fresh ./test.db. The raw connection pointer is stored
# in ::DB, and if the global ::SETUP_SQL is set it is run against the new
# database.
#
proc reset_db {} {
  catch {db close}
  forcedelete test.db
  forcedelete test.db-journal
  forcedelete test.db-wal
  # Numbered journal files, one per index in the range 0..15.
  for {set i 0} {$i < 16} {incr i} {
    forcedelete test.db-journal$i
  }

  sqlite3 db ./test.db
  set ::DB [sqlite3_connection_pointer db]
  if {[info exists ::SETUP_SQL]} {
    db eval $::SETUP_SQL
  }
}
reset_db

Changes to tool/mksqlite3c.tcl.

110
111
112
113
114
115
116

117
118
119
120
121
122
123
   os_win.h
   os.h
   pager.h
   parse.h
   pcache.h
   pragma.h
   rtree.h

   sqlite3session.h
   sqlite3.h
   sqlite3ext.h
   sqlite3rbu.h
   sqliteicu.h
   sqliteInt.h
   sqliteLimit.h







>







110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
   os_win.h
   os.h
   pager.h
   parse.h
   pcache.h
   pragma.h
   rtree.h
   server.h
   sqlite3session.h
   sqlite3.h
   sqlite3ext.h
   sqlite3rbu.h
   sqliteicu.h
   sqliteInt.h
   sqliteLimit.h
315
316
317
318
319
320
321

322
323
324
325
326
327
328

   bitvec.c
   pcache.c
   pcache1.c
   rowset.c
   pager.c
   wal.c


   btmutex.c
   btree.c
   backup.c

   vdbemem.c
   vdbeaux.c







>







316
317
318
319
320
321
322
323
324
325
326
327
328
329
330

   bitvec.c
   pcache.c
   pcache1.c
   rowset.c
   pager.c
   wal.c
   server.c

   btmutex.c
   btree.c
   backup.c

   vdbemem.c
   vdbeaux.c

Added tool/tserver.c.





















































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
/*
** 2017 June 7
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** Simple multi-threaded server used for informal testing of concurrency
** between connections in different threads. Listens for tcp/ip connections
** on port 9999 of the 127.0.0.1 interface only. To build:
**
**   gcc -g $(TOP)/tool/tserver.c sqlite3.o -lpthread -o tserver
**
** To run using "x.db" as the db file:
**
**   ./tserver x.db
**
** To connect, open a client socket on port 9999 and start sending commands.
** Commands are either SQL - which must be terminated by a semi-colon, or
** dot-commands, which must be terminated by a newline. If an SQL statement
** is seen, it is prepared and added to an internal list.
**
** Dot-commands are:
**
**   .list                    Display all SQL statements in the list.
**   .quit                    Disconnect.
**   .run                     Run all SQL statements in the list.
**   .repeats N               Configure the number of repeats per ".run".
**   .seconds N               Configure the number of seconds to ".run" for.
**
** Example input:
**
**   BEGIN;
**     INSERT INTO t1 VALUES(randomblob(10), randomblob(100));
**     INSERT INTO t1 VALUES(randomblob(10), randomblob(100));
**     INSERT INTO t1 VALUES(randomblob(10), randomblob(100));
**   COMMIT;
**   .repeats 100000
**   .run
**
*/
#define TSERVER_PORTNUMBER 9999

#include <arpa/inet.h>
#include <assert.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>

#include "sqlite3.h"

/* Database used by this server */
static char *zDatabaseName = 0;

/*
** State belonging to a single client connection. handle_client() keeps
** one instance on its stack for as long as the client is connected.
**
** The ".run" command executes the statements in apPrepare[] repeatedly.
** A value of nRepeat that is zero or negative places no limit on the
** number of iterations; a positive nSecond stops the run once that many
** seconds have elapsed.
*/
typedef struct ClientCtx ClientCtx;
struct ClientCtx {
  sqlite3 *db;                    /* Database handle for this client */
  int fd;                         /* Client fd */
  int nRepeat;                    /* Number of times to repeat SQL */
  int nSecond;                    /* Number of seconds to run for */
  sqlite3_stmt **apPrepare;       /* Array of prepared statements */
  int nPrepare;                   /* Valid size of apPrepare[] */
  int nAlloc;                     /* Allocated size of apPrepare[] */
};

/*
** Return 1 if character i terminates a line (newline or carriage
** return), or 0 otherwise.
*/
static int is_eol(int i){
  switch( i ){
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
/*
** Return 1 if character i is a space, a tab or an end-of-line character,
** or 0 otherwise.
*/
static int is_whitespace(int i){
  if( i==' ' ) return 1;
  if( i=='\t' ) return 1;
  return is_eol(i);
}

/*
** Strip leading and trailing whitespace from a counted string.
**
** On entry *pzStr points to the first byte of the string and *pnStr is
** its length in bytes. On return both have been adjusted so that they
** describe the same text without surrounding whitespace. The text
** itself is not modified or copied.
*/
static void trim_string(const char **pzStr, int *pnStr){
  const char *zCur = *pzStr;      /* Current first byte */
  int nLeft = *pnStr;             /* Bytes remaining at zCur */

  /* Advance past leading whitespace */
  for(; nLeft>0 && is_whitespace(zCur[0]); zCur++, nLeft--){}

  /* Drop trailing whitespace */
  for(; nLeft>0 && is_whitespace(zCur[nLeft-1]); nLeft--){}

  *pnStr = nLeft;
  *pzStr = zCur;
}

/*
** Format a message printf()-style and send it to client p.
**
** The complete message is written: if write() accepts only part of the
** data it is called again for the remainder, so that a short write does
** not silently truncate the reply.
**
** Return 0 if the whole message was sent. Return 1 if the message could
** not be allocated or if write() failed.
*/
static int send_message(ClientCtx *p, const char *zFmt, ...){
  char *zMsg;                     /* Formatted message */
  int res = -1;                   /* Negative unless the message is sent */
  va_list ap;                     /* Vararg list */

  va_start(ap, zFmt);
  zMsg = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zMsg ){
    const char *zOut = zMsg;      /* Next byte to send */
    size_t nLeft = strlen(zMsg);  /* Bytes still to send */

    res = 0;
    while( nLeft>0 ){
      ssize_t nWrite = write(p->fd, zOut, nLeft);
      if( nWrite<=0 ){
        /* Error, or no progress. Give up rather than spin. */
        res = -1;
        break;
      }
      zOut += nWrite;
      nLeft -= (size_t)nWrite;
    }
  }
  sqlite3_free(zMsg);

  return (res<0);
}

/*
** Prepare each SQL statement found in the nSql bytes at zSql and append
** the resulting statement handles to p->apPrepare[], growing the array
** in steps of 32 entries as required. An "ok" message carrying the new
** statement count is sent to the client after each statement is added.
**
** Processing stops at the first piece of input that yields no statement
** (for example trailing whitespace).
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the array could not be
** grown, or 1 if a statement failed to prepare (an error message is sent
** to the client in that case) or a reply could not be sent.
*/
static int handle_some_sql(ClientCtx *p, const char *zSql, int nSql){
  const char *zRest = zSql;       /* Unparsed remainder of the input */
  int nRest = nSql;               /* Number of bytes at zRest */

  for(;;){
    sqlite3_stmt **ppSlot;        /* Array slot for the next statement */
    int rc;

    /* Make sure there is room for one more statement handle */
    if( p->nAlloc<=p->nPrepare ){
      int nByte = (p->nPrepare+32) * sizeof(sqlite3_stmt*);
      sqlite3_stmt **apGrown = sqlite3_realloc(p->apPrepare, nByte);
      if( apGrown==0 ){
        return SQLITE_NOMEM;
      }
      p->nAlloc = p->nPrepare+32;
      p->apPrepare = apGrown;
    }

    ppSlot = &p->apPrepare[p->nPrepare];
    rc = sqlite3_prepare_v2(p->db, zRest, nRest, ppSlot, &zRest);
    if( rc!=SQLITE_OK ){
      send_message(p, "error - %s\n", sqlite3_errmsg(p->db));
      return 1;
    }

    /* No statement was produced: the input is exhausted */
    if( *ppSlot==0 ){
      return SQLITE_OK;
    }

    p->nPrepare++;
    nRest = nSql - (zRest-zSql);
    rc = send_message(p, "ok (%d SQL statements)\n", p->nPrepare);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
}

static sqlite3_int64 get_timer(void){
  struct timeval t;
  gettimeofday(&t, 0);
  return ((sqlite3_int64)t.tv_usec / 1000) + ((sqlite3_int64)t.tv_sec * 1000);
}

/*
** Finalize every prepared statement held by client p and mark the
** statement list as empty. The apPrepare[] array itself is kept so that
** it can be reused.
*/
static void clear_sql(ClientCtx *p){
  int iStmt = 0;
  while( iStmt<p->nPrepare ){
    sqlite3_finalize(p->apPrepare[iStmt]);
    iStmt++;
  }
  p->nPrepare = 0;
}

/*
** Execute a single dot-command on behalf of client p.
**
** zCmd points to the '.' that starts the command and nCmd is the length
** of the command in bytes, not counting the end-of-line character that
** terminated it. Command names may be abbreviated to any prefix that is
** at least as long as the minimum tested for below.
**
** Recognized commands:
**
**   .list       Send the text of every statement in the list.
**   .quit       Disconnect (by returning non-zero).
**   .repeats N  Set the number of iterations per ".run". A positive N
**               clears any ".seconds" limit.
**   .run        Run the statement list, then discard it.
**   .seconds N  Set the time limit per ".run". A positive N clears any
**               ".repeats" limit.
**
** Returns 0 if the caller should continue to service this client, or
** non-zero if the connection should be closed (".quit", an unrecognized
** command, a failure to send a reply, or an SQL error other than
** SQLITE_BUSY during ".run").
*/
static int handle_dot_command(ClientCtx *p, const char *zCmd, int nCmd){
  int n;                          /* Length of the command name */
  int rc = 0;
  const char *z = &zCmd[1];       /* Command name (text after the '.') */
  const char *zArg;               /* Argument text, if any */
  int nArg;                       /* Size of zArg in bytes */

  assert( zCmd[0]=='.' );

  /* Find the end of the command name */
  for(n=0; n<(nCmd-1); n++){
    if( is_whitespace(z[n]) ) break;
  }

  /* Whatever follows the name is the argument. The command occupies
  ** zCmd[0..nCmd-1] and the name starts at zCmd[1], so (nCmd-1-n) bytes
  ** remain after the name. */
  zArg = &z[n];
  nArg = nCmd-1-n;
  trim_string(&zArg, &nArg);

  if( n>=1 && n<=4 && 0==strncmp(z, "list", n) ){
    int i;
    for(i=0; rc==0 && i<p->nPrepare; i++){
      const char *zSql = sqlite3_sql(p->apPrepare[i]);
      int nSql = strlen(zSql);
      trim_string(&zSql, &nSql);
      rc = send_message(p, "%d: %.*s\n", i, nSql, zSql);
    }
  }

  else if( n>=1 && n<=4 && 0==strncmp(z, "quit", n) ){
    rc = 1;
  }

  else if( n>=2 && n<=7 && 0==strncmp(z, "repeats", n) ){
    if( nArg ){
      p->nRepeat = strtol(zArg, 0, 0);
      if( p->nRepeat>0 ) p->nSecond = 0;
    }
    rc = send_message(p, "ok (repeat=%d)\n", p->nRepeat);
  }

  else if( n>=2 && n<=3 && 0==strncmp(z, "run", n) ){
    int i, j;
    int nBusy = 0;                    /* Iterations abandoned as BUSY */
    sqlite3_int64 t0 = get_timer();   /* Time the run started */
    sqlite3_int64 t1 = t0;            /* Time of the last progress report */
    int nT1 = 0;                      /* Completed iterations at time t1 */
    int nTBusy1 = 0;                  /* Value of nBusy at time t1 */

    for(j=0; (p->nRepeat<=0 || j<p->nRepeat) && rc==SQLITE_OK; j++){
      sqlite3_int64 t2;

      for(i=0; i<p->nPrepare && rc==SQLITE_OK; i++){
        sqlite3_stmt *pStmt = p->apPrepare[i];

        /* Execute the statement */
        while( sqlite3_step(pStmt)==SQLITE_ROW );
        rc = sqlite3_reset(pStmt);

        if( (rc & 0xFF)==SQLITE_BUSY ){
          /* Abandon this iteration. Roll back any transaction that is
          ** still open so that the next iteration starts cleanly. */
          if( sqlite3_get_autocommit(p->db)==0 ){
            sqlite3_exec(p->db, "ROLLBACK", 0, 0, 0);
          }
          nBusy++;
          rc = SQLITE_OK;
          break;
        }
        else if( rc!=SQLITE_OK ){
          send_message(p, "error - %s\n", sqlite3_errmsg(p->db));
        }
      }

      /* Send a progress report once at least a second has passed since
      ** the previous one. */
      t2 = get_timer();
      if( t2>=(t1+1000) ){
        int nMs = (t2 - t1);
        int nDone = (j+1 - nBusy - nT1);

        rc = send_message(
            p, "(%d done @ %d per second, %d busy)\n",
            nDone, (1000*nDone + nMs/2) / nMs, nBusy - nTBusy1
        );
        t1 = t2;
        nT1 = j+1 - nBusy;
        nTBusy1 = nBusy;

        /* Compare in 64 bits so that a large ".seconds" value cannot
        ** overflow an int when converted to milliseconds. */
        if( p->nSecond>0 && ((sqlite3_int64)p->nSecond*1000)<=(t1-t0) ) break;
      }
    }

    if( rc==SQLITE_OK ){
      send_message(p, "ok (%d/%d SQLITE_BUSY)\n", nBusy, j);
    }
    clear_sql(p);
  }

  else if( n>=1 && n<=7 && 0==strncmp(z, "seconds", n) ){
    if( nArg ){
      p->nSecond = strtol(zArg, 0, 0);
      if( p->nSecond>0 ) p->nRepeat = 0;
    }
    /* Report the value that was just configured */
    rc = send_message(p, "ok (seconds=%d)\n", p->nSecond);
  }

  else{
    send_message(p,
        "unrecognized dot command: %.*s\n"
        "should be \"list\", \"quit\", \"repeats\", \"run\", or \"seconds\"\n",
        n, z
    );
    rc = 1;
  }

  return rc;
}

/*
** Thread main routine for a single client connection. pArg is the
** client socket descriptor, cast to a pointer by main().
**
** A database connection is opened for the client, then input is read
** from the socket and split into commands: text starting with '.' is a
** dot-command terminated by an end-of-line character, anything else is
** SQL terminated by a semi-colon. Commands are executed as soon as they
** are complete. The loop ends when the client disconnects, sends more
** than the buffer can hold without completing a command, or a command
** handler returns non-zero.
**
** All resources (socket, statements, database handle) are released
** before returning, including when the database cannot be opened.
** Always returns NULL.
*/
static void *handle_client(void *pArg){
  char zCmd[32*1024];             /* Read buffer */
  int nCmd = 0;                   /* Valid bytes in zCmd[] */
  int res;                        /* Result of read() call */
  int rc = SQLITE_OK;

  ClientCtx ctx;
  memset(&ctx, 0, sizeof(ClientCtx));

  ctx.fd = (int)(intptr_t)pArg;
  ctx.nRepeat = 1;
  rc = sqlite3_open(zDatabaseName, &ctx.db);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(ctx.db));
    /* A handle may be returned even when the open fails, and the client
    ** socket is owned by this thread. Release both. */
    sqlite3_close(ctx.db);
    close(ctx.fd);
    return 0;
  }

  while( rc==SQLITE_OK ){
    int i;
    int iStart;
    int nConsume;
    res = read(ctx.fd, &zCmd[nCmd], sizeof(zCmd)-nCmd-1);
    if( res<=0 ) break;
    nCmd += res;
    if( nCmd>=(int)sizeof(zCmd)-1 ){
      fprintf(stderr, "oversized (>32KiB) message\n");
      break;
    }
    zCmd[nCmd] = '\0';

    /* Process as many complete commands as the buffer contains */
    do {
      nConsume = 0;

      /* Gobble up any whitespace */
      iStart = 0;
      while( is_whitespace(zCmd[iStart]) ) iStart++;

      if( zCmd[iStart]=='.' ){
        /* This is a dot-command. Search for end-of-line. */
        for(i=iStart; i<nCmd; i++){
          if( is_eol(zCmd[i]) ){
            rc = handle_dot_command(&ctx, &zCmd[iStart], i-iStart);
            nConsume = i+1;
            break;
          }
        }
      }else{
        /* This is SQL. Search for a semi-colon, and temporarily
        ** nul-terminate the buffer immediately after it so that the
        ** text up to that point can be tested for completeness. */
        int iSemi;
        char c = 0;
        for(iSemi=iStart; iSemi<nCmd; iSemi++){
          if( zCmd[iSemi]==';' ){
            c = zCmd[iSemi+1];
            zCmd[iSemi+1] = '\0';
            break;
          }
        }

        if( iSemi<nCmd ){
          if( sqlite3_complete(zCmd) ){
            rc = handle_some_sql(&ctx, zCmd, iSemi+1);
            nConsume = iSemi+1;
          }

          /* Restore the byte overwritten above */
          if( c ){
            zCmd[iSemi+1] = c;
          }
        }
      }

      if( nConsume>0 ){
        /* Discard the consumed command and keep the buffer terminated,
        ** so the next pass never looks at stale bytes. */
        nCmd = nCmd-nConsume;
        if( nCmd>0 ){
          memmove(zCmd, &zCmd[nConsume], nCmd);
        }
        zCmd[nCmd] = '\0';
      }
    }while( rc==SQLITE_OK && nConsume>0 );
  }

  fprintf(stdout, "Client %d disconnects\n", ctx.fd);
  close(ctx.fd);
  clear_sql(&ctx);
  sqlite3_free(ctx.apPrepare);
  sqlite3_close(ctx.db);
  return 0;
} 

/*
** Program entry point. Usage: tserver DATABASE
**
** Opens the named database and checks that its schema can be read, then
** listens on 127.0.0.1:TSERVER_PORTNUMBER and starts one detached thread
** running handle_client() for every connection accepted.
**
** The accept loop runs until an error occurs, so the process only ever
** exits with status 1 (after printing a message to stderr).
*/
int main(int argc, char *argv[]) {
  sqlite3 *db;
  int sfd;
  int rc;
  int yes = 1;
  struct sockaddr_in server;

  /* Ignore SIGPIPE. Otherwise the server exits if a client disconnects
  ** abruptly.  */
  signal(SIGPIPE, SIG_IGN);

  if( argc!=2 ){
    fprintf(stderr, "Usage: %s DATABASE\n", argv[0]);
    return 1;
  }
  zDatabaseName = argv[1];

  /* This handle is not closed while the server is running.
  ** NOTE(review): presumably it is held open on purpose so the database
  ** stays open between client connections - confirm. */
  rc = sqlite3_open(zDatabaseName, &db);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(db));
    sqlite3_close(db);
    return 1;
  }

  /* Reading the schema verifies that the file is a usable database */
  rc = sqlite3_exec(db, "SELECT * FROM sqlite_master", 0, 0, 0);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "sqlite3_exec(): %s\n", sqlite3_errmsg(db));
    sqlite3_close(db);
    return 1;
  }

  sfd = socket(AF_INET, SOCK_STREAM, 0);
  if( sfd<0 ){
    fprintf(stderr, "socket() failed\n");
    return 1;
  }

  rc = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
  if( rc<0 ){
    perror("setsockopt");
    return 1;
  }

  memset(&server, 0, sizeof(server));
  server.sin_family = AF_INET;
  server.sin_addr.s_addr = inet_addr("127.0.0.1");
  server.sin_port = htons(TSERVER_PORTNUMBER);

  /* Pass the size of the address object that is actually being bound */
  rc = bind(sfd, (struct sockaddr *)&server, sizeof(server));
  if( rc<0 ){
    fprintf(stderr, "bind() failed\n");
    return 1;
  }

  rc = listen(sfd, 8);
  if( rc<0 ){
    fprintf(stderr, "listen() failed\n");
    return 1;
  }

  while( 1 ){
    pthread_t tid;
    int cfd = accept(sfd, NULL, NULL);
    if( cfd<0 ){
      perror("accept()");
      return 1;
    }

    fprintf(stdout, "Client %d connects\n", cfd);
    rc = pthread_create(&tid, NULL, handle_client, (void*)(intptr_t)cfd);
    if( rc!=0 ){
      /* pthread_create() returns the error number and does not set
      ** errno, so perror() would report the wrong error here. */
      fprintf(stderr, "pthread_create(): %s\n", strerror(rc));
      return 1;
    }

    pthread_detach(tid);
  }

  return 0;
}