/ Changes On Branch wal-header-sync
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch wal-header-sync Excluding Merge-Ins

This is equivalent to a diff from 09ccc4a1be to 9799241f7d

2011-12-17
13:45
Merge in changes that cause the first sector of the WAL file to be synced when the WAL restarts. This is a fix for the power-loss corruption problem described in ticket [ff5be73dee086] (check-in: 44ca4d1233 user: drh tags: trunk)
2011-12-16
21:26
Enhance the WAL header sync so that it honors the various synchronous pragmas, settings, and device characteristics. (Closed-Leaf check-in: 9799241f7d user: drh tags: wal-header-sync)
19:34
Proposed changes that ensure that the WAL header is written prior to the first commit mark. (check-in: 91d0437c07 user: drh tags: wal-header-sync)
17:01
Add code for a test that was failing before the persistent-wal related changes of [09ccc4a1be]. (check-in: 49d21ce50f user: dan tags: trunk)
15:38
Merge the fix for [a1fa75cbdd02] from the experimental branch. Also fix the persistent-wal mode feature of truncating the WAL on close so that it always truncates the WAL to zero bytes. (check-in: 09ccc4a1be user: drh tags: trunk)
15:11
Improved logging of master-journal name conflicts. (check-in: b1005ef46c user: drh tags: trunk)
13:24
Experimental fix for [a1fa75cbdd]. (Closed-Leaf check-in: 6492af76ea user: dan tags: experimental)

Changes to src/pager.c.

612
613
614
615
616
617
618

619
620
621
622
623
624
625
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 ckptSyncFlags;           /* SYNC_NORMAL or SYNC_FULL for checkpoint */

  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 memDb;                   /* True to inhibit all file I/O */

  /**************************************************************************
  ** The following block contains those class members that change during







>







612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 ckptSyncFlags;           /* SYNC_NORMAL or SYNC_FULL for checkpoint */
  u8 walSyncFlags;            /* SYNC_NORMAL or SYNC_FULL for wal writes */
  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 memDb;                   /* True to inhibit all file I/O */

  /**************************************************************************
  ** The following block contains those class members that change during
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
#ifndef SQLITE_OMIT_WAL
/*
** Return 1 if the pager has a WAL handle attached (pPager->pWal is
** non-NULL) and 0 if it does not.
*/
static int pagerUseWal(Pager *pPager){
  if( pPager->pWal ){
    return 1;
  }
  return 0;
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

#ifndef NDEBUG 
/*
** Usage:







|







783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
#ifndef SQLITE_OMIT_WAL
/*
** Return 1 if the pager has a WAL handle attached (pPager->pWal is
** non-NULL) and 0 if it does not.
*/
static int pagerUseWal(Pager *pPager){
  if( pPager->pWal ){
    return 1;
  }
  return 0;
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

#ifndef NDEBUG 
/*
** Usage:
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
** The list of pages passed into this routine is always sorted by page number.
** Hence, if page 1 appears anywhere on the list, it will be the first page.
*/ 
static int pagerWalFrames(
  Pager *pPager,                  /* Pager object */
  PgHdr *pList,                   /* List of frames to log */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit,                   /* True if this is a commit */
  int syncFlags                   /* Flags to pass to OsSync() (or 0) */
){
  int rc;                         /* Return code */
#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
  PgHdr *p;                       /* For looping over pages */
#endif

  assert( pPager->pWal );







|
<







2952
2953
2954
2955
2956
2957
2958
2959

2960
2961
2962
2963
2964
2965
2966
** The list of pages passed into this routine is always sorted by page number.
** Hence, if page 1 appears anywhere on the list, it will be the first page.
*/ 
static int pagerWalFrames(
  Pager *pPager,                  /* Pager object */
  PgHdr *pList,                   /* List of frames to log */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit                    /* True if this is a commit */

){
  int rc;                         /* Return code */
#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
  PgHdr *p;                       /* For looping over pages */
#endif

  assert( pPager->pWal );
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
    }
    assert( pList );
  }

  if( pList->pgno==1 ) pager_write_changecounter(pList);
  rc = sqlite3WalFrames(pPager->pWal, 
      pPager->pageSize, pList, nTruncate, isCommit, syncFlags
  );
  if( rc==SQLITE_OK && pPager->pBackup ){
    PgHdr *p;
    for(p=pList; p; p=p->pDirty){
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }







|







2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
    }
    assert( pList );
  }

  if( pList->pgno==1 ) pager_write_changecounter(pList);
  rc = sqlite3WalFrames(pPager->pWal, 
      pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags
  );
  if( rc==SQLITE_OK && pPager->pBackup ){
    PgHdr *p;
    for(p=pList; p; p=p->pDirty){
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
3363
3364
3365
3366
3367
3368
3369




3370
3371
3372
3373
3374
3375
3376
  }else if( bCkptFullFsync ){
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  }else{
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }




}
#endif

/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file.  This information is used for
** testing and analysis only.  







>
>
>
>







3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
  }else if( bCkptFullFsync ){
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  }else{
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }
  pPager->walSyncFlags = pPager->syncFlags;
  if( pPager->fullSync ){
    pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS;
  }
}
#endif

/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file.  This information is used for
** testing and analysis only.  
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
  pPg->pDirty = 0;
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    if( subjRequiresPage(pPg) ){ 
      rc = subjournalPage(pPg); 
    }
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0, 0);
    }
  }else{
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){







|







4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
  pPg->pDirty = 0;
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    if( subjRequiresPage(pPg) ){ 
      rc = subjournalPage(pPg); 
    }
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0);
    }
  }else{
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){
4529
4530
4531
4532
4533
4534
4535






4536
4537

4538

4539
4540
4541
4542
4543
4544
4545
  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  pPager->exclusiveMode = (u8)tempFile; 
  pPager->changeCountDone = pPager->tempFile;
  pPager->memDb = (u8)memDb;
  pPager->readOnly = (u8)readOnly;
  assert( useJournal || pPager->tempFile );
  pPager->noSync = pPager->tempFile;






  pPager->fullSync = pPager->noSync ?0:1;
  pPager->syncFlags = pPager->noSync ? 0 : SQLITE_SYNC_NORMAL;

  pPager->ckptSyncFlags = pPager->syncFlags;

  /* pPager->pFirst = 0; */
  /* pPager->pFirstSynced = 0; */
  /* pPager->pLast = 0; */
  pPager->nExtra = (u16)nExtra;
  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  assert( isOpen(pPager->fd) || tempFile );
  setSectorSize(pPager);







>
>
>
>
>
>
|
|
>
|
>







4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  pPager->exclusiveMode = (u8)tempFile; 
  pPager->changeCountDone = pPager->tempFile;
  pPager->memDb = (u8)memDb;
  pPager->readOnly = (u8)readOnly;
  assert( useJournal || pPager->tempFile );
  pPager->noSync = pPager->tempFile;
  if( pPager->noSync ){
    assert( pPager->fullSync==0 );
    assert( pPager->syncFlags==0 );
    assert( pPager->walSyncFlags==0 );
    assert( pPager->ckptSyncFlags==0 );
  }else{
    pPager->fullSync = 1;
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }
  /* pPager->pFirst = 0; */
  /* pPager->pFirstSynced = 0; */
  /* pPager->pLast = 0; */
  pPager->nExtra = (u16)nExtra;
  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  assert( isOpen(pPager->fd) || tempFile );
  setSectorSize(pPager);
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
        pList = pPageOne;
        pList->pDirty = 0;
      }
      assert( rc==SQLITE_OK );
      if( ALWAYS(pList) ){
        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, 
            (pPager->fullSync ? pPager->syncFlags : 0)
        );
      }
      sqlite3PagerUnref(pPageOne);
      if( rc==SQLITE_OK ){
        sqlite3PcacheCleanAll(pPager->pPCache);
      }
    }else{
      /* The following block updates the change-counter. Exactly how it







|
<
<







5773
5774
5775
5776
5777
5778
5779
5780


5781
5782
5783
5784
5785
5786
5787
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
        pList = pPageOne;
        pList->pDirty = 0;
      }
      assert( rc==SQLITE_OK );
      if( ALWAYS(pList) ){
        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1);


      }
      sqlite3PagerUnref(pPageOne);
      if( rc==SQLITE_OK ){
        sqlite3PcacheCleanAll(pPager->pPCache);
      }
    }else{
      /* The following block updates the change-counter. Exactly how it

Changes to src/wal.c.

410
411
412
413
414
415
416

417
418
419

420
421
422
423
424

425
426
427
428
429
430
431
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */

  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */

  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */

  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};







>



>





>







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
/*
** State associated with a single Wal handle.
**
** Three members exist to support syncing the first block of the WAL file:
**
**   noSyncHeader   Set when the WAL is opened if the device reports
**                  SQLITE_IOCAP_SEQUENTIAL.  While set, szFirstBlock and
**                  syncFlags are expected to remain zero.
**
**   szFirstBlock   Otherwise set by the frame-writing code to the larger
**                  of the sector size and the page size.
**
**   syncFlags      Otherwise set by the frame-writing code to the
**                  SQLITE_SYNC_* bits of the caller's sync_flags.  Zero
**                  disables the first-block sync in walWriteToLog().
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */
  int szFirstBlock;          /* Size of first block written to WAL file */
  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 syncFlags;              /* Flags to use to sync header writes */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 noSyncHeader;           /* Avoid WAL header fsyncs if true */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};
1292
1293
1294
1295
1296
1297
1298


1299
1300
1301
1302
1303
1304
1305
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{


    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*







>
>







1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd);
    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; }
    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*
2612
2613
2614
2615
2616
2617
2618






































2619
2620
2621
2622
2623
2624
2625
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}







































/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}

/*
** Write iAmt bytes of content into the WAL file beginning at iOffset.
**
** When a write reaches or crosses the end of the first block of the file
** (the first pWal->szFirstBlock bytes), first write all of the first-block
** content, then fsync(), then continue writing whatever content lies
** beyond the first block.  This ensures that the WAL header is overwritten
** before the first commit mark.
**
** No split and no sync happen when pWal->syncFlags is zero, or when the
** write lies entirely before or entirely after the first-block boundary.
**
** A write that ends exactly on the boundary is written in one piece and
** then synced; there is no second piece to write in that case.
**
** The return value is the result code of the first sqlite3OsWrite() or
** sqlite3OsSync() call that fails, or of the last call made if none fail.
*/
static int walWriteToLog(
  Wal *pWal,                 /* WAL to write to */
  void *pContent,            /* Content to be written */
  int iAmt,                  /* Number of bytes to write */
  sqlite3_int64 iOffset      /* Start writing at this offset */
){
  int rc;                    /* Result code of the most recent I/O call */
  int iFirstAmt;             /* Bytes of this write inside the first block */

  if( iOffset>=pWal->szFirstBlock
   || iOffset+iAmt<pWal->szFirstBlock
   || pWal->syncFlags==0
  ){
    /* The common and fast case.  Just write the data. */
    return sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset);
  }

  /* This write reaches the end of the first block, so it has to be
  ** split in two with a sync in between. */
  iFirstAmt = (int)(pWal->szFirstBlock - iOffset);
  assert( iFirstAmt>0 && iFirstAmt<=iAmt );
  rc = sqlite3OsWrite(pWal->pWalFd, pContent, iFirstAmt, iOffset);
  if( rc ) return rc;
  assert( pWal->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) );
  rc = sqlite3OsSync(pWal->pWalFd, pWal->syncFlags);
  if( rc ) return rc;

  /* The write ended exactly on the boundary: nothing is left to write,
  ** so do not issue a zero-length write. */
  if( iFirstAmt==iAmt ) return rc;

  pContent = (void*)(iFirstAmt + (char*)pContent);
  return sqlite3OsWrite(pWal->pWalFd, pContent,
                        iAmt-iFirstAmt, iOffset+iFirstAmt);
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
2681
2682
2683
2684
2685
2686
2687










2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );











  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
    iOffset = walFrameOffset(++iFrame, szPage);
    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( sync_flags ){
    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
    i64 iOffset = walFrameOffset(iFrame+1, szPage);

    assert( isCommit );
    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      void *pData;
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); 
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }

    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);
    }







>
>
>
>
>
>
>
>
>
>


















|





|







|



<












|




|







|







2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776

2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );

  /* Setup information needed to do the WAL header sync */
  if( pWal->noSyncHeader ){
    assert( pWal->szFirstBlock==0 );
    assert( pWal->syncFlags==0 );
  }else{
    pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd);
    if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage;
    pWal->syncFlags = sync_flags & SQLITE_SYNC_MASK;
  }

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
    iOffset = walFrameOffset(++iFrame, szPage);
    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if this is a commit and WAL_SYNC_TRANSACTIONS is set. */
  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){
    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
    i64 iOffset = walFrameOffset(iFrame+1, szPage);


    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      void *pData;
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = walWriteToLog(pWal, pData, szPage, iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }

    rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);
    }

Changes to src/wal.h.

83
84
85
86
87
88
89






90
91
92
93
94
95
96
/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);







/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int eMode,                      /* One of PASSIVE, FULL and RESTART */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */







>
>
>
>
>
>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Additional values used together with the sync_flags argument of
** sqlite3WalFrames():
**
** WAL_SYNC_TRANSACTIONS is a flag that is OR-ed into sync_flags alongside
** one of the SQLITE_SYNC_* values.
**
** SQLITE_SYNC_MASK is not itself a flag.  It is AND-ed with sync_flags to
** strip WAL_SYNC_TRANSACTIONS and leave only the SQLITE_SYNC_* bits, which
** are what gets passed on to sqlite3OsSync().
*/
#define WAL_SYNC_TRANSACTIONS  0x20   /* Sync at the end of each transaction */
#define SQLITE_SYNC_MASK       0x13   /* Mask off the SQLITE_SYNC_* values */

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int eMode,                      /* One of PASSIVE, FULL and RESTART */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */

Changes to test/wal2.test.

1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
  }
}

#-------------------------------------------------------------------------
# Test that "PRAGMA checkpoint_fullfsync" appears to be working.
#
foreach {tn sql reslist} {
  1 { }                                 {8 0 3 0 5 0}
  2 { PRAGMA checkpoint_fullfsync = 1 } {8 4 3 2 5 2}
  3 { PRAGMA checkpoint_fullfsync = 0 } {8 0 3 0 5 0}
} {
  faultsim_delete_and_reopen

  execsql {PRAGMA auto_vacuum = 0}
  execsql $sql
  do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal}

  set sqlite_sync_count 0
  set sqlite_fullsync_count 0

  do_execsql_test wal2-14.$tn.2 {
    PRAGMA wal_autocheckpoint = 10;
    CREATE TABLE t1(a, b);                -- 2 wal syncs
    INSERT INTO t1 VALUES(1, 2);          -- 1 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
    BEGIN;
      INSERT INTO t1 VALUES(3, 4);
      INSERT INTO t1 VALUES(5, 6);
    COMMIT;                               -- 1 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
  } {10 0 5 5 0 2 2}

  do_test wal2-14.$tn.3 {
    cond_incr_sync_count 1
    list $sqlite_sync_count $sqlite_fullsync_count
  } [lrange $reslist 0 1]







|
|
|













|




|







1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
  }
}

#-------------------------------------------------------------------------
# Test that "PRAGMA checkpoint_fullfsync" appears to be working.
#
foreach {tn sql reslist} {
  1 { }                                 {10 0 4 0 6 0}
  2 { PRAGMA checkpoint_fullfsync = 1 } {10 4 4 2 6 2}
  3 { PRAGMA checkpoint_fullfsync = 0 } {10 0 4 0 6 0}
} {
  faultsim_delete_and_reopen

  execsql {PRAGMA auto_vacuum = 0}
  execsql $sql
  do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal}

  set sqlite_sync_count 0
  set sqlite_fullsync_count 0

  do_execsql_test wal2-14.$tn.2 {
    PRAGMA wal_autocheckpoint = 10;
    CREATE TABLE t1(a, b);                -- 2 wal syncs
    INSERT INTO t1 VALUES(1, 2);          -- 2 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
    BEGIN;
      INSERT INTO t1 VALUES(3, 4);
      INSERT INTO t1 VALUES(5, 6);
    COMMIT;                               -- 2 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
  } {10 0 5 5 0 2 2}

  do_test wal2-14.$tn.3 {
    cond_incr_sync_count 1
    list $sqlite_sync_count $sqlite_fullsync_count
  } [lrange $reslist 0 1]
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264

1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275







1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298

catch { db close }

# PRAGMA checkpoint_fullfsync
# PRAGMA fullfsync
# PRAGMA synchronous
#
foreach {tn settings commit_sync ckpt_sync} {
  1  {0 0 off}     {0 0}  {0 0}
  2  {0 0 normal}  {0 0}  {2 0}
  3  {0 0 full}    {1 0}  {2 0}

  4  {0 1 off}     {0 0}  {0 0}
  5  {0 1 normal}  {0 0}  {0 2}
  6  {0 1 full}    {0 1}  {0 2}

  7  {1 0 off}     {0 0}  {0 0}
  8  {1 0 normal}  {0 0}  {0 2}
  9  {1 0 full}    {1 0}  {0 2}

  10 {1 1 off}     {0 0}  {0 0}
  11 {1 1 normal}  {0 0}  {0 2}
  12 {1 1 full}    {0 1}  {0 2}
} {
  forcedelete test.db

  testvfs tvfs -default 1
  tvfs filter xSync
  tvfs script xSyncCb
  proc xSyncCb {method file fileid flags} {
    incr ::sync($flags)
  }

  sqlite3 db test.db
  do_execsql_test 15.$tn.1 "
    CREATE TABLE t1(x);

    PRAGMA journal_mode = WAL;
    PRAGMA checkpoint_fullfsync = [lindex $settings 0];
    PRAGMA fullfsync = [lindex $settings 1];
    PRAGMA synchronous = [lindex $settings 2];
  " {wal}

  do_test 15.$tn.2 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)







  } $commit_sync

  do_test 15.$tn.3 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('def') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  do_test 15.$tn.4 {
    set sync(normal) 0
    set sync(full) 0
    execsql { PRAGMA wal_checkpoint }
    list $::sync(normal) $::sync(full)
  } $ckpt_sync
  
  db close
  tvfs delete
}



finish_test







|
|
|
|

|
|
|

|
|
|

|
|
|













>




|






>
>
>
>
>
>
>


|






|













1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306

catch { db close }

# Count the xSync calls made under each combination of:
#
#   PRAGMA checkpoint_fullfsync
#   PRAGMA fullfsync
#   PRAGMA synchronous
#
# Loop variables:
#
#   tn            Test case number.
#   settings      Values for the three pragmas above, in that order.
#   restart_sync  Expected counts for the first INSERT after the setup.
#   commit_sync   Expected counts for each of the next two INSERTs.
#   ckpt_sync     Expected counts for an explicit "PRAGMA wal_checkpoint".
#
# Each expected-count value is a two-element list: the number of xSync
# calls seen with flags "normal" followed by the number seen with "full".
#
foreach {tn settings restart_sync commit_sync ckpt_sync} {
  1  {0 0 off}     {0 0}  {0 0}  {0 0}
  2  {0 0 normal}  {1 0}  {0 0}  {2 0}
  3  {0 0 full}    {2 0}  {1 0}  {2 0}

  4  {0 1 off}     {0 0}  {0 0}  {0 0}
  5  {0 1 normal}  {0 1}  {0 0}  {0 2}
  6  {0 1 full}    {0 2}  {0 1}  {0 2}

  7  {1 0 off}     {0 0}  {0 0}  {0 0}
  8  {1 0 normal}  {1 0}  {0 0}  {0 2}
  9  {1 0 full}    {2 0}  {1 0}  {0 2}

  10 {1 1 off}     {0 0}  {0 0}  {0 0}
  11 {1 1 normal}  {0 1}  {0 0}  {0 2}
  12 {1 1 full}    {0 2}  {0 1}  {0 2}
} {
  forcedelete test.db

  # Install a default test VFS whose xSync calls invoke xSyncCb, which
  # tallies each call in the global sync array under its flags value.
  testvfs tvfs -default 1
  tvfs filter xSync
  tvfs script xSyncCb
  proc xSyncCb {method file fileid flags} {
    incr ::sync($flags)
  }

  # Create the table, turn automatic checkpoints off, switch to WAL mode
  # and apply the three pragma settings under test.
  sqlite3 db test.db
  do_execsql_test 15.$tn.1 "
    CREATE TABLE t1(x);
    PRAGMA wal_autocheckpoint = OFF;
    PRAGMA journal_mode = WAL;
    PRAGMA checkpoint_fullfsync = [lindex $settings 0];
    PRAGMA fullfsync = [lindex $settings 1];
    PRAGMA synchronous = [lindex $settings 2];
  " {0 wal}

  # First INSERT after the setup: checked against restart_sync.
  do_test 15.$tn.2 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)
  } $restart_sync

  # The next two INSERTs are each checked against commit_sync.
  do_test 15.$tn.3 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  do_test 15.$tn.4 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('def') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  # An explicit checkpoint: checked against ckpt_sync.
  do_test 15.$tn.5 {
    set sync(normal) 0
    set sync(full) 0
    execsql { PRAGMA wal_checkpoint }
    list $::sync(normal) $::sync(full)
  } $ckpt_sync
  
  db close
  tvfs delete
}



finish_test

Changes to test/wal3.test.

213
214
215
216
217
218
219

220
221
222
223
224
225
226
    testvfs T
    T filter {} 
    T script sync_counter
    sqlite3 db test.db -vfs T
  
    execsql "PRAGMA synchronous = $syncmode"
    execsql { PRAGMA journal_mode = WAL }


    set ::syncs [list]
    T filter xSync
    execsql {
      CREATE TABLE x(y);
      INSERT INTO x VALUES('z');
      PRAGMA wal_checkpoint;







>







213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
    testvfs T
    T filter {} 
    T script sync_counter
    sqlite3 db test.db -vfs T
  
    execsql "PRAGMA synchronous = $syncmode"
    execsql { PRAGMA journal_mode = WAL }
    execsql { CREATE TABLE filler(a,b,c); }

    set ::syncs [list]
    T filter xSync
    execsql {
      CREATE TABLE x(y);
      INSERT INTO x VALUES('z');
      PRAGMA wal_checkpoint;