SQLite

Check-in [78a623bbb1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use a macro to make sure that the calls to sqlite3Utf8Read() from MemTranslate() are inlined. This speeds up converting from utf8 to utf16. (CVS 5492)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 78a623bbb17e35bcb902d3ea1d0b963570588d2d
User & Date: danielk1977 2008-07-29 11:25:14.000
Context
2008-07-29
14:28
Increase the version number to 3.6.1. (CVS 5493) (check-in: ec09b551e8 user: drh tags: trunk)
11:25
Use a macro to make sure that the calls to sqlite3Utf8Read() from MemTranslate() are inlined. This speeds up converting from utf8 to utf16. (CVS 5492) (check-in: 78a623bbb1 user: danielk1977 tags: trunk)
10:26
Add a missing "finish_test" to the end of misc5.test. (CVS 5491) (check-in: 89216b1669 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/utf.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.62 2008/06/27 18:59:45 mihailim Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.63 2008/07/29 11:25:14 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
150
151
152
153
154
155
156











157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
**     and rendered as themselves even though they are technically
**     invalid characters.
**
**  *  This routine accepts an infinite number of different UTF8 encodings
**     for unicode values 0x80 and greater.  It do not change over-length
**     encodings to 0xfffd as some systems recommend.
*/











int sqlite3Utf8Read(
  const unsigned char *z,         /* First byte of UTF-8 character */
  const unsigned char *zTerm,     /* Pretend this byte is 0x00 */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
){
  int c = *(z++);
  if( c>=0xc0 ){
    c = sqlite3UtfTrans1[c-0xc0];
    while( z!=zTerm && (*z & 0xc0)==0x80 ){
      c = (c<<6) + (0x3f & *(z++));
    }
    if( c<0x80
        || (c&0xFFFFF800)==0xD800
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }
  }
  *pzNext = z;
  return c;
}




/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
*/ 







>
>
>
>
>
>
>
>
>
>
>





|
<
<
<
<
<
|
<
<
<



>







150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173





174



175
176
177
178
179
180
181
182
183
184
185
**     and rendered as themselves even though they are technically
**     invalid characters.
**
**  *  This routine accepts an infinite number of different UTF8 encodings
**     for unicode values 0x80 and greater.  It do not change over-length
**     encodings to 0xfffd as some systems recommend.
*/
#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3UtfTrans1[c-0xc0];                          \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }
int sqlite3Utf8Read(
  const unsigned char *z,         /* First byte of UTF-8 character */
  const unsigned char *zTerm,     /* Pretend this byte is 0x00 */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
){
  int c;





  READ_UTF8(z, zTerm, c);



  *pzNext = z;
  return c;
}




/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
*/ 
264
265
266
267
268
269
270
271

272
273
274
275
276
277
278

279
280
281
282
283
284
285
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);

        WRITE_UTF16LE(z, c);
      }
    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);

        WRITE_UTF16BE(z, c);
      }
    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );







|
>






|
>







268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
        READ_UTF8(zIn, zTerm, c);
        WRITE_UTF16LE(z, c);
      }
    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
        READ_UTF8(zIn, zTerm, c);
        WRITE_UTF16BE(z, c);
      }
    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );