Check-in [51027f08c0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Proper surrogate pair decoding added to JSON functions. See the mailing list bug report and [https://bugs.python.org/issue38749]. More test cases needed here, but it seems to work so far.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 51027f08c0478f1bf9d7545d9e268c772c0a5cd5dda4b03d78f16c7d94f2f50d
User & Date: drh 2019-11-10 11:09:06
Context
2019-11-11
15:13
Remove an asm() block from build tool mksourceid.c, as it causes build failures on some systems and performance is not important at build-time. check-in: 8e100e6c35 user: dan tags: trunk
2019-11-10
11:09
Proper surrogate pair decoding added to JSON functions. See the mailing list bug report and [https://bugs.python.org/issue38749]. More test cases needed here, but it seems to work so far. check-in: 51027f08c0 user: drh tags: trunk
10:08
Remove an incorrect ALWAYS() macro. check-in: f7a74f89db user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/misc/json1.c.

   517    517   ){
   518    518     JsonString s;
   519    519     jsonInit(&s, pCtx);
   520    520     jsonRenderNode(pNode, &s, aReplace);
   521    521     jsonResult(&s);
   522    522     sqlite3_result_subtype(pCtx, JSON_SUBTYPE);
   523    523   }
          524  +
          525  +/*
          526  +** Translate a single byte of Hex into an integer.
          527  +** This routine only works if h really is a valid hexadecimal
          528  +** character:  0..9a..fA..F
          529  +*/
          530  +static u8 jsonHexToInt(int h){
          531  +  assert( (h>='0' && h<='9') ||  (h>='a' && h<='f') ||  (h>='A' && h<='F') );
          532  +#ifdef SQLITE_EBCDIC
          533  +  h += 9*(1&~(h>>4));
          534  +#else
          535  +  h += 9*(1&(h>>6));
          536  +#endif
          537  +  return (u8)(h & 0xf);
          538  +}
          539  +
          540  +/*
          541  +** Convert a 4-byte hex string into an integer
          542  +*/
          543  +static u32 jsonHexToInt4(const char *z){
          544  +  u32 v;
          545  +  assert( safe_isxdigit(z[0]) );
          546  +  assert( safe_isxdigit(z[1]) );
          547  +  assert( safe_isxdigit(z[2]) );
          548  +  assert( safe_isxdigit(z[3]) );
          549  +  v = (jsonHexToInt(z[0])<<12)
          550  +    + (jsonHexToInt(z[1])<<8)
          551  +    + (jsonHexToInt(z[2])<<4)
          552  +    + jsonHexToInt(z[3]);
          553  +  return v;
          554  +}
   524    555   
   525    556   /*
   526    557   ** Make the JsonNode the return value of the function.
   527    558   */
   528    559   static void jsonReturn(
   529    560     JsonNode *pNode,            /* Node to return */
   530    561     sqlite3_context *pCtx,      /* Return value for this function */
................................................................................
   611    642           for(i=1, j=0; i<n-1; i++){
   612    643             char c = z[i];
   613    644             if( c!='\\' ){
   614    645               zOut[j++] = c;
   615    646             }else{
   616    647               c = z[++i];
   617    648               if( c=='u' ){
   618         -              u32 v = 0, k;
   619         -              for(k=0; k<4; i++, k++){
   620         -                assert( i<n-2 );
   621         -                c = z[i+1];
   622         -                assert( safe_isxdigit(c) );
   623         -                if( c<='9' ) v = v*16 + c - '0';
   624         -                else if( c<='F' ) v = v*16 + c - 'A' + 10;
   625         -                else v = v*16 + c - 'a' + 10;
   626         -              }
          649  +              u32 v = jsonHexToInt4(z+i+1);
          650  +              i += 4;
   627    651                 if( v==0 ) break;
   628    652                 if( v<=0x7f ){
   629    653                   zOut[j++] = (char)v;
   630    654                 }else if( v<=0x7ff ){
   631    655                   zOut[j++] = (char)(0xc0 | (v>>6));
   632    656                   zOut[j++] = 0x80 | (v&0x3f);
   633    657                 }else{
   634         -                zOut[j++] = (char)(0xe0 | (v>>12));
   635         -                zOut[j++] = 0x80 | ((v>>6)&0x3f);
   636         -                zOut[j++] = 0x80 | (v&0x3f);
          658  +                u32 vlo;
          659  +                if( (v&0xfc00)==0xd800
          660  +                  && i<n-6
          661  +                  && z[i+1]=='\\'
          662  +                  && z[i+2]=='u'
          663  +                  && ((vlo = jsonHexToInt4(z+i+3))&0xfc00)==0xdc00
          664  +                ){
          665  +                  /* We have a surrogate pair */
          666  +                  v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
          667  +                  i += 6;
          668  +                  zOut[j++] = 0xf0 | (v>>18);
          669  +                  zOut[j++] = 0x80 | ((v>>12)&0x3f);
          670  +                  zOut[j++] = 0x80 | ((v>>6)&0x3f);
          671  +                  zOut[j++] = 0x80 | (v&0x3f);
          672  +                }else{
          673  +                  zOut[j++] = 0xe0 | (v>>12);
          674  +                  zOut[j++] = 0x80 | ((v>>6)&0x3f);
          675  +                  zOut[j++] = 0x80 | (v&0x3f);
          676  +                }
   637    677                 }
   638    678               }else{
   639    679                 if( c=='b' ){
   640    680                   c = '\b';
   641    681                 }else if( c=='f' ){
   642    682                   c = '\f';
   643    683                 }else if( c=='n' ){

Changes to test/json101.test.

   827    827   } {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
   828    828   do_execsql_test json-15.120 {
   829    829     SELECT * FROM (JSON_EACH('{"a":1, "b":2}'));
   830    830   } {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
   831    831   do_execsql_test json-15.130 {
   832    832     SELECT xyz.* FROM (JSON_EACH('{"a":1, "b":2}')) AS xyz;
   833    833   } {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
          834  +
          835  +# 2019-11-10
          836  +# Mailing list bug report on the handling of surrogate pairs
          837  +# in JSON.
          838  +#
          839  +do_execsql_test json-16.10 {
          840  +  SELECT length(json_extract('"abc\uD834\uDD1Exyz"','$'));
          841  +} {7}
          842  +do_execsql_test json-16.20 {
          843  +  SELECT length(json_extract('"\uD834\uDD1E"','$'));
          844  +} {1}
          845  +do_execsql_test json-16.30 {
          846  +  SELECT unicode(json_extract('"\uD834\uDD1E"','$'));
          847  +} {119070}
          848  +
   834    849   
   835    850   finish_test