Index: src/printf.c ================================================================== --- src/printf.c +++ src/printf.c @@ -622,26 +622,56 @@ length = 1; break; case etCHARX: if( bArgList ){ bufpt = getTextArg(pArgList); - c = bufpt ? bufpt[0] : 0; + length = 1; + if( bufpt ){ + buf[0] = c = *(bufpt++); + if( (c&0xc0)==0xc0 ){ + while( length<4 && (bufpt[0]&0xc0)==0x80 ){ + buf[length++] = *(bufpt++); + } + } + }else{ + buf[0] = 0; + } }else{ - c = va_arg(ap,int); + unsigned int ch = va_arg(ap,unsigned int); + if( ch<0x00080 ){ + buf[0] = ch & 0xff; + length = 1; + }else if( ch<0x00800 ){ + buf[0] = 0xc0 + (u8)((ch>>6)&0x1f); + buf[1] = 0x80 + (u8)(ch & 0x3f); + length = 2; + }else if( ch<0x10000 ){ + buf[0] = 0xe0 + (u8)((ch>>12)&0x0f); + buf[1] = 0x80 + (u8)((ch>>6) & 0x3f); + buf[2] = 0x80 + (u8)(ch & 0x3f); + length = 3; + }else{ + buf[0] = 0xf0 + (u8)((ch>>18) & 0x07); + buf[1] = 0x80 + (u8)((ch>>12) & 0x3f); + buf[2] = 0x80 + (u8)((ch>>6) & 0x3f); + buf[3] = 0x80 + (u8)(ch & 0x3f); + length = 4; + } } if( precision>1 ){ width -= precision-1; if( width>1 && !flag_leftjustify ){ sqlite3AppendChar(pAccum, width-1, ' '); width = 0; } - sqlite3AppendChar(pAccum, precision-1, c); + while( precision-- > 1 ){ + sqlite3StrAccumAppend(pAccum, buf, length); + } } - length = 1; - buf[0] = c; bufpt = buf; - break; + flag_altform2 = 1; + goto adjust_width_for_utf8; case etSTRING: case etDYNSTRING: if( bArgList ){ bufpt = getTextArg(pArgList); xtype = etSTRING; @@ -652,18 +682,34 @@ bufpt = ""; }else if( xtype==etDYNSTRING ){ zExtra = bufpt; } if( precision>=0 ){ - for(length=0; length 0 && z[0] ){ + SQLITE_SKIP_UTF8(z); + } + length = (int)(z - (unsigned char*)bufpt); + }else{ + for(length=0; length0 ){ + /* Adjust width to account for extra bytes in UTF-8 characters */ + int ii = length - 1; + while( ii>=0 ) if( (bufpt[ii--] & 0xc0)==0x80 ) width++; + } break; - case etSQLESCAPE: /* Escape ' characters */ - case etSQLESCAPE2: /* Escape ' and enclose in '...' 
*/ - case etSQLESCAPE3: { /* Escape " characters */ + case etSQLESCAPE: /* %q: Escape ' characters */ + case etSQLESCAPE2: /* %Q: Escape ' and enclose in '...' */ + case etSQLESCAPE3: { /* %w: Escape " characters */ int i, j, k, n, isnull; int needQuote; char ch; char q = ((xtype==etSQLESCAPE3)?'"':'\''); /* Quote character */ char *escarg; @@ -673,13 +719,21 @@ }else{ escarg = va_arg(ap,char*); } isnull = escarg==0; if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)"); + /* For %q, %Q, and %w, the precision is the number of bytes (or + ** characters if the ! flag is present) to use from the input. + ** Because of the extra quoting characters inserted, the number + ** of output characters may be larger than the precision. + */ k = precision; for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){ if( ch==q ) n++; + if( flag_altform2 && (ch&0xc0)==0xc0 ){ + while( (escarg[i+1]&0xc0)==0x80 ){ i++; } + } } needQuote = !isnull && xtype==etSQLESCAPE2; n += i + 3; if( n>etBUFSIZE ){ bufpt = zExtra = sqlite3Malloc( n ); @@ -698,14 +752,11 @@ if( ch==q ) bufpt[j++] = ch; } if( needQuote ) bufpt[j++] = q; bufpt[j] = 0; length = j; - /* The precision in %q and %Q means how many input characters to - ** consume, not the length of the output... - ** if( precision>=0 && precisionprintfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return; pToken = va_arg(ap, Token*); @@ -740,11 +791,14 @@ } }/* End switch over the format type */ /* ** The text of the conversion is pointed to by "bufpt" and is ** "length" characters long. The field width is "width". Do - ** the output. + ** the output. Both length and width are in bytes, not characters, + ** at this point. If the "!" flag was present on string conversions + ** indicating that width and precision should be expressed in characters, + ** then the values have been translated prior to reaching this point. 
*/ width -= length; if( width>0 ){ if( !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' '); sqlite3StrAccumAppend(pAccum, bufpt, length); Index: test/printf2.test ================================================================== --- test/printf2.test +++ test/printf2.test @@ -146,8 +146,65 @@ } {|123,456,789|-123,456,789|} do_execsql_test printf2-4.10 { SELECT printf('|%,d|%,d|',1234567890,-1234567890); } {|1,234,567,890|-1,234,567,890|} +# 2018-02-19. Unicode characters with %c +do_execsql_test printf2-5.100 { + SELECT printf('(%8c)',char(11106)); +} {{( ⭢)}} +do_execsql_test printf2-5.101 { + SELECT printf('(%-8c)',char(11106)); +} {{(⭢ )}} +do_execsql_test printf2-5.102 { + SELECT printf('(%5.3c)',char(1492)); +} {{( ההה)}} +do_execsql_test printf2-5.103 { + SELECT printf('(%-5.3c)',char(1492)); +} {{(ההה )}} +do_execsql_test printf2-5.104 { + SELECT printf('(%3.3c)',char(1492)); +} {{(ההה)}} +do_execsql_test printf2-5.105 { + SELECT printf('(%-3.3c)',char(1492)); +} {{(ההה)}} +do_execsql_test printf2-5.104 { + SELECT printf('(%2c)',char(1513)); +} {{( ש)}} +do_execsql_test printf2-5.106 { + SELECT printf('(%-2c)',char(1513)); +} {{(ש )}} + +# 2018-02-19. Unicode characters with the "!" flag in %s and friends. 
+do_execsql_test printf2-6.100 { + SELECT printf('(%!.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {(הנה)} +do_execsql_test printf2-6.101 { + SELECT printf('(%.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {(הנה)} +do_execsql_test printf2-6.102 { + SELECT printf('(%!5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{( הנה)}} +do_execsql_test printf2-6.103 { + SELECT printf('(%8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{( הנה)}} +do_execsql_test printf2-6.104 { + SELECT printf('(%!-5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{(הנה )}} +do_execsql_test printf2-6.105 { + SELECT printf('(%-8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{(הנה )}} +do_execsql_test printf2-6.106 { + SELECT printf('(%!.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {('הנה')} +do_execsql_test printf2-6.107 { + SELECT printf('(%.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {('הנה')} +do_execsql_test printf2-6.108 { + SELECT printf('(%!7.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{( 'הנה')}} +do_execsql_test printf2-6.109 { + SELECT printf('(%10.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד'); +} {{( 'הנה')}} finish_test