/ Check-in [fea8a4db9d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve test coverage of fts5_unicode2.c.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: fea8a4db9d8c7b9a946017a0dc984cbca6ce240e
User & Date: dan 2015-05-22 06:08:25
Context
2015-05-22
07:44
Increase test coverage of fts5_vocab.c. check-in: 065ab83a6c user: dan tags: fts5
06:08
Improve test coverage of fts5_unicode2.c. check-in: fea8a4db9d user: dan tags: fts5
2015-05-20
09:27
Improve test coverage of fts5_tokenize.c. check-in: 0e91a6a520 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/unicode/mkunicode.tcl.

     1      1   
     2         -#
     3         -# Parameter $zName must be a path to the file UnicodeData.txt. This command
     4         -# reads the file and returns a list of mappings required to remove all
     5         -# diacritical marks from a unicode string. Each mapping is itself a list
     6         -# consisting of two elements - the unicode codepoint and the single ASCII
     7         -# character that it should be replaced with, or an empty string if the 
     8         -# codepoint should simply be removed from the input. Examples:
     9         -#
    10         -#   { 224 a  }     (replace codepoint 224 to "a")
    11         -#   { 769 "" }     (remove codepoint 769 from input)
    12         -#
    13         -# Mappings are only returned for non-upper case codepoints. It is assumed
    14         -# that the input has already been folded to lower case.
    15         -#
    16         -proc rd_load_unicodedata_text {zName} {
    17         -  global tl_lookup_table
    18         -
    19         -  set fd [open $zName]
    20         -  set lField {
    21         -    code
    22         -    character_name
    23         -    general_category
    24         -    canonical_combining_classes
    25         -    bidirectional_category
    26         -    character_decomposition_mapping
    27         -    decimal_digit_value
    28         -    digit_value
    29         -    numeric_value
    30         -    mirrored
    31         -    unicode_1_name
    32         -    iso10646_comment_field
    33         -    uppercase_mapping
    34         -    lowercase_mapping
    35         -    titlecase_mapping
    36         -  }
    37         -  set lRet [list]
    38         -
    39         -  while { ![eof $fd] } {
    40         -    set line [gets $fd]
    41         -    if {$line == ""} continue
    42         -
    43         -    set fields [split $line ";"]
    44         -    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    45         -    foreach $lField $fields {}
    46         -    if { [llength $character_decomposition_mapping]!=2
    47         -      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
    48         -    } {
    49         -      continue
    50         -    }
    51         -
    52         -    set iCode  [expr "0x$code"]
    53         -    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    54         -    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]
    55         -
    56         -    if {[info exists tl_lookup_table($iCode)]} continue
    57         -
    58         -    if { ($iAscii >= 97 && $iAscii <= 122)
    59         -      || ($iAscii >= 65 && $iAscii <= 90)
    60         -    } {
    61         -      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
    62         -      set dia($iDia) 1
    63         -    }
    64         -  }
    65         -
    66         -  foreach d [array names dia] {
    67         -    lappend lRet [list $d ""]
    68         -  }
    69         -  set lRet [lsort -integer -index 0 $lRet]
    70         -
    71         -  close $fd
    72         -  set lRet
    73         -}
    74         -
            2  +source [file join [file dirname [info script]] parseunicode.tcl]
    75      3   
    76      4   proc print_rd {map} {
    77      5     global tl_lookup_table
    78      6     set aChar [list]
    79      7     set lRange [list]
    80      8   
    81      9     set nRange 1
................................................................................
   200    128     puts "      (mask1 & (1 << (c-$iFirst-32)));"
   201    129     puts "\}"
   202    130   }
   203    131   
   204    132   
   205    133   #-------------------------------------------------------------------------
   206    134   
   207         -# Parameter $zName must be a path to the file UnicodeData.txt. This command
   208         -# reads the file and returns a list of codepoints (integers). The list
   209         -# contains all codepoints in the UnicodeData.txt assigned to any "General
   210         -# Category" that is not a "Letter" or "Number".
   211         -#
   212         -proc an_load_unicodedata_text {zName} {
   213         -  set fd [open $zName]
   214         -  set lField {
   215         -    code
   216         -    character_name
   217         -    general_category
   218         -    canonical_combining_classes
   219         -    bidirectional_category
   220         -    character_decomposition_mapping
   221         -    decimal_digit_value
   222         -    digit_value
   223         -    numeric_value
   224         -    mirrored
   225         -    unicode_1_name
   226         -    iso10646_comment_field
   227         -    uppercase_mapping
   228         -    lowercase_mapping
   229         -    titlecase_mapping
   230         -  }
   231         -  set lRet [list]
   232         -
   233         -  while { ![eof $fd] } {
   234         -    set line [gets $fd]
   235         -    if {$line == ""} continue
   236         -
   237         -    set fields [split $line ";"]
   238         -    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
   239         -    foreach $lField $fields {}
   240         -
   241         -    set iCode [expr "0x$code"]
   242         -    set bAlnum [expr {
   243         -         [lsearch {L N} [string range $general_category 0 0]] >= 0
   244         -      || $general_category=="Co"
   245         -    }]
   246         -
   247         -    if { !$bAlnum } { lappend lRet $iCode }
   248         -  }
   249         -
   250         -  close $fd
   251         -  set lRet
   252         -}
   253         -
   254    135   proc an_load_separator_ranges {} {
   255    136     global unicodedata.txt
   256    137     set lSep [an_load_unicodedata_text ${unicodedata.txt}]
   257    138     unset -nocomplain iFirst 
   258    139     unset -nocomplain nRange 
   259    140     set lRange [list]
   260    141     foreach sep $lSep {
................................................................................
   436    317     }]
   437    318     puts "  return 0;"
   438    319     puts "\}"
   439    320   }
   440    321   
   441    322   #-------------------------------------------------------------------------
   442    323   
   443         -proc tl_load_casefolding_txt {zName} {
   444         -  global tl_lookup_table
   445         -
   446         -  set fd [open $zName]
   447         -  while { ![eof $fd] } {
   448         -    set line [gets $fd]
   449         -    if {[string range $line 0 0] == "#"} continue
   450         -    if {$line == ""} continue
   451         -
   452         -    foreach x {a b c d} {unset -nocomplain $x}
   453         -    foreach {a b c d} [split $line ";"] {}
   454         -
   455         -    set a2 [list]
   456         -    set c2 [list]
   457         -    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
   458         -    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
   459         -    set b [string trim $b]
   460         -    set d [string trim $d]
   461         -
   462         -    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
   463         -  }
   464         -}
   465         -
   466    324   proc tl_create_records {} {
   467    325     global tl_lookup_table
   468    326   
   469    327     set iFirst ""
   470    328     set nOff 0
   471    329     set nRange 0
   472    330     set nIncr 0
................................................................................
   631    489   
   632    490     assert( c>=0 );
   633    491     assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
   634    492   
   635    493     if( c<128 ){
   636    494       if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
   637    495     }else if( c<65536 ){
          496  +    const struct TableEntry *p;
   638    497       int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   639    498       int iLo = 0;
   640    499       int iRes = -1;
   641    500   
          501  +    assert( c>aEntry[0].iCode );
   642    502       while( iHi>=iLo ){
   643    503         int iTest = (iHi + iLo) / 2;
   644    504         int cmp = (c - aEntry[iTest].iCode);
   645    505         if( cmp>=0 ){
   646    506           iRes = iTest;
   647    507           iLo = iTest+1;
   648    508         }else{
   649    509           iHi = iTest-1;
   650    510         }
   651    511       }
   652         -    assert( iRes<0 || c>=aEntry[iRes].iCode );
   653    512   
   654         -    if( iRes>=0 ){
   655         -      const struct TableEntry *p = &aEntry[iRes];
   656         -      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
   657         -        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
   658         -        assert( ret>0 );
   659         -      }
          513  +    assert( iRes>=0 && c>=aEntry[iRes].iCode );
          514  +    p = &aEntry[iRes];
          515  +    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
          516  +      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
          517  +      assert( ret>0 );
   660    518       }
   661    519   
   662    520       if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
   663    521     }
   664    522     }]
   665    523   
   666    524     foreach entry $lHigh {

Added ext/fts3/unicode/parseunicode.tcl.

            1  +
            2  +#--------------------------------------------------------------------------
            3  +# Parameter $zName must be a path to the file UnicodeData.txt. This command
            4  +# reads the file and returns a list of mappings required to remove all
            5  +# diacritical marks from a unicode string. Each mapping is itself a list
            6  +# consisting of two elements - the unicode codepoint and the single ASCII
            7  +# character that it should be replaced with, or an empty string if the 
            8  +# codepoint should simply be removed from the input. Examples:
            9  +#
           10  +#   { 224 a  }     (replace codepoint 224 with "a")
           11  +#   { 769 "" }     (remove codepoint 769 from input)
           12  +#
           13  +# Mappings are only returned for non-upper case codepoints. It is assumed
           14  +# that the input has already been folded to lower case.
           15  +#
           16  +proc rd_load_unicodedata_text {zName} {
           17  +  global tl_lookup_table
           18  +
           19  +  set fd [open $zName]
           20  +  set lField {
           21  +    code
           22  +    character_name
           23  +    general_category
           24  +    canonical_combining_classes
           25  +    bidirectional_category
           26  +    character_decomposition_mapping
           27  +    decimal_digit_value
           28  +    digit_value
           29  +    numeric_value
           30  +    mirrored
           31  +    unicode_1_name
           32  +    iso10646_comment_field
           33  +    uppercase_mapping
           34  +    lowercase_mapping
           35  +    titlecase_mapping
           36  +  }
           37  +  set lRet [list]
           38  +
           39  +  while { ![eof $fd] } {
           40  +    set line [gets $fd]
           41  +    if {$line == ""} continue
           42  +
           43  +    set fields [split $line ";"]
           44  +    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
           45  +    foreach $lField $fields {}
           46  +    if { [llength $character_decomposition_mapping]!=2
           47  +      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
           48  +    } {
           49  +      continue
           50  +    }
           51  +
           52  +    set iCode  [expr "0x$code"]
           53  +    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
           54  +    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]
           55  +
           56  +    if {[info exists tl_lookup_table($iCode)]} continue
           57  +
           58  +    if { ($iAscii >= 97 && $iAscii <= 122)
           59  +      || ($iAscii >= 65 && $iAscii <= 90)
           60  +    } {
           61  +      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
           62  +      set dia($iDia) 1
           63  +    }
           64  +  }
           65  +
           66  +  foreach d [array names dia] {
           67  +    lappend lRet [list $d ""]
           68  +  }
           69  +  set lRet [lsort -integer -index 0 $lRet]
           70  +
           71  +  close $fd
           72  +  set lRet
           73  +}
           74  +
           75  +#-------------------------------------------------------------------------
           76  +# Parameter $zName must be a path to the file UnicodeData.txt. This command
           77  +# reads the file and returns a list of codepoints (integers). The list
           78  +# contains all codepoints in the UnicodeData.txt assigned to any "General
           79  +# Category" that is not a "Letter" (L*), a "Number" (N*) or "Co".
           80  +#
           81  +proc an_load_unicodedata_text {zName} {
           82  +  set fd [open $zName]
           83  +  set lField {
           84  +    code
           85  +    character_name
           86  +    general_category
           87  +    canonical_combining_classes
           88  +    bidirectional_category
           89  +    character_decomposition_mapping
           90  +    decimal_digit_value
           91  +    digit_value
           92  +    numeric_value
           93  +    mirrored
           94  +    unicode_1_name
           95  +    iso10646_comment_field
           96  +    uppercase_mapping
           97  +    lowercase_mapping
           98  +    titlecase_mapping
           99  +  }
          100  +  set lRet [list]
          101  +
          102  +  while { ![eof $fd] } {
          103  +    set line [gets $fd]
          104  +    if {$line == ""} continue
          105  +
          106  +    set fields [split $line ";"]
          107  +    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
          108  +    foreach $lField $fields {}
          109  +
          110  +    set iCode [expr "0x$code"]
          111  +    set bAlnum [expr {
          112  +         [lsearch {L N} [string range $general_category 0 0]] >= 0
          113  +      || $general_category=="Co"
          114  +    }]
          115  +
          116  +    if { !$bAlnum } { lappend lRet $iCode }
          117  +  }
          118  +
          119  +  close $fd
          120  +  set lRet
          121  +}
          122  +
          123  +proc tl_load_casefolding_txt {zName} {
          124  +  global tl_lookup_table
          125  +
          126  +  set fd [open $zName]
          127  +  while { ![eof $fd] } {
          128  +    set line [gets $fd]
          129  +    if {[string range $line 0 0] == "#"} continue
          130  +    if {$line == ""} continue
          131  +
          132  +    foreach x {a b c d} {unset -nocomplain $x}
          133  +    foreach {a b c d} [split $line ";"] {}
          134  +
          135  +    set a2 [list]
          136  +    set c2 [list]
          137  +    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
          138  +    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
          139  +    set b [string trim $b]
          140  +    set d [string trim $d]
          141  +
          142  +    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
          143  +  }
          144  +}
          145  +
          146  +

Changes to ext/fts5/fts5Int.h.

   627    627   
   628    628   int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
   629    629   
   630    630   /*
   631    631   ** End of interface to code in fts5_vocab.c.
   632    632   **************************************************************************/
   633    633   
          634  +
          635  +/**************************************************************************
          636  +** Interface to automatically generated code in fts5_unicode2.c. 
          637  +*/
          638  +int sqlite3Fts5UnicodeIsalnum(int c);
          639  +int sqlite3Fts5UnicodeIsdiacritic(int c);
          640  +int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
          641  +/*
          642  +** End of interface to code in fts5_unicode2.c.
          643  +**************************************************************************/
          644  +
   634    645   #endif

Changes to ext/fts5/fts5_expr.c.

  1613   1613   static void fts5ExprFunctionTcl(
  1614   1614     sqlite3_context *pCtx,          /* Function call context */
  1615   1615     int nArg,                       /* Number of args */
  1616   1616     sqlite3_value **apVal           /* Function arguments */
  1617   1617   ){
  1618   1618     fts5ExprFunction(pCtx, nArg, apVal, 1);
  1619   1619   }
         1620  +
         1621  +/*
         1622  +** The implementation of an SQLite user-defined-function that accepts a
         1623  +** single integer as an argument. If the integer is an alpha-numeric 
         1624  +** unicode code point, 1 is returned. Otherwise 0.
         1625  +*/
         1626  +static void fts5ExprIsAlnum(
         1627  +  sqlite3_context *pCtx,          /* Function call context */
         1628  +  int nArg,                       /* Number of args */
         1629  +  sqlite3_value **apVal           /* Function arguments */
         1630  +){
         1631  +  int iCode;
         1632  +  if( nArg!=1 ){
         1633  +    sqlite3_result_error(pCtx, 
         1634  +        "wrong number of arguments to function fts5_isalnum", -1
         1635  +    );
         1636  +    return;
         1637  +  }
         1638  +  iCode = sqlite3_value_int(apVal[0]);
         1639  +  sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode));
         1640  +}
         1641  +
         1642  +static void fts5ExprFold(
         1643  +  sqlite3_context *pCtx,          /* Function call context */
         1644  +  int nArg,                       /* Number of args */
         1645  +  sqlite3_value **apVal           /* Function arguments */
         1646  +){
         1647  +  if( nArg!=1 && nArg!=2 ){
         1648  +    sqlite3_result_error(pCtx, 
         1649  +        "wrong number of arguments to function fts5_fold", -1
         1650  +    );
         1651  +  }else{
         1652  +    int iCode;
         1653  +    int bRemoveDiacritics = 0;
         1654  +    iCode = sqlite3_value_int(apVal[0]);
         1655  +    if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
         1656  +    sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
         1657  +  }
         1658  +}
  1620   1659   
  1621   1660   /*
  1622   1661   ** This is called during initialization to register the fts5_expr() scalar
  1623   1662   ** UDF with the SQLite handle passed as the only argument.
  1624   1663   */
  1625   1664   int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
  1626   1665     struct Fts5ExprFunc {
  1627   1666       const char *z;
  1628   1667       void (*x)(sqlite3_context*,int,sqlite3_value**);
  1629   1668     } aFunc[] = {
  1630         -    { "fts5_expr", fts5ExprFunctionHr },
         1669  +    { "fts5_expr",     fts5ExprFunctionHr },
  1631   1670       { "fts5_expr_tcl", fts5ExprFunctionTcl },
         1671  +    { "fts5_isalnum",  fts5ExprIsAlnum },
         1672  +    { "fts5_fold",     fts5ExprFold },
  1632   1673     };
  1633   1674     int i;
  1634   1675     int rc = SQLITE_OK;
  1635   1676     void *pCtx = (void*)pGlobal;
  1636   1677   
  1637   1678     for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){
  1638   1679       struct Fts5ExprFunc *p = &aFunc[i];

Changes to ext/fts5/fts5_tokenize.c.

   170    170     return rc;
   171    171   }
   172    172   
   173    173   /**************************************************************************
   174    174   ** Start of unicode61 tokenizer implementation.
   175    175   */
   176    176   
   177         -/*
   178         -** Functions in fts5_unicode2.c. 
   179         -*/
   180         -int sqlite3Fts5UnicodeIsalnum(int c);
   181         -int sqlite3Fts5UnicodeIsdiacritic(int c);
   182         -int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
   183         -
   184    177   
   185    178   /*
   186    179   ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
   187    180   ** from the sqlite3 source file utf.c. If this file is compiled as part
   188    181   ** of the amalgamation, they are not required.
   189    182   */
   190    183   #ifndef SQLITE_AMALGAMATION

Changes to ext/fts5/fts5_unicode2.c.

   323    323   
   324    324     assert( c>=0 );
   325    325     assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
   326    326   
   327    327     if( c<128 ){
   328    328       if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
   329    329     }else if( c<65536 ){
          330  +    const struct TableEntry *p;
   330    331       int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   331    332       int iLo = 0;
   332    333       int iRes = -1;
   333    334   
          335  +    assert( c>aEntry[0].iCode );
   334    336       while( iHi>=iLo ){
   335    337         int iTest = (iHi + iLo) / 2;
   336    338         int cmp = (c - aEntry[iTest].iCode);
   337    339         if( cmp>=0 ){
   338    340           iRes = iTest;
   339    341           iLo = iTest+1;
   340    342         }else{
   341    343           iHi = iTest-1;
   342    344         }
   343    345       }
   344         -    assert( iRes<0 || c>=aEntry[iRes].iCode );
   345    346   
   346         -    if( iRes>=0 ){
   347         -      const struct TableEntry *p = &aEntry[iRes];
   348         -      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
   349         -        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
   350         -        assert( ret>0 );
   351         -      }
          347  +    assert( iRes>=0 && c>=aEntry[iRes].iCode );
          348  +    p = &aEntry[iRes];
          349  +    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
          350  +      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
          351  +      assert( ret>0 );
   352    352       }
   353    353   
   354    354       if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
   355    355     }
   356    356     
   357    357     else if( c>=66560 && c<66600 ){
   358    358       ret = c + 40;
   359    359     }
   360    360   
   361    361     return ret;
   362    362   }
   363    363   #endif /* defined(SQLITE_ENABLE_FTS5) */

Added ext/fts5/test/fts5unicode3.test.

            1  +# 2014 Dec 20
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +# Tests focusing on the fts5 unicode functions (fts5_unicode2.c)
           13  +#
           14  +
           15  +proc fts3_unicode_path {file} {
           16  +  file join [file dirname [info script]] .. .. fts3 unicode $file
           17  +}
           18  +
           19  +source [file join [file dirname [info script]] fts5_common.tcl]
           20  +source [fts3_unicode_path parseunicode.tcl]
           21  +set testprefix fts5unicode3
           22  +
           23  +set CF [fts3_unicode_path CaseFolding.txt]
           24  +set UD [fts3_unicode_path UnicodeData.txt]
           25  +
           26  +tl_load_casefolding_txt $CF
           27  +foreach x [an_load_unicodedata_text $UD] {
           28  +  set aNotAlnum($x) 1
           29  +}
           30  +
           31  +foreach {y} [rd_load_unicodedata_text $UD] {
           32  +  foreach {code ascii} $y {}
           33  +  if {$ascii==""} {
           34  +    set int 0
           35  +  } else {
           36  +    binary scan $ascii c int
           37  +  }
           38  +  set aDiacritic($code) $int
           39  +}
           40  +
           41  +proc tcl_fold {i {bRemoveDiacritic 0}} {
           42  +  global tl_lookup_table
           43  +  global aDiacritic
           44  +
           45  +  if {[info exists tl_lookup_table($i)]} {
           46  +    set i $tl_lookup_table($i)
           47  +  }
           48  +  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
           49  +    set i $aDiacritic($i)
           50  +  }
           51  +  expr $i
           52  +}
           53  +db func tcl_fold tcl_fold
           54  +
           55  +proc tcl_isalnum {i} {
           56  +  global aNotAlnum
           57  +  expr {![info exists aNotAlnum($i)]}
           58  +}
           59  +db func tcl_isalnum tcl_isalnum
           60  +
           61  +
           62  +do_catchsql_test 1.0.1 {
           63  +  SELECT fts5_isalnum(1, 2, 3);
           64  +} {1 {wrong number of arguments to function fts5_isalnum}}
           65  +do_catchsql_test 1.0.2 {
           66  +  SELECT fts5_fold();
           67  +} {1 {wrong number of arguments to function fts5_fold}}
           68  +do_catchsql_test 1.0.3 {
           69  +  SELECT fts5_fold(1,2,3);
           70  +} {1 {wrong number of arguments to function fts5_fold}}
           71  +
           72  +do_execsql_test 1.1 {
           73  +  WITH ii(i) AS (
           74  +    SELECT -1
           75  +    UNION ALL
           76  +    SELECT i+1 FROM ii WHERE i<100000
           77  +  )
           78  +  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
           79  +} {0 {}}
           80  +
           81  +do_execsql_test 1.2 {
           82  +  WITH ii(i) AS (
           83  +    SELECT -1
           84  +    UNION ALL
           85  +    SELECT i+1 FROM ii WHERE i<100000
           86  +  )
           87  +  SELECT count(*), min(i) FROM ii 
           88  +  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
           89  +} {0 {}}
           90  +
           91  +do_execsql_test 1.3 {
           92  +  WITH ii(i) AS (
           93  +    SELECT -1
           94  +    UNION ALL
           95  +    SELECT i+1 FROM ii WHERE i<100000
           96  +  )
           97  +  SELECT count(*), min(i) FROM ii 
           98  +  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
           99  +} {0 {}}
          100  +
          101  +do_test 1.4 {
          102  +  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
          103  +  append str {"unicode61 separators '}
          104  +  for {set i 700} {$i<900} {incr i} {
          105  +    append str [format %c $i]
          106  +  }
          107  +  append str {'");}
          108  +  execsql $str
          109  +} {}
          110  +do_test 1.5 {
          111  +  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
          112  +  append str {"unicode61 tokenchars '}
          113  +  for {set i 700} {$i<900} {incr i} {
          114  +    append str [format %c $i]
          115  +  }
          116  +  append str {'");}
          117  +  execsql $str
          118  +} {}
          119  +
          120  +
          121  +finish_test
          122  +