Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Improve test coverage of fts5_tokenize.c. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
0e91a6a520f040b8902da6a1a4d9107d |
User & Date: | dan 2015-05-20 09:27:51.629 |
Context
2015-05-22
| ||
06:08 | Improve test coverage of fts5_unicode2.c. (check-in: fea8a4db9d user: dan tags: fts5) | |
2015-05-20
| ||
09:27 | Improve test coverage of fts5_tokenize.c. (check-in: 0e91a6a520 user: dan tags: fts5) | |
2015-05-19
| ||
19:37 | Add tests for fts5 tokenizers. (check-in: 4f90ba20e2 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
662 663 664 665 666 667 668 | } return ((mask & 0x0007)==0x0005); } } /* porter rule condition: (m > 1 and (*S or *T)) */ static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ | | | | 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 | } return ((mask & 0x0007)==0x0005); } } /* porter rule condition: (m > 1 and (*S or *T)) */ static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ assert( nStem>0 ); return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') && fts5Porter_MGt1(zStem, nStem); } /* porter rule condition: (*v*) */ static int fts5Porter_Vowel(char *zStem, int nStem){ int i; for(i=0; i<nStem; i++){ |
︙ | ︙ | |||
1163 1164 1165 1166 1167 1168 1169 | /* Steps 2 through 4. */ fts5PorterStep2(aBuf, &nBuf); fts5PorterStep3(aBuf, &nBuf); fts5PorterStep4(aBuf, &nBuf); /* Step 5a. */ | > | | 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 | /* Steps 2 through 4. */ fts5PorterStep2(aBuf, &nBuf); fts5PorterStep3(aBuf, &nBuf); fts5PorterStep4(aBuf, &nBuf); /* Step 5a. */ assert( nBuf>0 ); if( aBuf[nBuf-1]=='e' ){ if( fts5Porter_MGt1(aBuf, nBuf-1) || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) ){ nBuf--; } } |
︙ | ︙ |
Added ext/fts5/test/fts5porter2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 porter stemmer implementation. # # These are extra tests added to those in fts5porter.test in order to # improve test coverage of the porter stemmer implementation. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter2 set test_vocab { tion tion ation ation vation vation avation avat vion vion ion ion relational relat relation relat relate relat zzz zzz ii ii iiing ii xtional xtional xenci xenci xlogi xlogi realization realiz realize realiz xization xizat capitalism capit talism talism xiveness xive xfulness xful xousness xous xical xical xicate xicat xicity xiciti ies ie eed e eing e s s } set i 0 foreach {in out} $test_vocab { do_test "1.$i.($in -> $out)" { lindex [sqlite3_fts5_tokenize db porter $in] 0 } $out incr i } finish_test |
Changes to ext/fts5/test/fts5tokenizer.test.
︙ | ︙ | |||
205 206 207 208 209 210 211 212 213 214 | INSERT INTO e5 VALUES($c || ' ' || $a); } do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 | INSERT INTO e5 VALUES($c || ' ' || $a); } do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } #------------------------------------------------------------------------- # Test the 'separators' option with the unicode61 tokenizer. # do_execsql_test 8.1 { BEGIN; CREATE VIRTUAL TABLE e6 USING fts5(x, tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ" ); INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog'); CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); SELECT term FROM e7; ROLLBACK; } { brown dog fox jumped lazy over quick the } do_execsql_test 8.2 [subst { BEGIN; CREATE VIRTUAL TABLE e6 USING fts5(x, tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'" ); INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01' || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog' ); INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09'); CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); SELECT term FROM e7; ROLLBACK; }] [subst { brown dog fox jumped lazy over quick the \u0E08 \u0E09 }] finish_test |
Changes to ext/fts5/test/fts5unicode2.test.
︙ | ︙ | |||
65 66 67 68 69 70 71 72 73 74 75 76 77 78 | # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" #------------------------------------------------------------------------- # set docs [list { Enhance the INSERT syntax to allow multiple rows to be inserted via the VALUES clause. } { | > > > > > > | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \ "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3" do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \ "abc abc def def" #------------------------------------------------------------------------- # set docs [list { Enhance the INSERT syntax to allow multiple rows to be inserted via the VALUES clause. } { |
︙ | ︙ | |||
221 222 223 224 225 226 227 228 229 230 231 232 233 234 | execsql { CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x); INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} do_test 4.2 { set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] | > > > > | 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | execsql { CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x); INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } execsql "CREATE VIRTUAL TABLE t8 USING fts5( a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\" )" } {} do_test 4.2 { set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] |
︙ | ︙ | |||
248 249 250 251 252 253 254 255 256 257 258 259 260 261 | execsql { INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} #------------------------------------------------------------------------- breakpoint do_unicode_token_test3 5.1 {tokenchars {}} { sqlite3_reset sqlite3_column_int | > > > > > > > > > | 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | execsql { INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} do_test 4.4 { sqlite3_exec_hex db { CREATE VIRTUAL TABLE t9 USING fts5(a, b, tokenize="unicode61 separators '%C09004'" ); INSERT INTO t9(a) VALUES('abc%88def %89ghi%90'); } } {0 {}} #------------------------------------------------------------------------- breakpoint do_unicode_token_test3 5.1 {tokenchars {}} { sqlite3_reset sqlite3_column_int |
︙ | ︙ |