diff options
Diffstat (limited to 'libraries/sqlite/unix/sqlite-3.5.1/test/fts3atoken.test')
-rw-r--r-- | libraries/sqlite/unix/sqlite-3.5.1/test/fts3atoken.test | 174 |
1 file changed, 174 insertions, 0 deletions
diff --git a/libraries/sqlite/unix/sqlite-3.5.1/test/fts3atoken.test b/libraries/sqlite/unix/sqlite-3.5.1/test/fts3atoken.test new file mode 100644 index 0000000..cf9574e --- /dev/null +++ b/libraries/sqlite/unix/sqlite-3.5.1/test/fts3atoken.test | |||
@@ -0,0 +1,174 @@ | |||
# 2007 June 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for the SQLite library.  The focus
# of this script is testing the pluggable tokeniser feature of the
# FTS3 module.
#
# $Id: fts3atoken.test,v 1.1 2007/08/20 17:38:42 shess Exp $
#
17 | |||
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this entire file when the build does not include FTS3
# (i.e. SQLITE_ENABLE_FTS3 was not defined at compile time).
ifcapable !fts3 {
  finish_test
  return
}
26 | |||
# Return a copy of $str in which every character outside the 7-bit
# ASCII range is replaced by an escape of the form \xHHHH (backslash,
# "x", then exactly four lower-case hex digits of the code point).
# ASCII characters pass through unchanged.
proc escape_string {str} {
  set result {}
  foreach ch [split $str {}] {
    scan $ch %c code
    if {$code > 127} {
      append result [format {\x%.4x} $code]
    } else {
      append result $ch
    }
  }
  return $result
}
39 | |||
#--------------------------------------------------------------------------
# Test cases fts3token-1.* are the warm-body test for the SQL scalar
# function fts3_tokenizer().  The procedure is as follows:
#
#   1: Verify that there is no such fts3 tokenizer as 'blah'.
#
#   2: Query for the built-in tokenizer 'simple'.  Insert a copy of the
#      retrieved value as tokenizer 'blah'.
#
#   3: Test that the value returned for tokenizer 'blah' is now the
#      same as that retrieved for 'simple'.
#
#   4: Test that it is now possible to create an fts3 table using
#      tokenizer 'blah' (it was not possible in step 1).
#
#   5: Test that the table created to use tokenizer 'blah' is usable.
#
do_test fts3token-1.1 {
  # Step 1: 'blah' is not yet registered, so table creation must fail.
  catchsql {
    CREATE VIRTUAL TABLE t1 USING fts3(content, tokenize blah);
  }
} {1 {unknown tokenizer: blah}}
do_test fts3token-1.2 {
  # Step 2: copy the 'simple' tokenizer pointer under the name 'blah'.
  execsql {
    SELECT fts3_tokenizer('blah', fts3_tokenizer('simple')) IS NULL;
  }
} {0}
do_test fts3token-1.3 {
  # Step 3: 'blah' and 'simple' now resolve to the same tokenizer.
  execsql {
    SELECT fts3_tokenizer('blah') == fts3_tokenizer('simple');
  }
} {1}
do_test fts3token-1.4 {
  # Step 4: the CREATE that failed in step 1 now succeeds.
  catchsql {
    CREATE VIRTUAL TABLE t1 USING fts3(content, tokenize blah);
  }
} {0 {}}
do_test fts3token-1.5 {
  # Step 5: the new table can be populated and queried via MATCH.
  execsql {
    INSERT INTO t1(content) VALUES('There was movement at the station');
    INSERT INTO t1(content) VALUES('For the word has passed around');
    INSERT INTO t1(content) VALUES('That the colt from ol regret had got away');
    SELECT content FROM t1 WHERE content MATCH 'movement'
  }
} {{There was movement at the station}}
85 | |||
#--------------------------------------------------------------------------
# Test cases fts3token-2.* test error cases in the scalar function based
# API for getting and setting tokenizers.
#
do_test fts3token-2.1 {
  # Looking up a name that was never registered is an error.
  catchsql {
    SELECT fts3_tokenizer('nosuchtokenizer');
  }
} {1 {unknown tokenizer: nosuchtokenizer}}
95 | |||
#--------------------------------------------------------------------------
# Test cases fts3token-3.* feed a simple input string to each of the
# built-in tokenizers via the built-in test function.  This exercises
# the test function itself as much as the tokenizer implementations.
#
do_test fts3token-3.1 {
  execsql {
    SELECT fts3_tokenizer_test('simple', 'I don''t see how');
  }
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
do_test fts3token-3.2 {
  execsql {
    SELECT fts3_tokenizer_test('porter', 'I don''t see how');
  }
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
ifcapable icu {
  # Unlike 'simple' and 'porter', the ICU tokenizer keeps the
  # apostrophe inside "don't" as part of a single token.
  do_test fts3token-3.3 {
    execsql {
      SELECT fts3_tokenizer_test('icu', 'I don''t see how');
    }
  } {{0 i I 1 don't don't 2 see see 3 how how}}
}
118 | |||
#--------------------------------------------------------------------------
# Test cases fts3token-4.* test the ICU tokenizer.  In practice, this
# tokenizer only has two modes - "thai" and "everybody else".  Some other
# Asian languages (Lao, Khmer etc.) require the same special treatment as
# Thai, but ICU doesn't support them yet.
#
ifcapable icu {

  # Tokenize $input with the ICU tokenizer configured for $locale and
  # check that the single result row equals $output exactly.  The result
  # is stashed in the global ::out because the do_test script body runs
  # in a different scope.
  proc do_icu_test {name locale input output} {
    set ::out [db eval { SELECT fts3_tokenizer_test('icu', $locale, $input) }]
    do_test $name {
      lindex $::out 0
    } $output
  }

  do_icu_test fts3token-4.1 en_US {} {}
  do_icu_test fts3token-4.2 en_US {Test cases fts3} [list \
    0 test Test 1 cases cases 2 fts3 fts3
  ]

  # The following test shows that ICU is smart enough to recognise
  # Thai characters, even when the locale is set to English/United
  # States.
  #
  set input "\u0e2d\u0e30\u0e44\u0e23\u0e19\u0e30\u0e04\u0e23\u0e31\u0e1a"
  set output "0 \u0e2d\u0e30\u0e44\u0e23 \u0e2d\u0e30\u0e44\u0e23 "
  append output "1 \u0e19\u0e30 \u0e19\u0e30 "
  append output "2 \u0e04\u0e23\u0e31\u0e1a \u0e04\u0e23\u0e31\u0e1a"

  do_icu_test fts3token-4.3 th_TH $input $output
  do_icu_test fts3token-4.4 en_US $input $output

  # ICU handles an unknown locale by falling back to the default.
  # So this is not an error.
  do_icu_test fts3token-4.5 MiddleOfTheOcean $input $output

  set longtoken "AReallyReallyLongTokenOneThatWillSurelyRequire"
  append longtoken "AReallocInTheIcuTokenizerCode"

  set input "short tokens then "
  append input $longtoken
  set output "0 short short "
  append output "1 tokens tokens "
  append output "2 then then "
  append output "3 [string tolower $longtoken] $longtoken"

  do_icu_test fts3token-4.6 MiddleOfTheOcean $input $output
  do_icu_test fts3token-4.7 th_TH $input $output
  do_icu_test fts3token-4.8 en_US $input $output
}
169 | |||
# Run the C-level internal tokenizer tests; they report 'ok' on success.
do_test fts3token-internal {
  execsql { SELECT fts3_tokenizer_internal_test() }
} {ok}

finish_test