aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/sqlite/unix/sqlite-3.5.1/src/tokenize.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libraries/sqlite/unix/sqlite-3.5.1/src/tokenize.c508
1 files changed, 508 insertions, 0 deletions
diff --git a/libraries/sqlite/unix/sqlite-3.5.1/src/tokenize.c b/libraries/sqlite/unix/sqlite-3.5.1/src/tokenize.c
new file mode 100644
index 0000000..b4a9b9b
--- /dev/null
+++ b/libraries/sqlite/unix/sqlite-3.5.1/src/tokenize.c
@@ -0,0 +1,508 @@
1/*
2** 2001 September 15
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** A tokenizer for SQL
13**
14** This file contains C code that splits an SQL input string up into
15** individual tokens and sends those tokens one-by-one over to the
16** parser for analysis.
17**
18** $Id: tokenize.c,v 1.136 2007/08/27 23:26:59 drh Exp $
19*/
20#include "sqliteInt.h"
21#include <ctype.h>
22#include <stdlib.h>
23
24/*
25** The charMap() macro maps alphabetic characters into their
26** lower-case ASCII equivalent. On ASCII machines, this is just
27** an upper-to-lower case map. On EBCDIC machines we also need
28** to adjust the encoding. Only alphabetic characters and underscores
29** need to be translated.
30*/
#ifdef SQLITE_ASCII
/* ASCII build: look the character up in the sqlite3UpperToLower[] map.
** The argument is parenthesized so that the cast to unsigned char applies
** to the whole argument expression rather than only to its first operand. */
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
/* EBCDIC build: translate the character through ebcdicToAscii[].  The
** argument is parenthesized so that the cast to unsigned char applies to
** the whole argument expression rather than only to its first operand. */
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/* 256-entry table indexed by an EBCDIC byte.  Entries for the EBCDIC
** letters (both cases) hold the ASCII code of the lower-case letter
** (97..122), the entry at 0x6D holds 95 ('_' in ASCII), and every other
** entry is zero. */
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
56
57/*
58** The sqlite3KeywordCode function looks up an identifier to determine if
59** it is a keyword. If it is a keyword, the token code of that keyword is
60** returned. If the input is not a keyword, TK_ID is returned.
61**
62** The implementation of this routine was generated by a program,
63** mkkeywordhash.c, located in the tool subdirectory of the distribution.
64** The output of the mkkeywordhash.c program is written into a file
65** named keywordhash.h and then included into this source file by
66** the #include below.
67*/
68#include "keywordhash.h"
69
70
71/*
72** If X is a character that can be used in an identifier then
73** IdChar(X) will be true. Otherwise it is false.
74**
75** For ASCII, any character with the high-order bit set is
76** allowed in an identifier. For 7-bit characters at or above 0x20,
77** sqlite3IsAsciiIdChar[X-0x20] must be 1.
78**
79** For EBCDIC, the rules are more complex but have the same
80** end result.
81**
82** Ticket #1066. The SQL standard does not allow '$' in the
83** middle of identifiers. But many SQL implementations do.
84** SQLite will allow '$' in identifiers for compatibility.
85** But the feature is undocumented.
86*/
#ifdef SQLITE_ASCII
/* One flag per ASCII code 0x20..0x7F; the table is indexed by (code-0x20).
** A 1 marks a character that may appear in an identifier: '$', the digits,
** the letters of both cases, and '_'. */
const char sqlite3IsAsciiIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
/* IdChar(C) is true if C can appear in an identifier.
**
** NOTE: the macro assigns its argument to a variable named "c" that must
** already be declared (as an integer) in the scope where the macro is
** used.  The argument is evaluated exactly once and is parenthesized so
** that any expression may be passed safely. */
#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20]))
#endif
#ifdef SQLITE_EBCDIC
/* One flag per EBCDIC code 0x40..0xFF; the table is indexed by (code-0x40).
** A 1 marks a character that may appear in an identifier. */
const char sqlite3IsEbcdicIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
/* IdChar(C) is true if C can appear in an identifier.
**
** NOTE: the macro assigns its argument to a variable named "c" that must
** already be declared (as an integer) in the scope where the macro is
** used.  The argument is evaluated exactly once and is parenthesized so
** that any expression may be passed safely. */
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
117
118
119/*
120** Return the length of the token that begins at z[0].
121** Store the token type in *tokenType before returning.
122*/
123static int getToken(const unsigned char *z, int *tokenType){
124 int i, c;
125 switch( *z ){
126 case ' ': case '\t': case '\n': case '\f': case '\r': {
127 for(i=1; isspace(z[i]); i++){}
128 *tokenType = TK_SPACE;
129 return i;
130 }
131 case '-': {
132 if( z[1]=='-' ){
133 for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
134 *tokenType = TK_COMMENT;
135 return i;
136 }
137 *tokenType = TK_MINUS;
138 return 1;
139 }
140 case '(': {
141 *tokenType = TK_LP;
142 return 1;
143 }
144 case ')': {
145 *tokenType = TK_RP;
146 return 1;
147 }
148 case ';': {
149 *tokenType = TK_SEMI;
150 return 1;
151 }
152 case '+': {
153 *tokenType = TK_PLUS;
154 return 1;
155 }
156 case '*': {
157 *tokenType = TK_STAR;
158 return 1;
159 }
160 case '/': {
161 if( z[1]!='*' || z[2]==0 ){
162 *tokenType = TK_SLASH;
163 return 1;
164 }
165 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
166 if( c ) i++;
167 *tokenType = TK_COMMENT;
168 return i;
169 }
170 case '%': {
171 *tokenType = TK_REM;
172 return 1;
173 }
174 case '=': {
175 *tokenType = TK_EQ;
176 return 1 + (z[1]=='=');
177 }
178 case '<': {
179 if( (c=z[1])=='=' ){
180 *tokenType = TK_LE;
181 return 2;
182 }else if( c=='>' ){
183 *tokenType = TK_NE;
184 return 2;
185 }else if( c=='<' ){
186 *tokenType = TK_LSHIFT;
187 return 2;
188 }else{
189 *tokenType = TK_LT;
190 return 1;
191 }
192 }
193 case '>': {
194 if( (c=z[1])=='=' ){
195 *tokenType = TK_GE;
196 return 2;
197 }else if( c=='>' ){
198 *tokenType = TK_RSHIFT;
199 return 2;
200 }else{
201 *tokenType = TK_GT;
202 return 1;
203 }
204 }
205 case '!': {
206 if( z[1]!='=' ){
207 *tokenType = TK_ILLEGAL;
208 return 2;
209 }else{
210 *tokenType = TK_NE;
211 return 2;
212 }
213 }
214 case '|': {
215 if( z[1]!='|' ){
216 *tokenType = TK_BITOR;
217 return 1;
218 }else{
219 *tokenType = TK_CONCAT;
220 return 2;
221 }
222 }
223 case ',': {
224 *tokenType = TK_COMMA;
225 return 1;
226 }
227 case '&': {
228 *tokenType = TK_BITAND;
229 return 1;
230 }
231 case '~': {
232 *tokenType = TK_BITNOT;
233 return 1;
234 }
235 case '`':
236 case '\'':
237 case '"': {
238 int delim = z[0];
239 for(i=1; (c=z[i])!=0; i++){
240 if( c==delim ){
241 if( z[i+1]==delim ){
242 i++;
243 }else{
244 break;
245 }
246 }
247 }
248 if( c ){
249 *tokenType = TK_STRING;
250 return i+1;
251 }else{
252 *tokenType = TK_ILLEGAL;
253 return i;
254 }
255 }
256 case '.': {
257#ifndef SQLITE_OMIT_FLOATING_POINT
258 if( !isdigit(z[1]) )
259#endif
260 {
261 *tokenType = TK_DOT;
262 return 1;
263 }
264 /* If the next character is a digit, this is a floating point
265 ** number that begins with ".". Fall thru into the next case */
266 }
267 case '0': case '1': case '2': case '3': case '4':
268 case '5': case '6': case '7': case '8': case '9': {
269 *tokenType = TK_INTEGER;
270 for(i=0; isdigit(z[i]); i++){}
271#ifndef SQLITE_OMIT_FLOATING_POINT
272 if( z[i]=='.' ){
273 i++;
274 while( isdigit(z[i]) ){ i++; }
275 *tokenType = TK_FLOAT;
276 }
277 if( (z[i]=='e' || z[i]=='E') &&
278 ( isdigit(z[i+1])
279 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
280 )
281 ){
282 i += 2;
283 while( isdigit(z[i]) ){ i++; }
284 *tokenType = TK_FLOAT;
285 }
286#endif
287 while( IdChar(z[i]) ){
288 *tokenType = TK_ILLEGAL;
289 i++;
290 }
291 return i;
292 }
293 case '[': {
294 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
295 *tokenType = TK_ID;
296 return i;
297 }
298 case '?': {
299 *tokenType = TK_VARIABLE;
300 for(i=1; isdigit(z[i]); i++){}
301 return i;
302 }
303 case '#': {
304 for(i=1; isdigit(z[i]); i++){}
305 if( i>1 ){
306 /* Parameters of the form #NNN (where NNN is a number) are used
307 ** internally by sqlite3NestedParse. */
308 *tokenType = TK_REGISTER;
309 return i;
310 }
311 /* Fall through into the next case if the '#' is not followed by
312 ** a digit. Try to match #AAAA where AAAA is a parameter name. */
313 }
314#ifndef SQLITE_OMIT_TCL_VARIABLE
315 case '$':
316#endif
317 case '@': /* For compatibility with MS SQL Server */
318 case ':': {
319 int n = 0;
320 *tokenType = TK_VARIABLE;
321 for(i=1; (c=z[i])!=0; i++){
322 if( IdChar(c) ){
323 n++;
324#ifndef SQLITE_OMIT_TCL_VARIABLE
325 }else if( c=='(' && n>0 ){
326 do{
327 i++;
328 }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
329 if( c==')' ){
330 i++;
331 }else{
332 *tokenType = TK_ILLEGAL;
333 }
334 break;
335 }else if( c==':' && z[i+1]==':' ){
336 i++;
337#endif
338 }else{
339 break;
340 }
341 }
342 if( n==0 ) *tokenType = TK_ILLEGAL;
343 return i;
344 }
345#ifndef SQLITE_OMIT_BLOB_LITERAL
346 case 'x': case 'X': {
347 if( (c=z[1])=='\'' || c=='"' ){
348 int delim = c;
349 *tokenType = TK_BLOB;
350 for(i=2; (c=z[i])!=0; i++){
351 if( c==delim ){
352 if( i%2 ) *tokenType = TK_ILLEGAL;
353 break;
354 }
355 if( !isxdigit(c) ){
356 *tokenType = TK_ILLEGAL;
357 return i;
358 }
359 }
360 if( c ) i++;
361 return i;
362 }
363 /* Otherwise fall through to the next case */
364 }
365#endif
366 default: {
367 if( !IdChar(*z) ){
368 break;
369 }
370 for(i=1; IdChar(z[i]); i++){}
371 *tokenType = keywordCode((char*)z, i);
372 return i;
373 }
374 }
375 *tokenType = TK_ILLEGAL;
376 return 1;
377}
/*
** Externally visible wrapper around the file-local getToken() routine.
** The arguments are passed through unchanged and the value that
** getToken() returns is handed straight back to the caller.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  const int nToken = getToken(z, tokenType);
  return nToken;
}
381
382/*
383** Run the parser on the given SQL string. The parser structure is
384** passed in. An SQLITE_ status code is returned. If an error occurs
385** and pzErrMsg!=NULL then an error message might be written into
386** memory obtained from sqlite3_malloc() and *pzErrMsg made to point to that
387** error message. Or maybe not.
388*/
389int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
390 int nErr = 0;
391 int i;
392 void *pEngine;
393 int tokenType;
394 int lastTokenParsed = -1;
395 sqlite3 *db = pParse->db;
396
397 if( db->activeVdbeCnt==0 ){
398 db->u1.isInterrupted = 0;
399 }
400 pParse->rc = SQLITE_OK;
401 i = 0;
402 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3_malloc);
403 if( pEngine==0 ){
404 db->mallocFailed = 1;
405 return SQLITE_NOMEM;
406 }
407 assert( pParse->sLastToken.dyn==0 );
408 assert( pParse->pNewTable==0 );
409 assert( pParse->pNewTrigger==0 );
410 assert( pParse->nVar==0 );
411 assert( pParse->nVarExpr==0 );
412 assert( pParse->nVarExprAlloc==0 );
413 assert( pParse->apVarExpr==0 );
414 pParse->zTail = pParse->zSql = zSql;
415 while( !db->mallocFailed && zSql[i]!=0 ){
416 assert( i>=0 );
417 pParse->sLastToken.z = (u8*)&zSql[i];
418 assert( pParse->sLastToken.dyn==0 );
419 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
420 i += pParse->sLastToken.n;
421 if( i>SQLITE_MAX_SQL_LENGTH ){
422 pParse->rc = SQLITE_TOOBIG;
423 break;
424 }
425 switch( tokenType ){
426 case TK_SPACE:
427 case TK_COMMENT: {
428 if( db->u1.isInterrupted ){
429 pParse->rc = SQLITE_INTERRUPT;
430 sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
431 goto abort_parse;
432 }
433 break;
434 }
435 case TK_ILLEGAL: {
436 if( pzErrMsg ){
437 sqlite3_free(*pzErrMsg);
438 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
439 &pParse->sLastToken);
440 }
441 nErr++;
442 goto abort_parse;
443 }
444 case TK_SEMI: {
445 pParse->zTail = &zSql[i];
446 /* Fall thru into the default case */
447 }
448 default: {
449 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
450 lastTokenParsed = tokenType;
451 if( pParse->rc!=SQLITE_OK ){
452 goto abort_parse;
453 }
454 break;
455 }
456 }
457 }
458abort_parse:
459 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
460 if( lastTokenParsed!=TK_SEMI ){
461 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
462 pParse->zTail = &zSql[i];
463 }
464 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
465 }
466 sqlite3ParserFree(pEngine, sqlite3_free);
467 if( db->mallocFailed ){
468 pParse->rc = SQLITE_NOMEM;
469 }
470 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
471 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
472 }
473 if( pParse->zErrMsg ){
474 if( pzErrMsg && *pzErrMsg==0 ){
475 *pzErrMsg = pParse->zErrMsg;
476 }else{
477 sqlite3_free(pParse->zErrMsg);
478 }
479 pParse->zErrMsg = 0;
480 if( !nErr ) nErr++;
481 }
482 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
483 sqlite3VdbeDelete(pParse->pVdbe);
484 pParse->pVdbe = 0;
485 }
486#ifndef SQLITE_OMIT_SHARED_CACHE
487 if( pParse->nested==0 ){
488 sqlite3_free(pParse->aTableLock);
489 pParse->aTableLock = 0;
490 pParse->nTableLock = 0;
491 }
492#endif
493
494 if( !IN_DECLARE_VTAB ){
495 /* If the pParse->declareVtab flag is set, do not delete any table
496 ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
497 ** will take responsibility for freeing the Table structure.
498 */
499 sqlite3DeleteTable(pParse->pNewTable);
500 }
501
502 sqlite3DeleteTrigger(pParse->pNewTrigger);
503 sqlite3_free(pParse->apVarExpr);
504 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
505 pParse->rc = SQLITE_ERROR;
506 }
507 return nErr;
508}