aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/sqlite/win32/fts1.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/sqlite/win32/fts1.c')
-rwxr-xr-xlibraries/sqlite/win32/fts1.c3344
1 files changed, 3344 insertions, 0 deletions
diff --git a/libraries/sqlite/win32/fts1.c b/libraries/sqlite/win32/fts1.c
new file mode 100755
index 0000000..5a69965
--- /dev/null
+++ b/libraries/sqlite/win32/fts1.c
@@ -0,0 +1,3344 @@
1/* fts1 has a design flaw which can lead to database corruption (see
2** below). It is recommended not to use it any longer, instead use
3** fts3 (or higher). If you believe that your use of fts1 is safe,
4** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
5*/
6#ifndef SQLITE_ENABLE_BROKEN_FTS1
7#error fts1 has a design flaw and has been deprecated.
8#endif
9/* The flaw is that fts1 uses the content table's unaliased rowid as
10** the unique docid. fts1 embeds the rowid in the index it builds,
11** and expects the rowid to not change. The SQLite VACUUM operation
12** will renumber such rowids, thereby breaking fts1. If you are using
13** fts1 in a system which has disabled VACUUM, then you can continue
14** to use it safely. Note that PRAGMA auto_vacuum does NOT disable
15** VACUUM, though systems using auto_vacuum are unlikely to invoke
16** VACUUM.
17**
18** fts1 should be safe even across VACUUM if you only insert documents
19** and never delete.
20*/
21
22/* The author disclaims copyright to this source code.
23 *
24 * This is an SQLite module implementing full-text search.
25 */
26
27/*
28** The code in this file is only compiled if:
29**
30** * The FTS1 module is being built as an extension
31** (in which case SQLITE_CORE is not defined), or
32**
33** * The FTS1 module is being built into the core of
34** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
35*/
36#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
37
38#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
39# define SQLITE_CORE 1
40#endif
41
42#include <assert.h>
43#include <stdlib.h>
44#include <stdio.h>
45#include <string.h>
46#include <ctype.h>
47
48#include "fts1.h"
49#include "fts1_hash.h"
50#include "fts1_tokenizer.h"
51#include "sqlite3.h"
52#include "sqlite3ext.h"
53SQLITE_EXTENSION_INIT1
54
55
56#if 0
57# define TRACE(A) printf A; fflush(stdout)
58#else
59# define TRACE(A)
60#endif
61
62/* utility functions */
63
64typedef struct StringBuffer {
65 int len; /* length, not including null terminator */
66 int alloced; /* Space allocated for s[] */
67 char *s; /* Content of the string */
68} StringBuffer;
69
70static void initStringBuffer(StringBuffer *sb){
71 sb->len = 0;
72 sb->alloced = 100;
73 sb->s = malloc(100);
74 sb->s[0] = '\0';
75}
76
77static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
78 if( sb->len + nFrom >= sb->alloced ){
79 sb->alloced = sb->len + nFrom + 100;
80 sb->s = realloc(sb->s, sb->alloced+1);
81 if( sb->s==0 ){
82 initStringBuffer(sb);
83 return;
84 }
85 }
86 memcpy(sb->s + sb->len, zFrom, nFrom);
87 sb->len += nFrom;
88 sb->s[sb->len] = 0;
89}
90static void append(StringBuffer *sb, const char *zFrom){
91 nappend(sb, zFrom, strlen(zFrom));
92}
93
94/* We encode variable-length integers in little-endian order using seven bits
95 * per byte as follows:
96**
97** KEY:
98** A = 0xxxxxxx 7 bits of data and one flag bit
99** B = 1xxxxxxx 7 bits of data and one flag bit
100**
101** 7 bits - A
102** 14 bits - BA
103** 21 bits - BBA
104** and so on.
105*/
106
107/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
108#define VARINT_MAX 10
109
110/* Write a 64-bit variable-length integer to memory starting at p[0].
111 * The length of data written will be between 1 and VARINT_MAX bytes.
112 * The number of bytes written is returned. */
113static int putVarint(char *p, sqlite_int64 v){
114 unsigned char *q = (unsigned char *) p;
115 sqlite_uint64 vu = v;
116 do{
117 *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
118 vu >>= 7;
119 }while( vu!=0 );
120 q[-1] &= 0x7f; /* turn off high bit in final byte */
121 assert( q - (unsigned char *)p <= VARINT_MAX );
122 return (int) (q - (unsigned char *)p);
123}
124
125/* Read a 64-bit variable-length integer from memory starting at p[0].
126 * Return the number of bytes read, or 0 on error.
127 * The value is stored in *v. */
128static int getVarint(const char *p, sqlite_int64 *v){
129 const unsigned char *q = (const unsigned char *) p;
130 sqlite_uint64 x = 0, y = 1;
131 while( (*q & 0x80) == 0x80 ){
132 x += y * (*q++ & 0x7f);
133 y <<= 7;
134 if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */
135 assert( 0 );
136 return 0;
137 }
138 }
139 x += y * (*q++);
140 *v = (sqlite_int64) x;
141 return (int) (q - (unsigned char *)p);
142}
143
144static int getVarint32(const char *p, int *pi){
145 sqlite_int64 i;
146 int ret = getVarint(p, &i);
147 *pi = (int) i;
148 assert( *pi==i );
149 return ret;
150}
151
152/*** Document lists ***
153 *
154 * A document list holds a sorted list of varint-encoded document IDs.
155 *
156 * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
157 *
158 * array {
159 * varint docid;
160 * array {
161 * varint position; (delta from previous position plus POS_BASE)
162 * varint startOffset; (delta from previous startOffset)
163 * varint endOffset; (delta from startOffset)
164 * }
165 * }
166 *
167 * Here, array { X } means zero or more occurrences of X, adjacent in memory.
168 *
169 * A position list may hold positions for text in multiple columns. A position
170 * POS_COLUMN is followed by a varint containing the index of the column for
171 * following positions in the list. Any positions appearing before any
172 * occurrences of POS_COLUMN are for column 0.
173 *
174 * A doclist with type DL_POSITIONS is like the above, but holds only docids
175 * and positions without offset information.
176 *
177 * A doclist with type DL_DOCIDS is like the above, but holds only docids
178 * without positions or offset information.
179 *
180 * On disk, every document list has positions and offsets, so we don't bother
181 * to serialize a doclist's type.
182 *
183 * We don't yet delta-encode document IDs; doing so will probably be a
184 * modest win.
185 *
186 * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
187 * After the first offset, estimate the next offset by using the
188 * current token position and the previous token position and offset,
189 * offset to handle some variance. So the estimate would be
190 * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
191 * as normal. Offsets more than 64 chars from the estimate are
192 * encoded as the delta to the previous start offset + 128. An
193 * additional tiny increment can be gained by using the end offset of
194 * the previous token to make the estimate a tiny bit more precise.
195*/
196
197/* It is not safe to call isspace(), tolower(), or isalnum() on
198** hi-bit-set characters. This is the same solution used in the
199** tokenizer.
200*/
201/* TODO(shess) The snippet-generation code should be using the
202** tokenizer-generated tokens rather than doing its own local
203** tokenization.
204*/
205/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
206static int safe_isspace(char c){
207 return (c&0x80)==0 ? isspace(c) : 0;
208}
209static int safe_tolower(char c){
210 return (c&0x80)==0 ? tolower(c) : c;
211}
212static int safe_isalnum(char c){
213 return (c&0x80)==0 ? isalnum(c) : 0;
214}
215
216typedef enum DocListType {
217 DL_DOCIDS, /* docids only */
218 DL_POSITIONS, /* docids + positions */
219 DL_POSITIONS_OFFSETS /* docids + positions + offsets */
220} DocListType;
221
222/*
223** By default, only positions and not offsets are stored in the doclists.
224** To change this so that offsets are stored too, compile with
225**
226** -DDL_DEFAULT=DL_POSITIONS_OFFSETS
227**
228*/
229#ifndef DL_DEFAULT
230# define DL_DEFAULT DL_POSITIONS
231#endif
232
233typedef struct DocList {
234 char *pData;
235 int nData;
236 DocListType iType;
237 int iLastColumn; /* the last column written */
238 int iLastPos; /* the last position written */
239 int iLastOffset; /* the last start offset written */
240} DocList;
241
242enum {
243 POS_END = 0, /* end of this position list */
244 POS_COLUMN, /* followed by new column number */
245 POS_BASE
246};
247
248/* Initialize a new DocList to hold the given data. */
249static void docListInit(DocList *d, DocListType iType,
250 const char *pData, int nData){
251 d->nData = nData;
252 if( nData>0 ){
253 d->pData = malloc(nData);
254 memcpy(d->pData, pData, nData);
255 } else {
256 d->pData = NULL;
257 }
258 d->iType = iType;
259 d->iLastColumn = 0;
260 d->iLastPos = d->iLastOffset = 0;
261}
262
263/* Create a new dynamically-allocated DocList. */
264static DocList *docListNew(DocListType iType){
265 DocList *d = (DocList *) malloc(sizeof(DocList));
266 docListInit(d, iType, 0, 0);
267 return d;
268}
269
270static void docListDestroy(DocList *d){
271 free(d->pData);
272#ifndef NDEBUG
273 memset(d, 0x55, sizeof(*d));
274#endif
275}
276
277static void docListDelete(DocList *d){
278 docListDestroy(d);
279 free(d);
280}
281
282static char *docListEnd(DocList *d){
283 return d->pData + d->nData;
284}
285
286/* Append a varint to a DocList's data. */
287static void appendVarint(DocList *d, sqlite_int64 i){
288 char c[VARINT_MAX];
289 int n = putVarint(c, i);
290 d->pData = realloc(d->pData, d->nData + n);
291 memcpy(d->pData + d->nData, c, n);
292 d->nData += n;
293}
294
295static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
296 appendVarint(d, iDocid);
297 if( d->iType>=DL_POSITIONS ){
298 appendVarint(d, POS_END); /* initially empty position list */
299 d->iLastColumn = 0;
300 d->iLastPos = d->iLastOffset = 0;
301 }
302}
303
304/* helper function for docListAddPos and docListAddPosOffset */
305static void addPos(DocList *d, int iColumn, int iPos){
306 assert( d->nData>0 );
307 --d->nData; /* remove previous terminator */
308 if( iColumn!=d->iLastColumn ){
309 assert( iColumn>d->iLastColumn );
310 appendVarint(d, POS_COLUMN);
311 appendVarint(d, iColumn);
312 d->iLastColumn = iColumn;
313 d->iLastPos = d->iLastOffset = 0;
314 }
315 assert( iPos>=d->iLastPos );
316 appendVarint(d, iPos-d->iLastPos+POS_BASE);
317 d->iLastPos = iPos;
318}
319
320/* Add a position to the last position list in a doclist. */
321static void docListAddPos(DocList *d, int iColumn, int iPos){
322 assert( d->iType==DL_POSITIONS );
323 addPos(d, iColumn, iPos);
324 appendVarint(d, POS_END); /* add new terminator */
325}
326
327/*
328** Add a position and starting and ending offsets to a doclist.
329**
330** If the doclist is setup to handle only positions, then insert
331** the position only and ignore the offsets.
332*/
333static void docListAddPosOffset(
334 DocList *d, /* Doclist under construction */
335 int iColumn, /* Column the inserted term is part of */
336 int iPos, /* Position of the inserted term */
337 int iStartOffset, /* Starting offset of inserted term */
338 int iEndOffset /* Ending offset of inserted term */
339){
340 assert( d->iType>=DL_POSITIONS );
341 addPos(d, iColumn, iPos);
342 if( d->iType==DL_POSITIONS_OFFSETS ){
343 assert( iStartOffset>=d->iLastOffset );
344 appendVarint(d, iStartOffset-d->iLastOffset);
345 d->iLastOffset = iStartOffset;
346 assert( iEndOffset>=iStartOffset );
347 appendVarint(d, iEndOffset-iStartOffset);
348 }
349 appendVarint(d, POS_END); /* add new terminator */
350}
351
352/*
353** A DocListReader object is a cursor into a doclist. Initialize
354** the cursor to the beginning of the doclist by calling readerInit().
355** Then use routines
356**
357** peekDocid()
358** readDocid()
359** readPosition()
360** skipPositionList()
361** and so forth...
362**
363** to read information out of the doclist. When we reach the end
364** of the doclist, atEnd() returns TRUE.
365*/
366typedef struct DocListReader {
367 DocList *pDoclist; /* The document list we are stepping through */
368 char *p; /* Pointer to next unread byte in the doclist */
369 int iLastColumn;
370 int iLastPos; /* the last position read, or -1 when not in a position list */
371} DocListReader;
372
373/*
374** Initialize the DocListReader r to point to the beginning of pDoclist.
375*/
376static void readerInit(DocListReader *r, DocList *pDoclist){
377 r->pDoclist = pDoclist;
378 if( pDoclist!=NULL ){
379 r->p = pDoclist->pData;
380 }
381 r->iLastColumn = -1;
382 r->iLastPos = -1;
383}
384
385/*
386** Return TRUE if we have reached then end of pReader and there is
387** nothing else left to read.
388*/
389static int atEnd(DocListReader *pReader){
390 return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
391}
392
393/* Peek at the next docid without advancing the read pointer.
394*/
395static sqlite_int64 peekDocid(DocListReader *pReader){
396 sqlite_int64 ret;
397 assert( !atEnd(pReader) );
398 assert( pReader->iLastPos==-1 );
399 getVarint(pReader->p, &ret);
400 return ret;
401}
402
403/* Read the next docid. See also nextDocid().
404*/
405static sqlite_int64 readDocid(DocListReader *pReader){
406 sqlite_int64 ret;
407 assert( !atEnd(pReader) );
408 assert( pReader->iLastPos==-1 );
409 pReader->p += getVarint(pReader->p, &ret);
410 if( pReader->pDoclist->iType>=DL_POSITIONS ){
411 pReader->iLastColumn = 0;
412 pReader->iLastPos = 0;
413 }
414 return ret;
415}
416
417/* Read the next position and column index from a position list.
418 * Returns the position, or -1 at the end of the list. */
419static int readPosition(DocListReader *pReader, int *iColumn){
420 int i;
421 int iType = pReader->pDoclist->iType;
422
423 if( pReader->iLastPos==-1 ){
424 return -1;
425 }
426 assert( !atEnd(pReader) );
427
428 if( iType<DL_POSITIONS ){
429 return -1;
430 }
431 pReader->p += getVarint32(pReader->p, &i);
432 if( i==POS_END ){
433 pReader->iLastColumn = pReader->iLastPos = -1;
434 *iColumn = -1;
435 return -1;
436 }
437 if( i==POS_COLUMN ){
438 pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
439 pReader->iLastPos = 0;
440 pReader->p += getVarint32(pReader->p, &i);
441 assert( i>=POS_BASE );
442 }
443 pReader->iLastPos += ((int) i)-POS_BASE;
444 if( iType>=DL_POSITIONS_OFFSETS ){
445 /* Skip over offsets, ignoring them for now. */
446 int iStart, iEnd;
447 pReader->p += getVarint32(pReader->p, &iStart);
448 pReader->p += getVarint32(pReader->p, &iEnd);
449 }
450 *iColumn = pReader->iLastColumn;
451 return pReader->iLastPos;
452}
453
454/* Skip past the end of a position list. */
455static void skipPositionList(DocListReader *pReader){
456 DocList *p = pReader->pDoclist;
457 if( p && p->iType>=DL_POSITIONS ){
458 int iColumn;
459 while( readPosition(pReader, &iColumn)!=-1 ){}
460 }
461}
462
463/* Skip over a docid, including its position list if the doclist has
464 * positions. */
465static void skipDocument(DocListReader *pReader){
466 readDocid(pReader);
467 skipPositionList(pReader);
468}
469
470/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid
471 * matching [iDocid] was found. */
472static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
473 sqlite_int64 d = 0;
474 while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
475 skipDocument(pReader);
476 }
477 return !atEnd(pReader) && d==iDocid;
478}
479
480/* Return the first document in a document list.
481*/
482static sqlite_int64 firstDocid(DocList *d){
483 DocListReader r;
484 readerInit(&r, d);
485 return readDocid(&r);
486}
487
488#ifdef SQLITE_DEBUG
489/*
490** This routine is used for debugging purpose only.
491**
492** Write the content of a doclist to standard output.
493*/
494static void printDoclist(DocList *p){
495 DocListReader r;
496 const char *zSep = "";
497
498 readerInit(&r, p);
499 while( !atEnd(&r) ){
500 sqlite_int64 docid = readDocid(&r);
501 if( docid==0 ){
502 skipPositionList(&r);
503 continue;
504 }
505 printf("%s%lld", zSep, docid);
506 zSep = ",";
507 if( p->iType>=DL_POSITIONS ){
508 int iPos, iCol;
509 const char *zDiv = "";
510 printf("(");
511 while( (iPos = readPosition(&r, &iCol))>=0 ){
512 printf("%s%d:%d", zDiv, iCol, iPos);
513 zDiv = ":";
514 }
515 printf(")");
516 }
517 }
518 printf("\n");
519 fflush(stdout);
520}
521#endif /* SQLITE_DEBUG */
522
523/* Trim the given doclist to contain only positions in column
524 * [iRestrictColumn]. */
525static void docListRestrictColumn(DocList *in, int iRestrictColumn){
526 DocListReader r;
527 DocList out;
528
529 assert( in->iType>=DL_POSITIONS );
530 readerInit(&r, in);
531 docListInit(&out, DL_POSITIONS, NULL, 0);
532
533 while( !atEnd(&r) ){
534 sqlite_int64 iDocid = readDocid(&r);
535 int iPos, iColumn;
536
537 docListAddDocid(&out, iDocid);
538 while( (iPos = readPosition(&r, &iColumn)) != -1 ){
539 if( iColumn==iRestrictColumn ){
540 docListAddPos(&out, iColumn, iPos);
541 }
542 }
543 }
544
545 docListDestroy(in);
546 *in = out;
547}
548
549/* Trim the given doclist by discarding any docids without any remaining
550 * positions. */
551static void docListDiscardEmpty(DocList *in) {
552 DocListReader r;
553 DocList out;
554
555 /* TODO: It would be nice to implement this operation in place; that
556 * could save a significant amount of memory in queries with long doclists. */
557 assert( in->iType>=DL_POSITIONS );
558 readerInit(&r, in);
559 docListInit(&out, DL_POSITIONS, NULL, 0);
560
561 while( !atEnd(&r) ){
562 sqlite_int64 iDocid = readDocid(&r);
563 int match = 0;
564 int iPos, iColumn;
565 while( (iPos = readPosition(&r, &iColumn)) != -1 ){
566 if( !match ){
567 docListAddDocid(&out, iDocid);
568 match = 1;
569 }
570 docListAddPos(&out, iColumn, iPos);
571 }
572 }
573
574 docListDestroy(in);
575 *in = out;
576}
577
578/* Helper function for docListUpdate() and docListAccumulate().
579** Splices a doclist element into the doclist represented by r,
580** leaving r pointing after the newly spliced element.
581*/
582static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
583 const char *pSource, int nSource){
584 DocList *d = r->pDoclist;
585 char *pTarget;
586 int nTarget, found;
587
588 found = skipToDocid(r, iDocid);
589
590 /* Describe slice in d to place pSource/nSource. */
591 pTarget = r->p;
592 if( found ){
593 skipDocument(r);
594 nTarget = r->p-pTarget;
595 }else{
596 nTarget = 0;
597 }
598
599 /* The sense of the following is that there are three possibilities.
600 ** If nTarget==nSource, we should not move any memory nor realloc.
601 ** If nTarget>nSource, trim target and realloc.
602 ** If nTarget<nSource, realloc then expand target.
603 */
604 if( nTarget>nSource ){
605 memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
606 }
607 if( nTarget!=nSource ){
608 int iDoclist = pTarget-d->pData;
609 d->pData = realloc(d->pData, d->nData+nSource-nTarget);
610 pTarget = d->pData+iDoclist;
611 }
612 if( nTarget<nSource ){
613 memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
614 }
615
616 memcpy(pTarget, pSource, nSource);
617 d->nData += nSource-nTarget;
618 r->p = pTarget+nSource;
619}
620
621/* Insert/update pUpdate into the doclist. */
622static void docListUpdate(DocList *d, DocList *pUpdate){
623 DocListReader reader;
624
625 assert( d!=NULL && pUpdate!=NULL );
626 assert( d->iType==pUpdate->iType);
627
628 readerInit(&reader, d);
629 docListSpliceElement(&reader, firstDocid(pUpdate),
630 pUpdate->pData, pUpdate->nData);
631}
632
633/* Propagate elements from pUpdate to pAcc, overwriting elements with
634** matching docids.
635*/
636static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
637 DocListReader accReader, updateReader;
638
639 /* Handle edge cases where one doclist is empty. */
640 assert( pAcc!=NULL );
641 if( pUpdate==NULL || pUpdate->nData==0 ) return;
642 if( pAcc->nData==0 ){
643 pAcc->pData = malloc(pUpdate->nData);
644 memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
645 pAcc->nData = pUpdate->nData;
646 return;
647 }
648
649 readerInit(&accReader, pAcc);
650 readerInit(&updateReader, pUpdate);
651
652 while( !atEnd(&updateReader) ){
653 char *pSource = updateReader.p;
654 sqlite_int64 iDocid = readDocid(&updateReader);
655 skipPositionList(&updateReader);
656 docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
657 }
658}
659
660/*
661** Read the next docid off of pIn. Return 0 if we reach the end.
662*
663* TODO: This assumes that docids are never 0, but they may actually be 0 since
664* users can choose docids when inserting into a full-text table. Fix this.
665*/
666static sqlite_int64 nextDocid(DocListReader *pIn){
667 skipPositionList(pIn);
668 return atEnd(pIn) ? 0 : readDocid(pIn);
669}
670
671/*
672** pLeft and pRight are two DocListReaders that are pointing to
673** positions lists of the same document: iDocid.
674**
675** If there are no instances in pLeft or pRight where the position
676** of pLeft is one less than the position of pRight, then this
677** routine adds nothing to pOut.
678**
679** If there are one or more instances where positions from pLeft
680** are exactly one less than positions from pRight, then add a new
681** document record to pOut. If pOut wants to hold positions, then
682** include the positions from pRight that are one more than a
683** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
684**
685** pLeft and pRight are left pointing at the next document record.
686*/
687static void mergePosList(
688 DocListReader *pLeft, /* Left position list */
689 DocListReader *pRight, /* Right position list */
690 sqlite_int64 iDocid, /* The docid from pLeft and pRight */
691 DocList *pOut /* Write the merged document record here */
692){
693 int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
694 int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
695 int match = 0;
696
697 /* Loop until we've reached the end of both position lists. */
698 while( iLeftPos!=-1 && iRightPos!=-1 ){
699 if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
700 if( !match ){
701 docListAddDocid(pOut, iDocid);
702 match = 1;
703 }
704 if( pOut->iType>=DL_POSITIONS ){
705 docListAddPos(pOut, iRightCol, iRightPos);
706 }
707 iLeftPos = readPosition(pLeft, &iLeftCol);
708 iRightPos = readPosition(pRight, &iRightCol);
709 }else if( iRightCol<iLeftCol ||
710 (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
711 iRightPos = readPosition(pRight, &iRightCol);
712 }else{
713 iLeftPos = readPosition(pLeft, &iLeftCol);
714 }
715 }
716 if( iLeftPos>=0 ) skipPositionList(pLeft);
717 if( iRightPos>=0 ) skipPositionList(pRight);
718}
719
720/* We have two doclists: pLeft and pRight.
721** Write the phrase intersection of these two doclists into pOut.
722**
723** A phrase intersection means that two documents only match
724** if pLeft.iPos+1==pRight.iPos.
725**
726** The output pOut may or may not contain positions. If pOut
727** does contain positions, they are the positions of pRight.
728*/
729static void docListPhraseMerge(
730 DocList *pLeft, /* Doclist resulting from the words on the left */
731 DocList *pRight, /* Doclist for the next word to the right */
732 DocList *pOut /* Write the combined doclist here */
733){
734 DocListReader left, right;
735 sqlite_int64 docidLeft, docidRight;
736
737 readerInit(&left, pLeft);
738 readerInit(&right, pRight);
739 docidLeft = nextDocid(&left);
740 docidRight = nextDocid(&right);
741
742 while( docidLeft>0 && docidRight>0 ){
743 if( docidLeft<docidRight ){
744 docidLeft = nextDocid(&left);
745 }else if( docidRight<docidLeft ){
746 docidRight = nextDocid(&right);
747 }else{
748 mergePosList(&left, &right, docidLeft, pOut);
749 docidLeft = nextDocid(&left);
750 docidRight = nextDocid(&right);
751 }
752 }
753}
754
755/* We have two doclists: pLeft and pRight.
756** Write the intersection of these two doclists into pOut.
757** Only docids are matched. Position information is ignored.
758**
759** The output pOut never holds positions.
760*/
761static void docListAndMerge(
762 DocList *pLeft, /* Doclist resulting from the words on the left */
763 DocList *pRight, /* Doclist for the next word to the right */
764 DocList *pOut /* Write the combined doclist here */
765){
766 DocListReader left, right;
767 sqlite_int64 docidLeft, docidRight;
768
769 assert( pOut->iType<DL_POSITIONS );
770
771 readerInit(&left, pLeft);
772 readerInit(&right, pRight);
773 docidLeft = nextDocid(&left);
774 docidRight = nextDocid(&right);
775
776 while( docidLeft>0 && docidRight>0 ){
777 if( docidLeft<docidRight ){
778 docidLeft = nextDocid(&left);
779 }else if( docidRight<docidLeft ){
780 docidRight = nextDocid(&right);
781 }else{
782 docListAddDocid(pOut, docidLeft);
783 docidLeft = nextDocid(&left);
784 docidRight = nextDocid(&right);
785 }
786 }
787}
788
789/* We have two doclists: pLeft and pRight.
790** Write the union of these two doclists into pOut.
791** Only docids are matched. Position information is ignored.
792**
793** The output pOut never holds positions.
794*/
795static void docListOrMerge(
796 DocList *pLeft, /* Doclist resulting from the words on the left */
797 DocList *pRight, /* Doclist for the next word to the right */
798 DocList *pOut /* Write the combined doclist here */
799){
800 DocListReader left, right;
801 sqlite_int64 docidLeft, docidRight, priorLeft;
802
803 readerInit(&left, pLeft);
804 readerInit(&right, pRight);
805 docidLeft = nextDocid(&left);
806 docidRight = nextDocid(&right);
807
808 while( docidLeft>0 && docidRight>0 ){
809 if( docidLeft<=docidRight ){
810 docListAddDocid(pOut, docidLeft);
811 }else{
812 docListAddDocid(pOut, docidRight);
813 }
814 priorLeft = docidLeft;
815 if( docidLeft<=docidRight ){
816 docidLeft = nextDocid(&left);
817 }
818 if( docidRight>0 && docidRight<=priorLeft ){
819 docidRight = nextDocid(&right);
820 }
821 }
822 while( docidLeft>0 ){
823 docListAddDocid(pOut, docidLeft);
824 docidLeft = nextDocid(&left);
825 }
826 while( docidRight>0 ){
827 docListAddDocid(pOut, docidRight);
828 docidRight = nextDocid(&right);
829 }
830}
831
832/* We have two doclists: pLeft and pRight.
833** Write into pOut all documents that occur in pLeft but not
834** in pRight.
835**
836** Only docids are matched. Position information is ignored.
837**
838** The output pOut never holds positions.
839*/
840static void docListExceptMerge(
841 DocList *pLeft, /* Doclist resulting from the words on the left */
842 DocList *pRight, /* Doclist for the next word to the right */
843 DocList *pOut /* Write the combined doclist here */
844){
845 DocListReader left, right;
846 sqlite_int64 docidLeft, docidRight, priorLeft;
847
848 readerInit(&left, pLeft);
849 readerInit(&right, pRight);
850 docidLeft = nextDocid(&left);
851 docidRight = nextDocid(&right);
852
853 while( docidLeft>0 && docidRight>0 ){
854 priorLeft = docidLeft;
855 if( docidLeft<docidRight ){
856 docListAddDocid(pOut, docidLeft);
857 }
858 if( docidLeft<=docidRight ){
859 docidLeft = nextDocid(&left);
860 }
861 if( docidRight>0 && docidRight<=priorLeft ){
862 docidRight = nextDocid(&right);
863 }
864 }
865 while( docidLeft>0 ){
866 docListAddDocid(pOut, docidLeft);
867 docidLeft = nextDocid(&left);
868 }
869}
870
871static char *string_dup_n(const char *s, int n){
872 char *str = malloc(n + 1);
873 memcpy(str, s, n);
874 str[n] = '\0';
875 return str;
876}
877
878/* Duplicate a string; the caller must free() the returned string.
879 * (We don't use strdup() since it's not part of the standard C library and
880 * may not be available everywhere.) */
881static char *string_dup(const char *s){
882 return string_dup_n(s, strlen(s));
883}
884
885/* Format a string, replacing each occurrence of the % character with
886 * zDb.zName. This may be more convenient than sqlite_mprintf()
887 * when one string is used repeatedly in a format string.
888 * The caller must free() the returned string. */
889static char *string_format(const char *zFormat,
890 const char *zDb, const char *zName){
891 const char *p;
892 size_t len = 0;
893 size_t nDb = strlen(zDb);
894 size_t nName = strlen(zName);
895 size_t nFullTableName = nDb+1+nName;
896 char *result;
897 char *r;
898
899 /* first compute length needed */
900 for(p = zFormat ; *p ; ++p){
901 len += (*p=='%' ? nFullTableName : 1);
902 }
903 len += 1; /* for null terminator */
904
905 r = result = malloc(len);
906 for(p = zFormat; *p; ++p){
907 if( *p=='%' ){
908 memcpy(r, zDb, nDb);
909 r += nDb;
910 *r++ = '.';
911 memcpy(r, zName, nName);
912 r += nName;
913 } else {
914 *r++ = *p;
915 }
916 }
917 *r++ = '\0';
918 assert( r == result + len );
919 return result;
920}
921
922static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
923 const char *zFormat){
924 char *zCommand = string_format(zFormat, zDb, zName);
925 int rc;
926 TRACE(("FTS1 sql: %s\n", zCommand));
927 rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
928 free(zCommand);
929 return rc;
930}
931
932static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
933 sqlite3_stmt **ppStmt, const char *zFormat){
934 char *zCommand = string_format(zFormat, zDb, zName);
935 int rc;
936 TRACE(("FTS1 prepare: %s\n", zCommand));
937 rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
938 free(zCommand);
939 return rc;
940}
941
942/* end utility functions */
943
944/* Forward reference */
945typedef struct fulltext_vtab fulltext_vtab;
946
947/* A single term in a query is represented by an instances of
948** the following structure.
949*/
950typedef struct QueryTerm {
951 short int nPhrase; /* How many following terms are part of the same phrase */
952 short int iPhrase; /* This is the i-th term of a phrase. */
953 short int iColumn; /* Column of the index that must match this term */
954 signed char isOr; /* this term is preceded by "OR" */
955 signed char isNot; /* this term is preceded by "-" */
956 char *pTerm; /* text of the term. '\000' terminated. malloced */
957 int nTerm; /* Number of bytes in pTerm[] */
958} QueryTerm;
959
960
961/* A query string is parsed into a Query structure.
962 *
963 * We could, in theory, allow query strings to be complicated
964 * nested expressions with precedence determined by parentheses.
965 * But none of the major search engines do this. (Perhaps the
966 * feeling is that an parenthesized expression is two complex of
967 * an idea for the average user to grasp.) Taking our lead from
968 * the major search engines, we will allow queries to be a list
969 * of terms (with an implied AND operator) or phrases in double-quotes,
970 * with a single optional "-" before each non-phrase term to designate
971 * negation and an optional OR connector.
972 *
973 * OR binds more tightly than the implied AND, which is what the
974 * major search engines seem to do. So, for example:
975 *
976 * [one two OR three] ==> one AND (two OR three)
977 * [one OR two three] ==> (one OR two) AND three
978 *
979 * A "-" before a term matches all entries that lack that term.
980 * The "-" must occur immediately before the term with in intervening
981 * space. This is how the search engines do it.
982 *
983 * A NOT term cannot be the right-hand operand of an OR. If this
984 * occurs in the query string, the NOT is ignored:
985 *
986 * [one OR -two] ==> one OR two
987 *
988 */
989typedef struct Query {
990 fulltext_vtab *pFts; /* The full text index */
991 int nTerms; /* Number of terms in the query */
992 QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */
993 int nextIsOr; /* Set the isOr flag on the next inserted term */
994 int nextColumn; /* Next word parsed must be in this column */
995 int dfltColumn; /* The default column */
996} Query;
997
998
999/*
1000** An instance of the following structure keeps track of generated
1001** matching-word offset information and snippets.
1002*/
1003typedef struct Snippet {
1004 int nMatch; /* Total number of matches */
1005 int nAlloc; /* Space allocated for aMatch[] */
1006 struct snippetMatch { /* One entry for each matching term */
1007 char snStatus; /* Status flag for use while constructing snippets */
1008 short int iCol; /* The column that contains the match */
1009 short int iTerm; /* The index in Query.pTerms[] of the matching term */
1010 short int nByte; /* Number of bytes in the term */
1011 int iStart; /* The offset to the first character of the term */
1012 } *aMatch; /* Points to space obtained from malloc */
1013 char *zOffset; /* Text rendering of aMatch[] */
1014 int nOffset; /* strlen(zOffset) */
1015 char *zSnippet; /* Snippet text */
1016 int nSnippet; /* strlen(zSnippet) */
1017} Snippet;
1018
1019
1020typedef enum QueryType {
1021 QUERY_GENERIC, /* table scan */
1022 QUERY_ROWID, /* lookup by rowid */
1023 QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
1024} QueryType;
1025
1026/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
1027** before we start aggregating into larger segments. Lower CHUNK_MAX
1028** means that for a given input we have more individual segments per
1029** term, which means more rows in the table and a bigger index (due to
1030** both more rows and bigger rowids). But it also reduces the average
1031** cost of adding new elements to the segment 0 doclist, and it seems
1032** to reduce the number of pages read and written during inserts. 256
1033** was chosen by measuring insertion times for a certain input (first
1034** 10k documents of Enron corpus), though including query performance
1035** in the decision may argue for a larger value.
1036*/
1037#define CHUNK_MAX 256
1038
1039typedef enum fulltext_statement {
1040 CONTENT_INSERT_STMT,
1041 CONTENT_SELECT_STMT,
1042 CONTENT_UPDATE_STMT,
1043 CONTENT_DELETE_STMT,
1044
1045 TERM_SELECT_STMT,
1046 TERM_SELECT_ALL_STMT,
1047 TERM_INSERT_STMT,
1048 TERM_UPDATE_STMT,
1049 TERM_DELETE_STMT,
1050
1051 MAX_STMT /* Always at end! */
1052} fulltext_statement;
1053
1054/* These must exactly match the enum above. */
1055/* TODO(adam): Is there some risk that a statement (in particular,
1056** pTermSelectStmt) will be used in two cursors at once, e.g. if a
1057** query joins a virtual table to itself? If so perhaps we should
1058** move some of these to the cursor object.
1059*/
1060static const char *const fulltext_zStatement[MAX_STMT] = {
1061 /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */
1062 /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
1063 /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */
1064 /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
1065
1066 /* TERM_SELECT */
1067 "select rowid, doclist from %_term where term = ? and segment = ?",
1068 /* TERM_SELECT_ALL */
1069 "select doclist from %_term where term = ? order by segment",
1070 /* TERM_INSERT */
1071 "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
1072 /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
1073 /* TERM_DELETE */ "delete from %_term where rowid = ?",
1074};
1075
1076/*
1077** A connection to a fulltext index is an instance of the following
1078** structure. The xCreate and xConnect methods create an instance
1079** of this structure and xDestroy and xDisconnect free that instance.
1080** All other methods receive a pointer to the structure as one of their
1081** arguments.
1082*/
1083struct fulltext_vtab {
1084 sqlite3_vtab base; /* Base class used by SQLite core */
1085 sqlite3 *db; /* The database connection */
1086 const char *zDb; /* logical database name */
1087 const char *zName; /* virtual table name */
1088 int nColumn; /* number of columns in virtual table */
1089 char **azColumn; /* column names. malloced */
1090 char **azContentColumn; /* column names in content table; malloced */
1091 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
1092
1093 /* Precompiled statements which we keep as long as the table is
1094 ** open.
1095 */
1096 sqlite3_stmt *pFulltextStatements[MAX_STMT];
1097};
1098
1099/*
1100** When the core wants to do a query, it create a cursor using a
1101** call to xOpen. This structure is an instance of a cursor. It
1102** is destroyed by xClose.
1103*/
1104typedef struct fulltext_cursor {
1105 sqlite3_vtab_cursor base; /* Base class used by SQLite core */
1106 QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */
1107 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
1108 int eof; /* True if at End Of Results */
1109 Query q; /* Parsed query string */
1110 Snippet snippet; /* Cached snippet for the current row */
1111 int iColumn; /* Column being searched */
1112 DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */
1113} fulltext_cursor;
1114
1115static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
1116 return (fulltext_vtab *) c->base.pVtab;
1117}
1118
1119static const sqlite3_module fulltextModule; /* forward declaration */
1120
1121/* Append a list of strings separated by commas to a StringBuffer. */
1122static void appendList(StringBuffer *sb, int nString, char **azString){
1123 int i;
1124 for(i=0; i<nString; ++i){
1125 if( i>0 ) append(sb, ", ");
1126 append(sb, azString[i]);
1127 }
1128}
1129
1130/* Return a dynamically generated statement of the form
1131 * insert into %_content (rowid, ...) values (?, ...)
1132 */
1133static const char *contentInsertStatement(fulltext_vtab *v){
1134 StringBuffer sb;
1135 int i;
1136
1137 initStringBuffer(&sb);
1138 append(&sb, "insert into %_content (rowid, ");
1139 appendList(&sb, v->nColumn, v->azContentColumn);
1140 append(&sb, ") values (?");
1141 for(i=0; i<v->nColumn; ++i)
1142 append(&sb, ", ?");
1143 append(&sb, ")");
1144 return sb.s;
1145}
1146
1147/* Return a dynamically generated statement of the form
1148 * update %_content set [col_0] = ?, [col_1] = ?, ...
1149 * where rowid = ?
1150 */
1151static const char *contentUpdateStatement(fulltext_vtab *v){
1152 StringBuffer sb;
1153 int i;
1154
1155 initStringBuffer(&sb);
1156 append(&sb, "update %_content set ");
1157 for(i=0; i<v->nColumn; ++i) {
1158 if( i>0 ){
1159 append(&sb, ", ");
1160 }
1161 append(&sb, v->azContentColumn[i]);
1162 append(&sb, " = ?");
1163 }
1164 append(&sb, " where rowid = ?");
1165 return sb.s;
1166}
1167
1168/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
1169** If the indicated statement has never been prepared, it is prepared
1170** and cached, otherwise the cached version is reset.
1171*/
1172static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
1173 sqlite3_stmt **ppStmt){
1174 assert( iStmt<MAX_STMT );
1175 if( v->pFulltextStatements[iStmt]==NULL ){
1176 const char *zStmt;
1177 int rc;
1178 switch( iStmt ){
1179 case CONTENT_INSERT_STMT:
1180 zStmt = contentInsertStatement(v); break;
1181 case CONTENT_UPDATE_STMT:
1182 zStmt = contentUpdateStatement(v); break;
1183 default:
1184 zStmt = fulltext_zStatement[iStmt];
1185 }
1186 rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
1187 zStmt);
1188 if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
1189 if( rc!=SQLITE_OK ) return rc;
1190 } else {
1191 int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
1192 if( rc!=SQLITE_OK ) return rc;
1193 }
1194
1195 *ppStmt = v->pFulltextStatements[iStmt];
1196 return SQLITE_OK;
1197}
1198
1199/* Step the indicated statement, handling errors SQLITE_BUSY (by
1200** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
1201** bindings to the new statement).
1202** TODO(adam): We should extend this function so that it can work with
1203** statements declared locally, not only globally cached statements.
1204*/
1205static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
1206 sqlite3_stmt **ppStmt){
1207 int rc;
1208 sqlite3_stmt *s = *ppStmt;
1209 assert( iStmt<MAX_STMT );
1210 assert( s==v->pFulltextStatements[iStmt] );
1211
1212 while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
1213 if( rc==SQLITE_BUSY ) continue;
1214 if( rc!=SQLITE_ERROR ) return rc;
1215
1216 /* If an SQLITE_SCHEMA error has occured, then finalizing this
1217 * statement is going to delete the fulltext_vtab structure. If
1218 * the statement just executed is in the pFulltextStatements[]
1219 * array, it will be finalized twice. So remove it before
1220 * calling sqlite3_finalize().
1221 */
1222 v->pFulltextStatements[iStmt] = NULL;
1223 rc = sqlite3_finalize(s);
1224 break;
1225 }
1226 return rc;
1227
1228 err:
1229 sqlite3_finalize(s);
1230 return rc;
1231}
1232
1233/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
1234** Useful for statements like UPDATE, where we expect no results.
1235*/
1236static int sql_single_step_statement(fulltext_vtab *v,
1237 fulltext_statement iStmt,
1238 sqlite3_stmt **ppStmt){
1239 int rc = sql_step_statement(v, iStmt, ppStmt);
1240 return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
1241}
1242
1243/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
1244static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
1245 sqlite3_value **pValues){
1246 sqlite3_stmt *s;
1247 int i;
1248 int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
1249 if( rc!=SQLITE_OK ) return rc;
1250
1251 rc = sqlite3_bind_value(s, 1, rowid);
1252 if( rc!=SQLITE_OK ) return rc;
1253
1254 for(i=0; i<v->nColumn; ++i){
1255 rc = sqlite3_bind_value(s, 2+i, pValues[i]);
1256 if( rc!=SQLITE_OK ) return rc;
1257 }
1258
1259 return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
1260}
1261
1262/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
1263 * where rowid = [iRowid] */
1264static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
1265 sqlite_int64 iRowid){
1266 sqlite3_stmt *s;
1267 int i;
1268 int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
1269 if( rc!=SQLITE_OK ) return rc;
1270
1271 for(i=0; i<v->nColumn; ++i){
1272 rc = sqlite3_bind_value(s, 1+i, pValues[i]);
1273 if( rc!=SQLITE_OK ) return rc;
1274 }
1275
1276 rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
1277 if( rc!=SQLITE_OK ) return rc;
1278
1279 return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
1280}
1281
1282static void freeStringArray(int nString, const char **pString){
1283 int i;
1284
1285 for (i=0 ; i < nString ; ++i) {
1286 if( pString[i]!=NULL ) free((void *) pString[i]);
1287 }
1288 free((void *) pString);
1289}
1290
1291/* select * from %_content where rowid = [iRow]
1292 * The caller must delete the returned array and all strings in it.
1293 * null fields will be NULL in the returned array.
1294 *
1295 * TODO: Perhaps we should return pointer/length strings here for consistency
1296 * with other code which uses pointer/length. */
1297static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
1298 const char ***pValues){
1299 sqlite3_stmt *s;
1300 const char **values;
1301 int i;
1302 int rc;
1303
1304 *pValues = NULL;
1305
1306 rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
1307 if( rc!=SQLITE_OK ) return rc;
1308
1309 rc = sqlite3_bind_int64(s, 1, iRow);
1310 if( rc!=SQLITE_OK ) return rc;
1311
1312 rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
1313 if( rc!=SQLITE_ROW ) return rc;
1314
1315 values = (const char **) malloc(v->nColumn * sizeof(const char *));
1316 for(i=0; i<v->nColumn; ++i){
1317 if( sqlite3_column_type(s, i)==SQLITE_NULL ){
1318 values[i] = NULL;
1319 }else{
1320 values[i] = string_dup((char*)sqlite3_column_text(s, i));
1321 }
1322 }
1323
1324 /* We expect only one row. We must execute another sqlite3_step()
1325 * to complete the iteration; otherwise the table will remain locked. */
1326 rc = sqlite3_step(s);
1327 if( rc==SQLITE_DONE ){
1328 *pValues = values;
1329 return SQLITE_OK;
1330 }
1331
1332 freeStringArray(v->nColumn, values);
1333 return rc;
1334}
1335
1336/* delete from %_content where rowid = [iRow ] */
1337static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
1338 sqlite3_stmt *s;
1339 int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
1340 if( rc!=SQLITE_OK ) return rc;
1341
1342 rc = sqlite3_bind_int64(s, 1, iRow);
1343 if( rc!=SQLITE_OK ) return rc;
1344
1345 return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
1346}
1347
1348/* select rowid, doclist from %_term
1349 * where term = [pTerm] and segment = [iSegment]
1350 * If found, returns SQLITE_ROW; the caller must free the
1351 * returned doclist. If no rows found, returns SQLITE_DONE. */
1352static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
1353 int iSegment,
1354 sqlite_int64 *rowid, DocList *out){
1355 sqlite3_stmt *s;
1356 int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
1357 if( rc!=SQLITE_OK ) return rc;
1358
1359 rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
1360 if( rc!=SQLITE_OK ) return rc;
1361
1362 rc = sqlite3_bind_int(s, 2, iSegment);
1363 if( rc!=SQLITE_OK ) return rc;
1364
1365 rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
1366 if( rc!=SQLITE_ROW ) return rc;
1367
1368 *rowid = sqlite3_column_int64(s, 0);
1369 docListInit(out, DL_DEFAULT,
1370 sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
1371
1372 /* We expect only one row. We must execute another sqlite3_step()
1373 * to complete the iteration; otherwise the table will remain locked. */
1374 rc = sqlite3_step(s);
1375 return rc==SQLITE_DONE ? SQLITE_ROW : rc;
1376}
1377
1378/* Load the segment doclists for term pTerm and merge them in
1379** appropriate order into out. Returns SQLITE_OK if successful. If
1380** there are no segments for pTerm, successfully returns an empty
1381** doclist in out.
1382**
1383** Each document consists of 1 or more "columns". The number of
1384** columns is v->nColumn. If iColumn==v->nColumn, then return
1385** position information about all columns. If iColumn<v->nColumn,
1386** then only return position information about the iColumn-th column
1387** (where the first column is 0).
1388*/
1389static int term_select_all(
1390 fulltext_vtab *v, /* The fulltext index we are querying against */
1391 int iColumn, /* If <nColumn, only look at the iColumn-th column */
1392 const char *pTerm, /* The term whose posting lists we want */
1393 int nTerm, /* Number of bytes in pTerm */
1394 DocList *out /* Write the resulting doclist here */
1395){
1396 DocList doclist;
1397 sqlite3_stmt *s;
1398 int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
1399 if( rc!=SQLITE_OK ) return rc;
1400
1401 rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
1402 if( rc!=SQLITE_OK ) return rc;
1403
1404 docListInit(&doclist, DL_DEFAULT, 0, 0);
1405
1406 /* TODO(shess) Handle schema and busy errors. */
1407 while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
1408 DocList old;
1409
1410 /* TODO(shess) If we processed doclists from oldest to newest, we
1411 ** could skip the malloc() involved with the following call. For
1412 ** now, I'd rather keep this logic similar to index_insert_term().
1413 ** We could additionally drop elements when we see deletes, but
1414 ** that would require a distinct version of docListAccumulate().
1415 */
1416 docListInit(&old, DL_DEFAULT,
1417 sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
1418
1419 if( iColumn<v->nColumn ){ /* querying a single column */
1420 docListRestrictColumn(&old, iColumn);
1421 }
1422
1423 /* doclist contains the newer data, so write it over old. Then
1424 ** steal accumulated result for doclist.
1425 */
1426 docListAccumulate(&old, &doclist);
1427 docListDestroy(&doclist);
1428 doclist = old;
1429 }
1430 if( rc!=SQLITE_DONE ){
1431 docListDestroy(&doclist);
1432 return rc;
1433 }
1434
1435 docListDiscardEmpty(&doclist);
1436 *out = doclist;
1437 return SQLITE_OK;
1438}
1439
1440/* insert into %_term (rowid, term, segment, doclist)
1441 values ([piRowid], [pTerm], [iSegment], [doclist])
1442** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
1443**
1444** NOTE(shess) piRowid is IN, with values of "space of int64" plus
1445** null, it is not used to pass data back to the caller.
1446*/
1447static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
1448 const char *pTerm, int nTerm,
1449 int iSegment, DocList *doclist){
1450 sqlite3_stmt *s;
1451 int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
1452 if( rc!=SQLITE_OK ) return rc;
1453
1454 if( piRowid==NULL ){
1455 rc = sqlite3_bind_null(s, 1);
1456 }else{
1457 rc = sqlite3_bind_int64(s, 1, *piRowid);
1458 }
1459 if( rc!=SQLITE_OK ) return rc;
1460
1461 rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
1462 if( rc!=SQLITE_OK ) return rc;
1463
1464 rc = sqlite3_bind_int(s, 3, iSegment);
1465 if( rc!=SQLITE_OK ) return rc;
1466
1467 rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
1468 if( rc!=SQLITE_OK ) return rc;
1469
1470 return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
1471}
1472
1473/* update %_term set doclist = [doclist] where rowid = [rowid] */
1474static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
1475 DocList *doclist){
1476 sqlite3_stmt *s;
1477 int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
1478 if( rc!=SQLITE_OK ) return rc;
1479
1480 rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
1481 if( rc!=SQLITE_OK ) return rc;
1482
1483 rc = sqlite3_bind_int64(s, 2, rowid);
1484 if( rc!=SQLITE_OK ) return rc;
1485
1486 return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
1487}
1488
1489static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
1490 sqlite3_stmt *s;
1491 int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
1492 if( rc!=SQLITE_OK ) return rc;
1493
1494 rc = sqlite3_bind_int64(s, 1, rowid);
1495 if( rc!=SQLITE_OK ) return rc;
1496
1497 return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
1498}
1499
1500/*
1501** Free the memory used to contain a fulltext_vtab structure.
1502*/
1503static void fulltext_vtab_destroy(fulltext_vtab *v){
1504 int iStmt, i;
1505
1506 TRACE(("FTS1 Destroy %p\n", v));
1507 for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
1508 if( v->pFulltextStatements[iStmt]!=NULL ){
1509 sqlite3_finalize(v->pFulltextStatements[iStmt]);
1510 v->pFulltextStatements[iStmt] = NULL;
1511 }
1512 }
1513
1514 if( v->pTokenizer!=NULL ){
1515 v->pTokenizer->pModule->xDestroy(v->pTokenizer);
1516 v->pTokenizer = NULL;
1517 }
1518
1519 free(v->azColumn);
1520 for(i = 0; i < v->nColumn; ++i) {
1521 sqlite3_free(v->azContentColumn[i]);
1522 }
1523 free(v->azContentColumn);
1524 free(v);
1525}
1526
1527/*
1528** Token types for parsing the arguments to xConnect or xCreate.
1529*/
1530#define TOKEN_EOF 0 /* End of file */
1531#define TOKEN_SPACE 1 /* Any kind of whitespace */
1532#define TOKEN_ID 2 /* An identifier */
1533#define TOKEN_STRING 3 /* A string literal */
1534#define TOKEN_PUNCT 4 /* A single punctuation character */
1535
1536/*
1537** If X is a character that can be used in an identifier then
1538** IdChar(X) will be true. Otherwise it is false.
1539**
1540** For ASCII, any character with the high-order bit set is
1541** allowed in an identifier. For 7-bit characters,
1542** sqlite3IsIdChar[X] must be 1.
1543**
1544** Ticket #1066. the SQL standard does not allow '$' in the
1545** middle of identfiers. But many SQL implementations do.
1546** SQLite will allow '$' in identifiers for compatibility.
1547** But the feature is undocumented.
1548*/
1549static const char isIdChar[] = {
1550/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1551 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
1552 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
1553 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
1555 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
1557};
1558#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
1559
1560
1561/*
1562** Return the length of the token that begins at z[0].
1563** Store the token type in *tokenType before returning.
1564*/
1565static int getToken(const char *z, int *tokenType){
1566 int i, c;
1567 switch( *z ){
1568 case 0: {
1569 *tokenType = TOKEN_EOF;
1570 return 0;
1571 }
1572 case ' ': case '\t': case '\n': case '\f': case '\r': {
1573 for(i=1; safe_isspace(z[i]); i++){}
1574 *tokenType = TOKEN_SPACE;
1575 return i;
1576 }
1577 case '`':
1578 case '\'':
1579 case '"': {
1580 int delim = z[0];
1581 for(i=1; (c=z[i])!=0; i++){
1582 if( c==delim ){
1583 if( z[i+1]==delim ){
1584 i++;
1585 }else{
1586 break;
1587 }
1588 }
1589 }
1590 *tokenType = TOKEN_STRING;
1591 return i + (c!=0);
1592 }
1593 case '[': {
1594 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
1595 *tokenType = TOKEN_ID;
1596 return i;
1597 }
1598 default: {
1599 if( !IdChar(*z) ){
1600 break;
1601 }
1602 for(i=1; IdChar(z[i]); i++){}
1603 *tokenType = TOKEN_ID;
1604 return i;
1605 }
1606 }
1607 *tokenType = TOKEN_PUNCT;
1608 return 1;
1609}
1610
1611/*
1612** A token extracted from a string is an instance of the following
1613** structure.
1614*/
1615typedef struct Token {
1616 const char *z; /* Pointer to token text. Not '\000' terminated */
1617 short int n; /* Length of the token text in bytes. */
1618} Token;
1619
1620/*
1621** Given a input string (which is really one of the argv[] parameters
1622** passed into xConnect or xCreate) split the string up into tokens.
1623** Return an array of pointers to '\000' terminated strings, one string
1624** for each non-whitespace token.
1625**
1626** The returned array is terminated by a single NULL pointer.
1627**
1628** Space to hold the returned array is obtained from a single
1629** malloc and should be freed by passing the return value to free().
1630** The individual strings within the token list are all a part of
1631** the single memory allocation and will all be freed at once.
1632*/
1633static char **tokenizeString(const char *z, int *pnToken){
1634 int nToken = 0;
1635 Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
1636 int n = 1;
1637 int e, i;
1638 int totalSize = 0;
1639 char **azToken;
1640 char *zCopy;
1641 while( n>0 ){
1642 n = getToken(z, &e);
1643 if( e!=TOKEN_SPACE ){
1644 aToken[nToken].z = z;
1645 aToken[nToken].n = n;
1646 nToken++;
1647 totalSize += n+1;
1648 }
1649 z += n;
1650 }
1651 azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
1652 zCopy = (char*)&azToken[nToken];
1653 nToken--;
1654 for(i=0; i<nToken; i++){
1655 azToken[i] = zCopy;
1656 n = aToken[i].n;
1657 memcpy(zCopy, aToken[i].z, n);
1658 zCopy[n] = 0;
1659 zCopy += n+1;
1660 }
1661 azToken[nToken] = 0;
1662 free(aToken);
1663 *pnToken = nToken;
1664 return azToken;
1665}
1666
1667/*
1668** Convert an SQL-style quoted string into a normal string by removing
1669** the quote characters. The conversion is done in-place. If the
1670** input does not begin with a quote character, then this routine
1671** is a no-op.
1672**
1673** Examples:
1674**
1675** "abc" becomes abc
1676** 'xyz' becomes xyz
1677** [pqr] becomes pqr
1678** `mno` becomes mno
1679*/
1680static void dequoteString(char *z){
1681 int quote;
1682 int i, j;
1683 if( z==0 ) return;
1684 quote = z[0];
1685 switch( quote ){
1686 case '\'': break;
1687 case '"': break;
1688 case '`': break; /* For MySQL compatibility */
1689 case '[': quote = ']'; break; /* For MS SqlServer compatibility */
1690 default: return;
1691 }
1692 for(i=1, j=0; z[i]; i++){
1693 if( z[i]==quote ){
1694 if( z[i+1]==quote ){
1695 z[j++] = quote;
1696 i++;
1697 }else{
1698 z[j++] = 0;
1699 break;
1700 }
1701 }else{
1702 z[j++] = z[i];
1703 }
1704 }
1705}
1706
1707/*
1708** The input azIn is a NULL-terminated list of tokens. Remove the first
1709** token and all punctuation tokens. Remove the quotes from
1710** around string literal tokens.
1711**
1712** Example:
1713**
1714** input: tokenize chinese ( 'simplifed' , 'mixed' )
1715** output: chinese simplifed mixed
1716**
1717** Another example:
1718**
1719** input: delimiters ( '[' , ']' , '...' )
1720** output: [ ] ...
1721*/
1722static void tokenListToIdList(char **azIn){
1723 int i, j;
1724 if( azIn ){
1725 for(i=0, j=-1; azIn[i]; i++){
1726 if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
1727 dequoteString(azIn[i]);
1728 if( j>=0 ){
1729 azIn[j] = azIn[i];
1730 }
1731 j++;
1732 }
1733 }
1734 azIn[j] = 0;
1735 }
1736}
1737
1738
1739/*
1740** Find the first alphanumeric token in the string zIn. Null-terminate
1741** this token. Remove any quotation marks. And return a pointer to
1742** the result.
1743*/
1744static char *firstToken(char *zIn, char **pzTail){
1745 int n, ttype;
1746 while(1){
1747 n = getToken(zIn, &ttype);
1748 if( ttype==TOKEN_SPACE ){
1749 zIn += n;
1750 }else if( ttype==TOKEN_EOF ){
1751 *pzTail = zIn;
1752 return 0;
1753 }else{
1754 zIn[n] = 0;
1755 *pzTail = &zIn[1];
1756 dequoteString(zIn);
1757 return zIn;
1758 }
1759 }
1760 /*NOTREACHED*/
1761}
1762
1763/* Return true if...
1764**
1765** * s begins with the string t, ignoring case
1766** * s is longer than t
1767** * The first character of s beyond t is not a alphanumeric
1768**
1769** Ignore leading space in *s.
1770**
1771** To put it another way, return true if the first token of
1772** s[] is t[].
1773*/
1774static int startsWith(const char *s, const char *t){
1775 while( safe_isspace(*s) ){ s++; }
1776 while( *t ){
1777 if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
1778 }
1779 return *s!='_' && !safe_isalnum(*s);
1780}
1781
1782/*
1783** An instance of this structure defines the "spec" of a
1784** full text index. This structure is populated by parseSpec
1785** and use by fulltextConnect and fulltextCreate.
1786*/
1787typedef struct TableSpec {
1788 const char *zDb; /* Logical database name */
1789 const char *zName; /* Name of the full-text index */
1790 int nColumn; /* Number of columns to be indexed */
1791 char **azColumn; /* Original names of columns to be indexed */
1792 char **azContentColumn; /* Column names for %_content */
1793 char **azTokenizer; /* Name of tokenizer and its arguments */
1794} TableSpec;
1795
1796/*
1797** Reclaim all of the memory used by a TableSpec
1798*/
1799static void clearTableSpec(TableSpec *p) {
1800 free(p->azColumn);
1801 free(p->azContentColumn);
1802 free(p->azTokenizer);
1803}
1804
1805/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
1806 *
1807 * CREATE VIRTUAL TABLE email
1808 * USING fts1(subject, body, tokenize mytokenizer(myarg))
1809 *
1810 * We return parsed information in a TableSpec structure.
1811 *
1812 */
1813static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
1814 char**pzErr){
1815 int i, n;
1816 char *z, *zDummy;
1817 char **azArg;
1818 const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */
1819
1820 assert( argc>=3 );
1821 /* Current interface:
1822 ** argv[0] - module name
1823 ** argv[1] - database name
1824 ** argv[2] - table name
1825 ** argv[3..] - columns, optionally followed by tokenizer specification
1826 ** and snippet delimiters specification.
1827 */
1828
1829 /* Make a copy of the complete argv[][] array in a single allocation.
1830 ** The argv[][] array is read-only and transient. We can write to the
1831 ** copy in order to modify things and the copy is persistent.
1832 */
1833 memset(pSpec, 0, sizeof(*pSpec));
1834 for(i=n=0; i<argc; i++){
1835 n += strlen(argv[i]) + 1;
1836 }
1837 azArg = malloc( sizeof(char*)*argc + n );
1838 if( azArg==0 ){
1839 return SQLITE_NOMEM;
1840 }
1841 z = (char*)&azArg[argc];
1842 for(i=0; i<argc; i++){
1843 azArg[i] = z;
1844 strcpy(z, argv[i]);
1845 z += strlen(z)+1;
1846 }
1847
1848 /* Identify the column names and the tokenizer and delimiter arguments
1849 ** in the argv[][] array.
1850 */
1851 pSpec->zDb = azArg[1];
1852 pSpec->zName = azArg[2];
1853 pSpec->nColumn = 0;
1854 pSpec->azColumn = azArg;
1855 zTokenizer = "tokenize simple";
1856 for(i=3; i<argc; ++i){
1857 if( startsWith(azArg[i],"tokenize") ){
1858 zTokenizer = azArg[i];
1859 }else{
1860 z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
1861 pSpec->nColumn++;
1862 }
1863 }
1864 if( pSpec->nColumn==0 ){
1865 azArg[0] = "content";
1866 pSpec->nColumn = 1;
1867 }
1868
1869 /*
1870 ** Construct the list of content column names.
1871 **
1872 ** Each content column name will be of the form cNNAAAA
1873 ** where NN is the column number and AAAA is the sanitized
1874 ** column name. "sanitized" means that special characters are
1875 ** converted to "_". The cNN prefix guarantees that all column
1876 ** names are unique.
1877 **
1878 ** The AAAA suffix is not strictly necessary. It is included
1879 ** for the convenience of people who might examine the generated
1880 ** %_content table and wonder what the columns are used for.
1881 */
1882 pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
1883 if( pSpec->azContentColumn==0 ){
1884 clearTableSpec(pSpec);
1885 return SQLITE_NOMEM;
1886 }
1887 for(i=0; i<pSpec->nColumn; i++){
1888 char *p;
1889 pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
1890 for (p = pSpec->azContentColumn[i]; *p ; ++p) {
1891 if( !safe_isalnum(*p) ) *p = '_';
1892 }
1893 }
1894
1895 /*
1896 ** Parse the tokenizer specification string.
1897 */
1898 pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
1899 tokenListToIdList(pSpec->azTokenizer);
1900
1901 return SQLITE_OK;
1902}
1903
1904/*
1905** Generate a CREATE TABLE statement that describes the schema of
1906** the virtual table. Return a pointer to this schema string.
1907**
1908** Space is obtained from sqlite3_mprintf() and should be freed
1909** using sqlite3_free().
1910*/
1911static char *fulltextSchema(
1912 int nColumn, /* Number of columns */
1913 const char *const* azColumn, /* List of columns */
1914 const char *zTableName /* Name of the table */
1915){
1916 int i;
1917 char *zSchema, *zNext;
1918 const char *zSep = "(";
1919 zSchema = sqlite3_mprintf("CREATE TABLE x");
1920 for(i=0; i<nColumn; i++){
1921 zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
1922 sqlite3_free(zSchema);
1923 zSchema = zNext;
1924 zSep = ",";
1925 }
1926 zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
1927 sqlite3_free(zSchema);
1928 return zNext;
1929}
1930
1931/*
1932** Build a new sqlite3_vtab structure that will describe the
1933** fulltext index defined by spec.
1934*/
1935static int constructVtab(
1936 sqlite3 *db, /* The SQLite database connection */
1937 TableSpec *spec, /* Parsed spec information from parseSpec() */
1938 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
1939 char **pzErr /* Write any error message here */
1940){
1941 int rc;
1942 int n;
1943 fulltext_vtab *v = 0;
1944 const sqlite3_tokenizer_module *m = NULL;
1945 char *schema;
1946
1947 v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
1948 if( v==0 ) return SQLITE_NOMEM;
1949 memset(v, 0, sizeof(*v));
1950 /* sqlite will initialize v->base */
1951 v->db = db;
1952 v->zDb = spec->zDb; /* Freed when azColumn is freed */
1953 v->zName = spec->zName; /* Freed when azColumn is freed */
1954 v->nColumn = spec->nColumn;
1955 v->azContentColumn = spec->azContentColumn;
1956 spec->azContentColumn = 0;
1957 v->azColumn = spec->azColumn;
1958 spec->azColumn = 0;
1959
1960 if( spec->azTokenizer==0 ){
1961 return SQLITE_NOMEM;
1962 }
1963 /* TODO(shess) For now, add new tokenizers as else if clauses. */
1964 if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
1965 sqlite3Fts1SimpleTokenizerModule(&m);
1966 }else if( startsWith(spec->azTokenizer[0], "porter") ){
1967 sqlite3Fts1PorterTokenizerModule(&m);
1968 }else{
1969 *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
1970 rc = SQLITE_ERROR;
1971 goto err;
1972 }
1973 for(n=0; spec->azTokenizer[n]; n++){}
1974 if( n ){
1975 rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
1976 &v->pTokenizer);
1977 }else{
1978 rc = m->xCreate(0, 0, &v->pTokenizer);
1979 }
1980 if( rc!=SQLITE_OK ) goto err;
1981 v->pTokenizer->pModule = m;
1982
1983 /* TODO: verify the existence of backing tables foo_content, foo_term */
1984
1985 schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
1986 spec->zName);
1987 rc = sqlite3_declare_vtab(db, schema);
1988 sqlite3_free(schema);
1989 if( rc!=SQLITE_OK ) goto err;
1990
1991 memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
1992
1993 *ppVTab = &v->base;
1994 TRACE(("FTS1 Connect %p\n", v));
1995
1996 return rc;
1997
1998err:
1999 fulltext_vtab_destroy(v);
2000 return rc;
2001}
2002
2003static int fulltextConnect(
2004 sqlite3 *db,
2005 void *pAux,
2006 int argc, const char *const*argv,
2007 sqlite3_vtab **ppVTab,
2008 char **pzErr
2009){
2010 TableSpec spec;
2011 int rc = parseSpec(&spec, argc, argv, pzErr);
2012 if( rc!=SQLITE_OK ) return rc;
2013
2014 rc = constructVtab(db, &spec, ppVTab, pzErr);
2015 clearTableSpec(&spec);
2016 return rc;
2017}
2018
2019 /* The %_content table holds the text of each document, with
2020 ** the rowid used as the docid.
2021 **
2022 ** The %_term table maps each term to a document list blob
2023 ** containing elements sorted by ascending docid, each element
2024 ** encoded as:
2025 **
2026 ** docid varint-encoded
2027 ** token elements:
2028 ** position+1 varint-encoded as delta from previous position
2029 ** start offset varint-encoded as delta from previous start offset
2030 ** end offset varint-encoded as delta from start offset
2031 **
2032 ** The sentinel position of 0 indicates the end of the token list.
2033 **
2034 ** Additionally, doclist blobs are chunked into multiple segments,
2035 ** using segment to order the segments. New elements are added to
2036 ** the segment at segment 0, until it exceeds CHUNK_MAX. Then
2037 ** segment 0 is deleted, and the doclist is inserted at segment 1.
2038 ** If there is already a doclist at segment 1, the segment 0 doclist
2039 ** is merged with it, the segment 1 doclist is deleted, and the
2040 ** merged doclist is inserted at segment 2, repeating those
2041 ** operations until an insert succeeds.
2042 **
2043 ** Since this structure doesn't allow us to update elements in place
2044 ** in case of deletion or update, these are simply written to
2045 ** segment 0 (with an empty token list in case of deletion), with
2046 ** docListAccumulate() taking care to retain lower-segment
2047 ** information in preference to higher-segment information.
2048 */
2049 /* TODO(shess) Provide a VACUUM type operation which both removes
2050 ** deleted elements which are no longer necessary, and duplicated
2051 ** elements. I suspect this will probably not be necessary in
2052 ** practice, though.
2053 */
2054static int fulltextCreate(sqlite3 *db, void *pAux,
2055 int argc, const char * const *argv,
2056 sqlite3_vtab **ppVTab, char **pzErr){
2057 int rc;
2058 TableSpec spec;
2059 StringBuffer schema;
2060 TRACE(("FTS1 Create\n"));
2061
2062 rc = parseSpec(&spec, argc, argv, pzErr);
2063 if( rc!=SQLITE_OK ) return rc;
2064
2065 initStringBuffer(&schema);
2066 append(&schema, "CREATE TABLE %_content(");
2067 appendList(&schema, spec.nColumn, spec.azContentColumn);
2068 append(&schema, ")");
2069 rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
2070 free(schema.s);
2071 if( rc!=SQLITE_OK ) goto out;
2072
2073 rc = sql_exec(db, spec.zDb, spec.zName,
2074 "create table %_term(term text, segment integer, doclist blob, "
2075 "primary key(term, segment));");
2076 if( rc!=SQLITE_OK ) goto out;
2077
2078 rc = constructVtab(db, &spec, ppVTab, pzErr);
2079
2080out:
2081 clearTableSpec(&spec);
2082 return rc;
2083}
2084
2085/* Decide how to handle an SQL query. */
2086static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
2087 int i;
2088 TRACE(("FTS1 BestIndex\n"));
2089
2090 for(i=0; i<pInfo->nConstraint; ++i){
2091 const struct sqlite3_index_constraint *pConstraint;
2092 pConstraint = &pInfo->aConstraint[i];
2093 if( pConstraint->usable ) {
2094 if( pConstraint->iColumn==-1 &&
2095 pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
2096 pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */
2097 TRACE(("FTS1 QUERY_ROWID\n"));
2098 } else if( pConstraint->iColumn>=0 &&
2099 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
2100 /* full-text search */
2101 pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
2102 TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
2103 } else continue;
2104
2105 pInfo->aConstraintUsage[i].argvIndex = 1;
2106 pInfo->aConstraintUsage[i].omit = 1;
2107
2108 /* An arbitrary value for now.
2109 * TODO: Perhaps rowid matches should be considered cheaper than
2110 * full-text searches. */
2111 pInfo->estimatedCost = 1.0;
2112
2113 return SQLITE_OK;
2114 }
2115 }
2116 pInfo->idxNum = QUERY_GENERIC;
2117 return SQLITE_OK;
2118}
2119
2120static int fulltextDisconnect(sqlite3_vtab *pVTab){
2121 TRACE(("FTS1 Disconnect %p\n", pVTab));
2122 fulltext_vtab_destroy((fulltext_vtab *)pVTab);
2123 return SQLITE_OK;
2124}
2125
2126static int fulltextDestroy(sqlite3_vtab *pVTab){
2127 fulltext_vtab *v = (fulltext_vtab *)pVTab;
2128 int rc;
2129
2130 TRACE(("FTS1 Destroy %p\n", pVTab));
2131 rc = sql_exec(v->db, v->zDb, v->zName,
2132 "drop table if exists %_content;"
2133 "drop table if exists %_term;"
2134 );
2135 if( rc!=SQLITE_OK ) return rc;
2136
2137 fulltext_vtab_destroy((fulltext_vtab *)pVTab);
2138 return SQLITE_OK;
2139}
2140
2141static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2142 fulltext_cursor *c;
2143
2144 c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
2145 /* sqlite will initialize c->base */
2146 *ppCursor = &c->base;
2147 TRACE(("FTS1 Open %p: %p\n", pVTab, c));
2148
2149 return SQLITE_OK;
2150}
2151
2152
2153/* Free all of the dynamically allocated memory held by *q
2154*/
2155static void queryClear(Query *q){
2156 int i;
2157 for(i = 0; i < q->nTerms; ++i){
2158 free(q->pTerms[i].pTerm);
2159 }
2160 free(q->pTerms);
2161 memset(q, 0, sizeof(*q));
2162}
2163
2164/* Free all of the dynamically allocated memory held by the
2165** Snippet
2166*/
2167static void snippetClear(Snippet *p){
2168 free(p->aMatch);
2169 free(p->zOffset);
2170 free(p->zSnippet);
2171 memset(p, 0, sizeof(*p));
2172}
2173/*
2174** Append a single entry to the p->aMatch[] log.
2175*/
2176static void snippetAppendMatch(
2177 Snippet *p, /* Append the entry to this snippet */
2178 int iCol, int iTerm, /* The column and query term */
2179 int iStart, int nByte /* Offset and size of the match */
2180){
2181 int i;
2182 struct snippetMatch *pMatch;
2183 if( p->nMatch+1>=p->nAlloc ){
2184 p->nAlloc = p->nAlloc*2 + 10;
2185 p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
2186 if( p->aMatch==0 ){
2187 p->nMatch = 0;
2188 p->nAlloc = 0;
2189 return;
2190 }
2191 }
2192 i = p->nMatch++;
2193 pMatch = &p->aMatch[i];
2194 pMatch->iCol = iCol;
2195 pMatch->iTerm = iTerm;
2196 pMatch->iStart = iStart;
2197 pMatch->nByte = nByte;
2198}
2199
2200/*
2201** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
2202*/
2203#define FTS1_ROTOR_SZ (32)
2204#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
2205
2206/*
2207** Add entries to pSnippet->aMatch[] for every match that occurs against
2208** document zDoc[0..nDoc-1] which is stored in column iColumn.
2209*/
2210static void snippetOffsetsOfColumn(
2211 Query *pQuery,
2212 Snippet *pSnippet,
2213 int iColumn,
2214 const char *zDoc,
2215 int nDoc
2216){
2217 const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */
2218 sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */
2219 sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
2220 fulltext_vtab *pVtab; /* The full text index */
2221 int nColumn; /* Number of columns in the index */
2222 const QueryTerm *aTerm; /* Query string terms */
2223 int nTerm; /* Number of query string terms */
2224 int i, j; /* Loop counters */
2225 int rc; /* Return code */
2226 unsigned int match, prevMatch; /* Phrase search bitmasks */
2227 const char *zToken; /* Next token from the tokenizer */
2228 int nToken; /* Size of zToken */
2229 int iBegin, iEnd, iPos; /* Offsets of beginning and end */
2230
2231 /* The following variables keep a circular buffer of the last
2232 ** few tokens */
2233 unsigned int iRotor = 0; /* Index of current token */
2234 int iRotorBegin[FTS1_ROTOR_SZ]; /* Beginning offset of token */
2235 int iRotorLen[FTS1_ROTOR_SZ]; /* Length of token */
2236
2237 pVtab = pQuery->pFts;
2238 nColumn = pVtab->nColumn;
2239 pTokenizer = pVtab->pTokenizer;
2240 pTModule = pTokenizer->pModule;
2241 rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
2242 if( rc ) return;
2243 pTCursor->pTokenizer = pTokenizer;
2244 aTerm = pQuery->pTerms;
2245 nTerm = pQuery->nTerms;
2246 if( nTerm>=FTS1_ROTOR_SZ ){
2247 nTerm = FTS1_ROTOR_SZ - 1;
2248 }
2249 prevMatch = 0;
2250 while(1){
2251 rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
2252 if( rc ) break;
2253 iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
2254 iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
2255 match = 0;
2256 for(i=0; i<nTerm; i++){
2257 int iCol;
2258 iCol = aTerm[i].iColumn;
2259 if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
2260 if( aTerm[i].nTerm!=nToken ) continue;
2261 if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
2262 if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
2263 match |= 1<<i;
2264 if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
2265 for(j=aTerm[i].iPhrase-1; j>=0; j--){
2266 int k = (iRotor-j) & FTS1_ROTOR_MASK;
2267 snippetAppendMatch(pSnippet, iColumn, i-j,
2268 iRotorBegin[k], iRotorLen[k]);
2269 }
2270 }
2271 }
2272 prevMatch = match<<1;
2273 iRotor++;
2274 }
2275 pTModule->xClose(pTCursor);
2276}
2277
2278
2279/*
2280** Compute all offsets for the current row of the query.
2281** If the offsets have already been computed, this routine is a no-op.
2282*/
2283static void snippetAllOffsets(fulltext_cursor *p){
2284 int nColumn;
2285 int iColumn, i;
2286 int iFirst, iLast;
2287 fulltext_vtab *pFts;
2288
2289 if( p->snippet.nMatch ) return;
2290 if( p->q.nTerms==0 ) return;
2291 pFts = p->q.pFts;
2292 nColumn = pFts->nColumn;
2293 iColumn = p->iCursorType - QUERY_FULLTEXT;
2294 if( iColumn<0 || iColumn>=nColumn ){
2295 iFirst = 0;
2296 iLast = nColumn-1;
2297 }else{
2298 iFirst = iColumn;
2299 iLast = iColumn;
2300 }
2301 for(i=iFirst; i<=iLast; i++){
2302 const char *zDoc;
2303 int nDoc;
2304 zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
2305 nDoc = sqlite3_column_bytes(p->pStmt, i+1);
2306 snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
2307 }
2308}
2309
2310/*
2311** Convert the information in the aMatch[] array of the snippet
2312** into the string zOffset[0..nOffset-1].
2313*/
2314static void snippetOffsetText(Snippet *p){
2315 int i;
2316 int cnt = 0;
2317 StringBuffer sb;
2318 char zBuf[200];
2319 if( p->zOffset ) return;
2320 initStringBuffer(&sb);
2321 for(i=0; i<p->nMatch; i++){
2322 struct snippetMatch *pMatch = &p->aMatch[i];
2323 zBuf[0] = ' ';
2324 sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol,
2325 pMatch->iTerm, pMatch->iStart, pMatch->nByte);
2326 append(&sb, zBuf);
2327 cnt++;
2328 }
2329 p->zOffset = sb.s;
2330 p->nOffset = sb.len;
2331}
2332
2333/*
2334** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set
2335** of matching words some of which might be in zDoc. zDoc is column
2336** number iCol.
2337**
2338** iBreak is suggested spot in zDoc where we could begin or end an
2339** excerpt. Return a value similar to iBreak but possibly adjusted
2340** to be a little left or right so that the break point is better.
2341*/
2342static int wordBoundary(
2343 int iBreak, /* The suggested break point */
2344 const char *zDoc, /* Document text */
2345 int nDoc, /* Number of bytes in zDoc[] */
2346 struct snippetMatch *aMatch, /* Matching words */
2347 int nMatch, /* Number of entries in aMatch[] */
2348 int iCol /* The column number for zDoc[] */
2349){
2350 int i;
2351 if( iBreak<=10 ){
2352 return 0;
2353 }
2354 if( iBreak>=nDoc-10 ){
2355 return nDoc;
2356 }
2357 for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
2358 while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
2359 if( i<nMatch ){
2360 if( aMatch[i].iStart<iBreak+10 ){
2361 return aMatch[i].iStart;
2362 }
2363 if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
2364 return aMatch[i-1].iStart;
2365 }
2366 }
2367 for(i=1; i<=10; i++){
2368 if( safe_isspace(zDoc[iBreak-i]) ){
2369 return iBreak - i + 1;
2370 }
2371 if( safe_isspace(zDoc[iBreak+i]) ){
2372 return iBreak + i + 1;
2373 }
2374 }
2375 return iBreak;
2376}
2377
2378/*
2379** If the StringBuffer does not end in white space, add a single
2380** space character to the end.
2381*/
2382static void appendWhiteSpace(StringBuffer *p){
2383 if( p->len==0 ) return;
2384 if( safe_isspace(p->s[p->len-1]) ) return;
2385 append(p, " ");
2386}
2387
2388/*
2389** Remove white space from teh end of the StringBuffer
2390*/
2391static void trimWhiteSpace(StringBuffer *p){
2392 while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
2393 p->len--;
2394 }
2395}
2396
2397
2398
2399/*
2400** Allowed values for Snippet.aMatch[].snStatus
2401*/
2402#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */
2403#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */
2404
2405/*
2406** Generate the text of a snippet.
2407*/
2408static void snippetText(
2409 fulltext_cursor *pCursor, /* The cursor we need the snippet for */
2410 const char *zStartMark, /* Markup to appear before each match */
2411 const char *zEndMark, /* Markup to appear after each match */
2412 const char *zEllipsis /* Ellipsis mark */
2413){
2414 int i, j;
2415 struct snippetMatch *aMatch;
2416 int nMatch;
2417 int nDesired;
2418 StringBuffer sb;
2419 int tailCol;
2420 int tailOffset;
2421 int iCol;
2422 int nDoc;
2423 const char *zDoc;
2424 int iStart, iEnd;
2425 int tailEllipsis = 0;
2426 int iMatch;
2427
2428
2429 free(pCursor->snippet.zSnippet);
2430 pCursor->snippet.zSnippet = 0;
2431 aMatch = pCursor->snippet.aMatch;
2432 nMatch = pCursor->snippet.nMatch;
2433 initStringBuffer(&sb);
2434
2435 for(i=0; i<nMatch; i++){
2436 aMatch[i].snStatus = SNIPPET_IGNORE;
2437 }
2438 nDesired = 0;
2439 for(i=0; i<pCursor->q.nTerms; i++){
2440 for(j=0; j<nMatch; j++){
2441 if( aMatch[j].iTerm==i ){
2442 aMatch[j].snStatus = SNIPPET_DESIRED;
2443 nDesired++;
2444 break;
2445 }
2446 }
2447 }
2448
2449 iMatch = 0;
2450 tailCol = -1;
2451 tailOffset = 0;
2452 for(i=0; i<nMatch && nDesired>0; i++){
2453 if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
2454 nDesired--;
2455 iCol = aMatch[i].iCol;
2456 zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
2457 nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
2458 iStart = aMatch[i].iStart - 40;
2459 iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
2460 if( iStart<=10 ){
2461 iStart = 0;
2462 }
2463 if( iCol==tailCol && iStart<=tailOffset+20 ){
2464 iStart = tailOffset;
2465 }
2466 if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
2467 trimWhiteSpace(&sb);
2468 appendWhiteSpace(&sb);
2469 append(&sb, zEllipsis);
2470 appendWhiteSpace(&sb);
2471 }
2472 iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
2473 iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
2474 if( iEnd>=nDoc-10 ){
2475 iEnd = nDoc;
2476 tailEllipsis = 0;
2477 }else{
2478 tailEllipsis = 1;
2479 }
2480 while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
2481 while( iStart<iEnd ){
2482 while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
2483 && aMatch[iMatch].iCol<=iCol ){
2484 iMatch++;
2485 }
2486 if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
2487 && aMatch[iMatch].iCol==iCol ){
2488 nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
2489 iStart = aMatch[iMatch].iStart;
2490 append(&sb, zStartMark);
2491 nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
2492 append(&sb, zEndMark);
2493 iStart += aMatch[iMatch].nByte;
2494 for(j=iMatch+1; j<nMatch; j++){
2495 if( aMatch[j].iTerm==aMatch[iMatch].iTerm
2496 && aMatch[j].snStatus==SNIPPET_DESIRED ){
2497 nDesired--;
2498 aMatch[j].snStatus = SNIPPET_IGNORE;
2499 }
2500 }
2501 }else{
2502 nappend(&sb, &zDoc[iStart], iEnd - iStart);
2503 iStart = iEnd;
2504 }
2505 }
2506 tailCol = iCol;
2507 tailOffset = iEnd;
2508 }
2509 trimWhiteSpace(&sb);
2510 if( tailEllipsis ){
2511 appendWhiteSpace(&sb);
2512 append(&sb, zEllipsis);
2513 }
2514 pCursor->snippet.zSnippet = sb.s;
2515 pCursor->snippet.nSnippet = sb.len;
2516}
2517
2518
2519/*
2520** Close the cursor. For additional information see the documentation
2521** on the xClose method of the virtual table interface.
2522*/
2523static int fulltextClose(sqlite3_vtab_cursor *pCursor){
2524 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2525 TRACE(("FTS1 Close %p\n", c));
2526 sqlite3_finalize(c->pStmt);
2527 queryClear(&c->q);
2528 snippetClear(&c->snippet);
2529 if( c->result.pDoclist!=NULL ){
2530 docListDelete(c->result.pDoclist);
2531 }
2532 free(c);
2533 return SQLITE_OK;
2534}
2535
2536static int fulltextNext(sqlite3_vtab_cursor *pCursor){
2537 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2538 sqlite_int64 iDocid;
2539 int rc;
2540
2541 TRACE(("FTS1 Next %p\n", pCursor));
2542 snippetClear(&c->snippet);
2543 if( c->iCursorType < QUERY_FULLTEXT ){
2544 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
2545 rc = sqlite3_step(c->pStmt);
2546 switch( rc ){
2547 case SQLITE_ROW:
2548 c->eof = 0;
2549 return SQLITE_OK;
2550 case SQLITE_DONE:
2551 c->eof = 1;
2552 return SQLITE_OK;
2553 default:
2554 c->eof = 1;
2555 return rc;
2556 }
2557 } else { /* full-text query */
2558 rc = sqlite3_reset(c->pStmt);
2559 if( rc!=SQLITE_OK ) return rc;
2560
2561 iDocid = nextDocid(&c->result);
2562 if( iDocid==0 ){
2563 c->eof = 1;
2564 return SQLITE_OK;
2565 }
2566 rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
2567 if( rc!=SQLITE_OK ) return rc;
2568 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
2569 rc = sqlite3_step(c->pStmt);
2570 if( rc==SQLITE_ROW ){ /* the case we expect */
2571 c->eof = 0;
2572 return SQLITE_OK;
2573 }
2574 /* an error occurred; abort */
2575 return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
2576 }
2577}
2578
2579
2580/* Return a DocList corresponding to the query term *pTerm. If *pTerm
2581** is the first term of a phrase query, go ahead and evaluate the phrase
2582** query and return the doclist for the entire phrase query.
2583**
2584** The result is stored in pTerm->doclist.
2585*/
2586static int docListOfTerm(
2587 fulltext_vtab *v, /* The full text index */
2588 int iColumn, /* column to restrict to. No restrition if >=nColumn */
2589 QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */
2590 DocList **ppResult /* Write the result here */
2591){
2592 DocList *pLeft, *pRight, *pNew;
2593 int i, rc;
2594
2595 pLeft = docListNew(DL_POSITIONS);
2596 rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
2597 if( rc ){
2598 docListDelete(pLeft);
2599 return rc;
2600 }
2601 for(i=1; i<=pQTerm->nPhrase; i++){
2602 pRight = docListNew(DL_POSITIONS);
2603 rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
2604 if( rc ){
2605 docListDelete(pLeft);
2606 return rc;
2607 }
2608 pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
2609 docListPhraseMerge(pLeft, pRight, pNew);
2610 docListDelete(pLeft);
2611 docListDelete(pRight);
2612 pLeft = pNew;
2613 }
2614 *ppResult = pLeft;
2615 return SQLITE_OK;
2616}
2617
2618/* Add a new term pTerm[0..nTerm-1] to the query *q.
2619*/
2620static void queryAdd(Query *q, const char *pTerm, int nTerm){
2621 QueryTerm *t;
2622 ++q->nTerms;
2623 q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
2624 if( q->pTerms==0 ){
2625 q->nTerms = 0;
2626 return;
2627 }
2628 t = &q->pTerms[q->nTerms - 1];
2629 memset(t, 0, sizeof(*t));
2630 t->pTerm = malloc(nTerm+1);
2631 memcpy(t->pTerm, pTerm, nTerm);
2632 t->pTerm[nTerm] = 0;
2633 t->nTerm = nTerm;
2634 t->isOr = q->nextIsOr;
2635 q->nextIsOr = 0;
2636 t->iColumn = q->nextColumn;
2637 q->nextColumn = q->dfltColumn;
2638}
2639
2640/*
2641** Check to see if the string zToken[0...nToken-1] matches any
2642** column name in the virtual table. If it does,
2643** return the zero-indexed column number. If not, return -1.
2644*/
2645static int checkColumnSpecifier(
2646 fulltext_vtab *pVtab, /* The virtual table */
2647 const char *zToken, /* Text of the token */
2648 int nToken /* Number of characters in the token */
2649){
2650 int i;
2651 for(i=0; i<pVtab->nColumn; i++){
2652 if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
2653 && pVtab->azColumn[i][nToken]==0 ){
2654 return i;
2655 }
2656 }
2657 return -1;
2658}
2659
2660/*
2661** Parse the text at pSegment[0..nSegment-1]. Add additional terms
2662** to the query being assemblied in pQuery.
2663**
2664** inPhrase is true if pSegment[0..nSegement-1] is contained within
2665** double-quotes. If inPhrase is true, then the first term
2666** is marked with the number of terms in the phrase less one and
2667** OR and "-" syntax is ignored. If inPhrase is false, then every
2668** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
2669*/
2670static int tokenizeSegment(
2671 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */
2672 const char *pSegment, int nSegment, /* Query expression being parsed */
2673 int inPhrase, /* True if within "..." */
2674 Query *pQuery /* Append results here */
2675){
2676 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
2677 sqlite3_tokenizer_cursor *pCursor;
2678 int firstIndex = pQuery->nTerms;
2679 int iCol;
2680 int nTerm = 1;
2681
2682 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
2683 if( rc!=SQLITE_OK ) return rc;
2684 pCursor->pTokenizer = pTokenizer;
2685
2686 while( 1 ){
2687 const char *pToken;
2688 int nToken, iBegin, iEnd, iPos;
2689
2690 rc = pModule->xNext(pCursor,
2691 &pToken, &nToken,
2692 &iBegin, &iEnd, &iPos);
2693 if( rc!=SQLITE_OK ) break;
2694 if( !inPhrase &&
2695 pSegment[iEnd]==':' &&
2696 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
2697 pQuery->nextColumn = iCol;
2698 continue;
2699 }
2700 if( !inPhrase && pQuery->nTerms>0 && nToken==2
2701 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
2702 pQuery->nextIsOr = 1;
2703 continue;
2704 }
2705 queryAdd(pQuery, pToken, nToken);
2706 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
2707 pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
2708 }
2709 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
2710 if( inPhrase ){
2711 nTerm++;
2712 }
2713 }
2714
2715 if( inPhrase && pQuery->nTerms>firstIndex ){
2716 pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
2717 }
2718
2719 return pModule->xClose(pCursor);
2720}
2721
2722/* Parse a query string, yielding a Query object pQuery.
2723**
2724** The calling function will need to queryClear() to clean up
2725** the dynamically allocated memory held by pQuery.
2726*/
2727static int parseQuery(
2728 fulltext_vtab *v, /* The fulltext index */
2729 const char *zInput, /* Input text of the query string */
2730 int nInput, /* Size of the input text */
2731 int dfltColumn, /* Default column of the index to match against */
2732 Query *pQuery /* Write the parse results here. */
2733){
2734 int iInput, inPhrase = 0;
2735
2736 if( zInput==0 ) nInput = 0;
2737 if( nInput<0 ) nInput = strlen(zInput);
2738 pQuery->nTerms = 0;
2739 pQuery->pTerms = NULL;
2740 pQuery->nextIsOr = 0;
2741 pQuery->nextColumn = dfltColumn;
2742 pQuery->dfltColumn = dfltColumn;
2743 pQuery->pFts = v;
2744
2745 for(iInput=0; iInput<nInput; ++iInput){
2746 int i;
2747 for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
2748 if( i>iInput ){
2749 tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
2750 pQuery);
2751 }
2752 iInput = i;
2753 if( i<nInput ){
2754 assert( zInput[i]=='"' );
2755 inPhrase = !inPhrase;
2756 }
2757 }
2758
2759 if( inPhrase ){
2760 /* unmatched quote */
2761 queryClear(pQuery);
2762 return SQLITE_ERROR;
2763 }
2764 return SQLITE_OK;
2765}
2766
2767/* Perform a full-text query using the search expression in
2768** zInput[0..nInput-1]. Return a list of matching documents
2769** in pResult.
2770**
2771** Queries must match column iColumn. Or if iColumn>=nColumn
2772** they are allowed to match against any column.
2773*/
2774static int fulltextQuery(
2775 fulltext_vtab *v, /* The full text index */
2776 int iColumn, /* Match against this column by default */
2777 const char *zInput, /* The query string */
2778 int nInput, /* Number of bytes in zInput[] */
2779 DocList **pResult, /* Write the result doclist here */
2780 Query *pQuery /* Put parsed query string here */
2781){
2782 int i, iNext, rc;
2783 DocList *pLeft = NULL;
2784 DocList *pRight, *pNew, *pOr;
2785 int nNot = 0;
2786 QueryTerm *aTerm;
2787
2788 rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
2789 if( rc!=SQLITE_OK ) return rc;
2790
2791 /* Merge AND terms. */
2792 aTerm = pQuery->pTerms;
2793 for(i = 0; i<pQuery->nTerms; i=iNext){
2794 if( aTerm[i].isNot ){
2795 /* Handle all NOT terms in a separate pass */
2796 nNot++;
2797 iNext = i + aTerm[i].nPhrase+1;
2798 continue;
2799 }
2800 iNext = i + aTerm[i].nPhrase + 1;
2801 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
2802 if( rc ){
2803 queryClear(pQuery);
2804 return rc;
2805 }
2806 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
2807 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
2808 iNext += aTerm[iNext].nPhrase + 1;
2809 if( rc ){
2810 queryClear(pQuery);
2811 return rc;
2812 }
2813 pNew = docListNew(DL_DOCIDS);
2814 docListOrMerge(pRight, pOr, pNew);
2815 docListDelete(pRight);
2816 docListDelete(pOr);
2817 pRight = pNew;
2818 }
2819 if( pLeft==0 ){
2820 pLeft = pRight;
2821 }else{
2822 pNew = docListNew(DL_DOCIDS);
2823 docListAndMerge(pLeft, pRight, pNew);
2824 docListDelete(pRight);
2825 docListDelete(pLeft);
2826 pLeft = pNew;
2827 }
2828 }
2829
2830 if( nNot && pLeft==0 ){
2831 /* We do not yet know how to handle a query of only NOT terms */
2832 return SQLITE_ERROR;
2833 }
2834
2835 /* Do the EXCEPT terms */
2836 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
2837 if( !aTerm[i].isNot ) continue;
2838 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
2839 if( rc ){
2840 queryClear(pQuery);
2841 docListDelete(pLeft);
2842 return rc;
2843 }
2844 pNew = docListNew(DL_DOCIDS);
2845 docListExceptMerge(pLeft, pRight, pNew);
2846 docListDelete(pRight);
2847 docListDelete(pLeft);
2848 pLeft = pNew;
2849 }
2850
2851 *pResult = pLeft;
2852 return rc;
2853}
2854
2855/*
2856** This is the xFilter interface for the virtual table. See
2857** the virtual table xFilter method documentation for additional
2858** information.
2859**
2860** If idxNum==QUERY_GENERIC then do a full table scan against
2861** the %_content table.
2862**
2863** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
2864** in the %_content table.
2865**
2866** If idxNum>=QUERY_FULLTEXT then use the full text index. The
2867** column on the left-hand side of the MATCH operator is column
2868** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand
2869** side of the MATCH operator.
2870*/
2871/* TODO(shess) Upgrade the cursor initialization and destruction to
2872** account for fulltextFilter() being called multiple times on the
2873** same cursor. The current solution is very fragile. Apply fix to
2874** fts2 as appropriate.
2875*/
2876static int fulltextFilter(
2877 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
2878 int idxNum, const char *idxStr, /* Which indexing scheme to use */
2879 int argc, sqlite3_value **argv /* Arguments for the indexing scheme */
2880){
2881 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2882 fulltext_vtab *v = cursor_vtab(c);
2883 int rc;
2884 char *zSql;
2885
2886 TRACE(("FTS1 Filter %p\n",pCursor));
2887
2888 zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
2889 idxNum==QUERY_GENERIC ? "" : "where rowid=?");
2890 sqlite3_finalize(c->pStmt);
2891 rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
2892 sqlite3_free(zSql);
2893 if( rc!=SQLITE_OK ) return rc;
2894
2895 c->iCursorType = idxNum;
2896 switch( idxNum ){
2897 case QUERY_GENERIC:
2898 break;
2899
2900 case QUERY_ROWID:
2901 rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
2902 if( rc!=SQLITE_OK ) return rc;
2903 break;
2904
2905 default: /* full-text search */
2906 {
2907 const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
2908 DocList *pResult;
2909 assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
2910 assert( argc==1 );
2911 queryClear(&c->q);
2912 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
2913 if( rc!=SQLITE_OK ) return rc;
2914 if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
2915 readerInit(&c->result, pResult);
2916 break;
2917 }
2918 }
2919
2920 return fulltextNext(pCursor);
2921}
2922
2923/* This is the xEof method of the virtual table. The SQLite core
2924** calls this routine to find out if it has reached the end of
2925** a query's results set.
2926*/
2927static int fulltextEof(sqlite3_vtab_cursor *pCursor){
2928 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2929 return c->eof;
2930}
2931
2932/* This is the xColumn method of the virtual table. The SQLite
2933** core calls this method during a query when it needs the value
2934** of a column from the virtual table. This method needs to use
2935** one of the sqlite3_result_*() routines to store the requested
2936** value back in the pContext.
2937*/
2938static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
2939 sqlite3_context *pContext, int idxCol){
2940 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2941 fulltext_vtab *v = cursor_vtab(c);
2942
2943 if( idxCol<v->nColumn ){
2944 sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
2945 sqlite3_result_value(pContext, pVal);
2946 }else if( idxCol==v->nColumn ){
2947 /* The extra column whose name is the same as the table.
2948 ** Return a blob which is a pointer to the cursor
2949 */
2950 sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
2951 }
2952 return SQLITE_OK;
2953}
2954
2955/* This is the xRowid method. The SQLite core calls this routine to
2956** retrive the rowid for the current row of the result set. The
2957** rowid should be written to *pRowid.
2958*/
2959static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
2960 fulltext_cursor *c = (fulltext_cursor *) pCursor;
2961
2962 *pRowid = sqlite3_column_int64(c->pStmt, 0);
2963 return SQLITE_OK;
2964}
2965
2966/* Add all terms in [zText] to the given hash table. If [iColumn] > 0,
2967 * we also store positions and offsets in the hash table using the given
2968 * column number. */
2969static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
2970 const char *zText, int iColumn){
2971 sqlite3_tokenizer *pTokenizer = v->pTokenizer;
2972 sqlite3_tokenizer_cursor *pCursor;
2973 const char *pToken;
2974 int nTokenBytes;
2975 int iStartOffset, iEndOffset, iPosition;
2976 int rc;
2977
2978 rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
2979 if( rc!=SQLITE_OK ) return rc;
2980
2981 pCursor->pTokenizer = pTokenizer;
2982 while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
2983 &pToken, &nTokenBytes,
2984 &iStartOffset, &iEndOffset,
2985 &iPosition) ){
2986 DocList *p;
2987
2988 /* Positions can't be negative; we use -1 as a terminator internally. */
2989 if( iPosition<0 ){
2990 pTokenizer->pModule->xClose(pCursor);
2991 return SQLITE_ERROR;
2992 }
2993
2994 p = fts1HashFind(terms, pToken, nTokenBytes);
2995 if( p==NULL ){
2996 p = docListNew(DL_DEFAULT);
2997 docListAddDocid(p, iDocid);
2998 fts1HashInsert(terms, pToken, nTokenBytes, p);
2999 }
3000 if( iColumn>=0 ){
3001 docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
3002 }
3003 }
3004
3005 /* TODO(shess) Check return? Should this be able to cause errors at
3006 ** this point? Actually, same question about sqlite3_finalize(),
3007 ** though one could argue that failure there means that the data is
3008 ** not durable. *ponder*
3009 */
3010 pTokenizer->pModule->xClose(pCursor);
3011 return rc;
3012}
3013
3014/* Update the %_terms table to map the term [pTerm] to the given rowid. */
3015static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
3016 DocList *d){
3017 sqlite_int64 iIndexRow;
3018 DocList doclist;
3019 int iSegment = 0, rc;
3020
3021 rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
3022 if( rc==SQLITE_DONE ){
3023 docListInit(&doclist, DL_DEFAULT, 0, 0);
3024 docListUpdate(&doclist, d);
3025 /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
3026 rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
3027 goto err;
3028 }
3029 if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
3030
3031 docListUpdate(&doclist, d);
3032 if( doclist.nData<=CHUNK_MAX ){
3033 rc = term_update(v, iIndexRow, &doclist);
3034 goto err;
3035 }
3036
3037 /* Doclist doesn't fit, delete what's there, and accumulate
3038 ** forward.
3039 */
3040 rc = term_delete(v, iIndexRow);
3041 if( rc!=SQLITE_OK ) goto err;
3042
3043 /* Try to insert the doclist into a higher segment bucket. On
3044 ** failure, accumulate existing doclist with the doclist from that
3045 ** bucket, and put results in the next bucket.
3046 */
3047 iSegment++;
3048 while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
3049 &doclist))!=SQLITE_OK ){
3050 sqlite_int64 iSegmentRow;
3051 DocList old;
3052 int rc2;
3053
3054 /* Retain old error in case the term_insert() error was really an
3055 ** error rather than a bounced insert.
3056 */
3057 rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
3058 if( rc2!=SQLITE_ROW ) goto err;
3059
3060 rc = term_delete(v, iSegmentRow);
3061 if( rc!=SQLITE_OK ) goto err;
3062
3063 /* Reusing lowest-number deleted row keeps the index smaller. */
3064 if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
3065
3066 /* doclist contains the newer data, so accumulate it over old.
3067 ** Then steal accumulated data for doclist.
3068 */
3069 docListAccumulate(&old, &doclist);
3070 docListDestroy(&doclist);
3071 doclist = old;
3072
3073 iSegment++;
3074 }
3075
3076 err:
3077 docListDestroy(&doclist);
3078 return rc;
3079}
3080
3081/* Add doclists for all terms in [pValues] to the hash table [terms]. */
3082static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
3083 sqlite3_value **pValues){
3084 int i;
3085 for(i = 0; i < v->nColumn ; ++i){
3086 char *zText = (char*)sqlite3_value_text(pValues[i]);
3087 int rc = buildTerms(v, terms, iRowid, zText, i);
3088 if( rc!=SQLITE_OK ) return rc;
3089 }
3090 return SQLITE_OK;
3091}
3092
3093/* Add empty doclists for all terms in the given row's content to the hash
3094 * table [pTerms]. */
3095static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
3096 const char **pValues;
3097 int i;
3098
3099 int rc = content_select(v, iRowid, &pValues);
3100 if( rc!=SQLITE_OK ) return rc;
3101
3102 for(i = 0 ; i < v->nColumn; ++i) {
3103 rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
3104 if( rc!=SQLITE_OK ) break;
3105 }
3106
3107 freeStringArray(v->nColumn, pValues);
3108 return SQLITE_OK;
3109}
3110
3111/* Insert a row into the %_content table; set *piRowid to be the ID of the
3112 * new row. Fill [pTerms] with new doclists for the %_term table. */
3113static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
3114 sqlite3_value **pValues,
3115 sqlite_int64 *piRowid, fts1Hash *pTerms){
3116 int rc;
3117
3118 rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */
3119 if( rc!=SQLITE_OK ) return rc;
3120 *piRowid = sqlite3_last_insert_rowid(v->db);
3121 return insertTerms(v, pTerms, *piRowid, pValues);
3122}
3123
3124/* Delete a row from the %_content table; fill [pTerms] with empty doclists
3125 * to be written to the %_term table. */
3126static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
3127 int rc = deleteTerms(v, pTerms, iRow);
3128 if( rc!=SQLITE_OK ) return rc;
3129 return content_delete(v, iRow); /* execute an SQL DELETE */
3130}
3131
3132/* Update a row in the %_content table; fill [pTerms] with new doclists for the
3133 * %_term table. */
3134static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
3135 sqlite3_value **pValues, fts1Hash *pTerms){
3136 /* Generate an empty doclist for each term that previously appeared in this
3137 * row. */
3138 int rc = deleteTerms(v, pTerms, iRow);
3139 if( rc!=SQLITE_OK ) return rc;
3140
3141 rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */
3142 if( rc!=SQLITE_OK ) return rc;
3143
3144 /* Now add positions for terms which appear in the updated row. */
3145 return insertTerms(v, pTerms, iRow, pValues);
3146}
3147
3148/* This function implements the xUpdate callback; it's the top-level entry
3149 * point for inserting, deleting or updating a row in a full-text table. */
3150static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
3151 sqlite_int64 *pRowid){
3152 fulltext_vtab *v = (fulltext_vtab *) pVtab;
3153 fts1Hash terms; /* maps term string -> PosList */
3154 int rc;
3155 fts1HashElem *e;
3156
3157 TRACE(("FTS1 Update %p\n", pVtab));
3158
3159 fts1HashInit(&terms, FTS1_HASH_STRING, 1);
3160
3161 if( nArg<2 ){
3162 rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
3163 } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
3164 /* An update:
3165 * ppArg[0] = old rowid
3166 * ppArg[1] = new rowid
3167 * ppArg[2..2+v->nColumn-1] = values
3168 * ppArg[2+v->nColumn] = value for magic column (we ignore this)
3169 */
3170 sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
3171 if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
3172 sqlite3_value_int64(ppArg[1]) != rowid ){
3173 rc = SQLITE_ERROR; /* we don't allow changing the rowid */
3174 } else {
3175 assert( nArg==2+v->nColumn+1);
3176 rc = index_update(v, rowid, &ppArg[2], &terms);
3177 }
3178 } else {
3179 /* An insert:
3180 * ppArg[1] = requested rowid
3181 * ppArg[2..2+v->nColumn-1] = values
3182 * ppArg[2+v->nColumn] = value for magic column (we ignore this)
3183 */
3184 assert( nArg==2+v->nColumn+1);
3185 rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
3186 }
3187
3188 if( rc==SQLITE_OK ){
3189 /* Write updated doclists to disk. */
3190 for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
3191 DocList *p = fts1HashData(e);
3192 rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
3193 if( rc!=SQLITE_OK ) break;
3194 }
3195 }
3196
3197 /* clean up */
3198 for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
3199 DocList *p = fts1HashData(e);
3200 docListDelete(p);
3201 }
3202 fts1HashClear(&terms);
3203
3204 return rc;
3205}
3206
3207/*
3208** Implementation of the snippet() function for FTS1
3209*/
3210static void snippetFunc(
3211 sqlite3_context *pContext,
3212 int argc,
3213 sqlite3_value **argv
3214){
3215 fulltext_cursor *pCursor;
3216 if( argc<1 ) return;
3217 if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
3218 sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
3219 sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
3220 }else{
3221 const char *zStart = "<b>";
3222 const char *zEnd = "</b>";
3223 const char *zEllipsis = "<b>...</b>";
3224 memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
3225 if( argc>=2 ){
3226 zStart = (const char*)sqlite3_value_text(argv[1]);
3227 if( argc>=3 ){
3228 zEnd = (const char*)sqlite3_value_text(argv[2]);
3229 if( argc>=4 ){
3230 zEllipsis = (const char*)sqlite3_value_text(argv[3]);
3231 }
3232 }
3233 }
3234 snippetAllOffsets(pCursor);
3235 snippetText(pCursor, zStart, zEnd, zEllipsis);
3236 sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
3237 pCursor->snippet.nSnippet, SQLITE_STATIC);
3238 }
3239}
3240
3241/*
3242** Implementation of the offsets() function for FTS1
3243*/
3244static void snippetOffsetsFunc(
3245 sqlite3_context *pContext,
3246 int argc,
3247 sqlite3_value **argv
3248){
3249 fulltext_cursor *pCursor;
3250 if( argc<1 ) return;
3251 if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
3252 sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
3253 sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
3254 }else{
3255 memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
3256 snippetAllOffsets(pCursor);
3257 snippetOffsetText(&pCursor->snippet);
3258 sqlite3_result_text(pContext,
3259 pCursor->snippet.zOffset, pCursor->snippet.nOffset,
3260 SQLITE_STATIC);
3261 }
3262}
3263
3264/*
3265** This routine implements the xFindFunction method for the FTS1
3266** virtual table.
3267*/
3268static int fulltextFindFunction(
3269 sqlite3_vtab *pVtab,
3270 int nArg,
3271 const char *zName,
3272 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
3273 void **ppArg
3274){
3275 if( strcmp(zName,"snippet")==0 ){
3276 *pxFunc = snippetFunc;
3277 return 1;
3278 }else if( strcmp(zName,"offsets")==0 ){
3279 *pxFunc = snippetOffsetsFunc;
3280 return 1;
3281 }
3282 return 0;
3283}
3284
3285/*
3286** Rename an fts1 table.
3287*/
3288static int fulltextRename(
3289 sqlite3_vtab *pVtab,
3290 const char *zName
3291){
3292 fulltext_vtab *p = (fulltext_vtab *)pVtab;
3293 int rc = SQLITE_NOMEM;
3294 char *zSql = sqlite3_mprintf(
3295 "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';"
3296 "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
3297 , p->zDb, p->zName, zName
3298 , p->zDb, p->zName, zName
3299 );
3300 if( zSql ){
3301 rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
3302 sqlite3_free(zSql);
3303 }
3304 return rc;
3305}
3306
3307static const sqlite3_module fulltextModule = {
3308 /* iVersion */ 0,
3309 /* xCreate */ fulltextCreate,
3310 /* xConnect */ fulltextConnect,
3311 /* xBestIndex */ fulltextBestIndex,
3312 /* xDisconnect */ fulltextDisconnect,
3313 /* xDestroy */ fulltextDestroy,
3314 /* xOpen */ fulltextOpen,
3315 /* xClose */ fulltextClose,
3316 /* xFilter */ fulltextFilter,
3317 /* xNext */ fulltextNext,
3318 /* xEof */ fulltextEof,
3319 /* xColumn */ fulltextColumn,
3320 /* xRowid */ fulltextRowid,
3321 /* xUpdate */ fulltextUpdate,
3322 /* xBegin */ 0,
3323 /* xSync */ 0,
3324 /* xCommit */ 0,
3325 /* xRollback */ 0,
3326 /* xFindFunction */ fulltextFindFunction,
3327 /* xRename */ fulltextRename,
3328};
3329
3330int sqlite3Fts1Init(sqlite3 *db){
3331 sqlite3_overload_function(db, "snippet", -1);
3332 sqlite3_overload_function(db, "offsets", -1);
3333 return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
3334}
3335
3336#if !SQLITE_CORE
3337int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
3338 const sqlite3_api_routines *pApi){
3339 SQLITE_EXTENSION_INIT2(pApi)
3340 return sqlite3Fts1Init(db);
3341}
3342#endif
3343
3344#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */