diff options
Diffstat (limited to 'libraries/sqlite/win32/btree.c')
-rwxr-xr-x | libraries/sqlite/win32/btree.c | 6890 |
1 files changed, 6890 insertions, 0 deletions
diff --git a/libraries/sqlite/win32/btree.c b/libraries/sqlite/win32/btree.c new file mode 100755 index 0000000..de8821a --- /dev/null +++ b/libraries/sqlite/win32/btree.c | |||
@@ -0,0 +1,6890 @@ | |||
1 | /* | ||
2 | ** 2004 April 6 | ||
3 | ** | ||
4 | ** The author disclaims copyright to this source code. In place of | ||
5 | ** a legal notice, here is a blessing: | ||
6 | ** | ||
7 | ** May you do good and not evil. | ||
8 | ** May you find forgiveness for yourself and forgive others. | ||
9 | ** May you share freely, never taking more than you give. | ||
10 | ** | ||
11 | ************************************************************************* | ||
12 | ** $Id: btree.c,v 1.428 2007/10/03 08:46:44 danielk1977 Exp $ | ||
13 | ** | ||
14 | ** This file implements a external (disk-based) database using BTrees. | ||
15 | ** See the header comment on "btreeInt.h" for additional information. | ||
16 | ** Including a description of file format and an overview of operation. | ||
17 | */ | ||
18 | #include "btreeInt.h" | ||
19 | |||
20 | /* | ||
21 | ** The header string that appears at the beginning of every | ||
22 | ** SQLite database. | ||
23 | */ | ||
24 | static const char zMagicHeader[] = SQLITE_FILE_HEADER; | ||
25 | |||
26 | /* | ||
27 | ** Set this global variable to 1 to enable tracing using the TRACE | ||
28 | ** macro. | ||
29 | */ | ||
30 | #if SQLITE_TEST | ||
31 | int sqlite3_btree_trace=0; /* True to enable tracing */ | ||
32 | #endif | ||
33 | |||
34 | |||
35 | |||
36 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
37 | /* | ||
38 | ** A flag to indicate whether or not shared cache is enabled. Also, | ||
39 | ** a list of BtShared objects that are eligible for participation | ||
40 | ** in shared cache. The variables have file scope during normal builds, | ||
41 | ** but the test harness needs to access these variables so we make them | ||
42 | ** global for test builds. | ||
43 | */ | ||
44 | #ifdef SQLITE_TEST | ||
45 | BtShared *sqlite3SharedCacheList = 0; | ||
46 | int sqlite3SharedCacheEnabled = 0; | ||
47 | #else | ||
48 | static BtShared *sqlite3SharedCacheList = 0; | ||
49 | static int sqlite3SharedCacheEnabled = 0; | ||
50 | #endif | ||
51 | #endif /* SQLITE_OMIT_SHARED_CACHE */ | ||
52 | |||
53 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
54 | /* | ||
55 | ** Enable or disable the shared pager and schema features. | ||
56 | ** | ||
57 | ** This routine has no effect on existing database connections. | ||
58 | ** The shared cache setting effects only future calls to | ||
59 | ** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). | ||
60 | */ | ||
61 | int sqlite3_enable_shared_cache(int enable){ | ||
62 | sqlite3SharedCacheEnabled = enable; | ||
63 | return SQLITE_OK; | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | |||
68 | /* | ||
69 | ** Forward declaration | ||
70 | */ | ||
71 | static int checkReadLocks(Btree*,Pgno,BtCursor*); | ||
72 | |||
73 | |||
74 | #ifdef SQLITE_OMIT_SHARED_CACHE | ||
75 | /* | ||
76 | ** The functions queryTableLock(), lockTable() and unlockAllTables() | ||
77 | ** manipulate entries in the BtShared.pLock linked list used to store | ||
78 | ** shared-cache table level locks. If the library is compiled with the | ||
79 | ** shared-cache feature disabled, then there is only ever one user | ||
80 | ** of each BtShared structure and so this locking is not necessary. | ||
81 | ** So define the lock related functions as no-ops. | ||
82 | */ | ||
83 | #define queryTableLock(a,b,c) SQLITE_OK | ||
84 | #define lockTable(a,b,c) SQLITE_OK | ||
85 | #define unlockAllTables(a) | ||
86 | #endif | ||
87 | |||
88 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
89 | /* | ||
90 | ** Query to see if btree handle p may obtain a lock of type eLock | ||
91 | ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return | ||
92 | ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or | ||
93 | ** SQLITE_LOCKED if not. | ||
94 | */ | ||
95 | static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){ | ||
96 | BtShared *pBt = p->pBt; | ||
97 | BtLock *pIter; | ||
98 | |||
99 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
100 | |||
101 | /* This is a no-op if the shared-cache is not enabled */ | ||
102 | if( !p->sharable ){ | ||
103 | return SQLITE_OK; | ||
104 | } | ||
105 | |||
106 | /* This (along with lockTable()) is where the ReadUncommitted flag is | ||
107 | ** dealt with. If the caller is querying for a read-lock and the flag is | ||
108 | ** set, it is unconditionally granted - even if there are write-locks | ||
109 | ** on the table. If a write-lock is requested, the ReadUncommitted flag | ||
110 | ** is not considered. | ||
111 | ** | ||
112 | ** In function lockTable(), if a read-lock is demanded and the | ||
113 | ** ReadUncommitted flag is set, no entry is added to the locks list | ||
114 | ** (BtShared.pLock). | ||
115 | ** | ||
116 | ** To summarize: If the ReadUncommitted flag is set, then read cursors do | ||
117 | ** not create or respect table locks. The locking procedure for a | ||
118 | ** write-cursor does not change. | ||
119 | */ | ||
120 | if( | ||
121 | !p->pSqlite || | ||
122 | 0==(p->pSqlite->flags&SQLITE_ReadUncommitted) || | ||
123 | eLock==WRITE_LOCK || | ||
124 | iTab==MASTER_ROOT | ||
125 | ){ | ||
126 | for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ | ||
127 | if( pIter->pBtree!=p && pIter->iTable==iTab && | ||
128 | (pIter->eLock!=eLock || eLock!=READ_LOCK) ){ | ||
129 | return SQLITE_LOCKED; | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | return SQLITE_OK; | ||
134 | } | ||
135 | #endif /* !SQLITE_OMIT_SHARED_CACHE */ | ||
136 | |||
137 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
138 | /* | ||
139 | ** Add a lock on the table with root-page iTable to the shared-btree used | ||
140 | ** by Btree handle p. Parameter eLock must be either READ_LOCK or | ||
141 | ** WRITE_LOCK. | ||
142 | ** | ||
143 | ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and | ||
144 | ** SQLITE_NOMEM may also be returned. | ||
145 | */ | ||
146 | static int lockTable(Btree *p, Pgno iTable, u8 eLock){ | ||
147 | BtShared *pBt = p->pBt; | ||
148 | BtLock *pLock = 0; | ||
149 | BtLock *pIter; | ||
150 | |||
151 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
152 | |||
153 | /* This is a no-op if the shared-cache is not enabled */ | ||
154 | if( !p->sharable ){ | ||
155 | return SQLITE_OK; | ||
156 | } | ||
157 | |||
158 | assert( SQLITE_OK==queryTableLock(p, iTable, eLock) ); | ||
159 | |||
160 | /* If the read-uncommitted flag is set and a read-lock is requested, | ||
161 | ** return early without adding an entry to the BtShared.pLock list. See | ||
162 | ** comment in function queryTableLock() for more info on handling | ||
163 | ** the ReadUncommitted flag. | ||
164 | */ | ||
165 | if( | ||
166 | (p->pSqlite) && | ||
167 | (p->pSqlite->flags&SQLITE_ReadUncommitted) && | ||
168 | (eLock==READ_LOCK) && | ||
169 | iTable!=MASTER_ROOT | ||
170 | ){ | ||
171 | return SQLITE_OK; | ||
172 | } | ||
173 | |||
174 | /* First search the list for an existing lock on this table. */ | ||
175 | for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ | ||
176 | if( pIter->iTable==iTable && pIter->pBtree==p ){ | ||
177 | pLock = pIter; | ||
178 | break; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* If the above search did not find a BtLock struct associating Btree p | ||
183 | ** with table iTable, allocate one and link it into the list. | ||
184 | */ | ||
185 | if( !pLock ){ | ||
186 | pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); | ||
187 | if( !pLock ){ | ||
188 | return SQLITE_NOMEM; | ||
189 | } | ||
190 | pLock->iTable = iTable; | ||
191 | pLock->pBtree = p; | ||
192 | pLock->pNext = pBt->pLock; | ||
193 | pBt->pLock = pLock; | ||
194 | } | ||
195 | |||
196 | /* Set the BtLock.eLock variable to the maximum of the current lock | ||
197 | ** and the requested lock. This means if a write-lock was already held | ||
198 | ** and a read-lock requested, we don't incorrectly downgrade the lock. | ||
199 | */ | ||
200 | assert( WRITE_LOCK>READ_LOCK ); | ||
201 | if( eLock>pLock->eLock ){ | ||
202 | pLock->eLock = eLock; | ||
203 | } | ||
204 | |||
205 | return SQLITE_OK; | ||
206 | } | ||
207 | #endif /* !SQLITE_OMIT_SHARED_CACHE */ | ||
208 | |||
209 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
210 | /* | ||
211 | ** Release all the table locks (locks obtained via calls to the lockTable() | ||
212 | ** procedure) held by Btree handle p. | ||
213 | */ | ||
214 | static void unlockAllTables(Btree *p){ | ||
215 | BtLock **ppIter = &p->pBt->pLock; | ||
216 | |||
217 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
218 | assert( p->sharable || 0==*ppIter ); | ||
219 | |||
220 | while( *ppIter ){ | ||
221 | BtLock *pLock = *ppIter; | ||
222 | if( pLock->pBtree==p ){ | ||
223 | *ppIter = pLock->pNext; | ||
224 | sqlite3_free(pLock); | ||
225 | }else{ | ||
226 | ppIter = &pLock->pNext; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | #endif /* SQLITE_OMIT_SHARED_CACHE */ | ||
231 | |||
232 | static void releasePage(MemPage *pPage); /* Forward reference */ | ||
233 | |||
234 | /* | ||
235 | ** Verify that the cursor holds a mutex on the BtShared | ||
236 | */ | ||
237 | #ifndef NDEBUG | ||
238 | static int cursorHoldsMutex(BtCursor *p){ | ||
239 | return sqlite3_mutex_held(p->pBt->mutex); | ||
240 | } | ||
241 | #endif | ||
242 | |||
243 | |||
244 | #ifndef SQLITE_OMIT_INCRBLOB | ||
245 | /* | ||
246 | ** Invalidate the overflow page-list cache for cursor pCur, if any. | ||
247 | */ | ||
248 | static void invalidateOverflowCache(BtCursor *pCur){ | ||
249 | assert( cursorHoldsMutex(pCur) ); | ||
250 | sqlite3_free(pCur->aOverflow); | ||
251 | pCur->aOverflow = 0; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | ** Invalidate the overflow page-list cache for all cursors opened | ||
256 | ** on the shared btree structure pBt. | ||
257 | */ | ||
258 | static void invalidateAllOverflowCache(BtShared *pBt){ | ||
259 | BtCursor *p; | ||
260 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
261 | for(p=pBt->pCursor; p; p=p->pNext){ | ||
262 | invalidateOverflowCache(p); | ||
263 | } | ||
264 | } | ||
265 | #else | ||
266 | #define invalidateOverflowCache(x) | ||
267 | #define invalidateAllOverflowCache(x) | ||
268 | #endif | ||
269 | |||
270 | /* | ||
271 | ** Save the current cursor position in the variables BtCursor.nKey | ||
272 | ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. | ||
273 | */ | ||
274 | static int saveCursorPosition(BtCursor *pCur){ | ||
275 | int rc; | ||
276 | |||
277 | assert( CURSOR_VALID==pCur->eState ); | ||
278 | assert( 0==pCur->pKey ); | ||
279 | assert( cursorHoldsMutex(pCur) ); | ||
280 | |||
281 | rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); | ||
282 | |||
283 | /* If this is an intKey table, then the above call to BtreeKeySize() | ||
284 | ** stores the integer key in pCur->nKey. In this case this value is | ||
285 | ** all that is required. Otherwise, if pCur is not open on an intKey | ||
286 | ** table, then malloc space for and store the pCur->nKey bytes of key | ||
287 | ** data. | ||
288 | */ | ||
289 | if( rc==SQLITE_OK && 0==pCur->pPage->intKey){ | ||
290 | void *pKey = sqlite3_malloc(pCur->nKey); | ||
291 | if( pKey ){ | ||
292 | rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey); | ||
293 | if( rc==SQLITE_OK ){ | ||
294 | pCur->pKey = pKey; | ||
295 | }else{ | ||
296 | sqlite3_free(pKey); | ||
297 | } | ||
298 | }else{ | ||
299 | rc = SQLITE_NOMEM; | ||
300 | } | ||
301 | } | ||
302 | assert( !pCur->pPage->intKey || !pCur->pKey ); | ||
303 | |||
304 | if( rc==SQLITE_OK ){ | ||
305 | releasePage(pCur->pPage); | ||
306 | pCur->pPage = 0; | ||
307 | pCur->eState = CURSOR_REQUIRESEEK; | ||
308 | } | ||
309 | |||
310 | invalidateOverflowCache(pCur); | ||
311 | return rc; | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | ** Save the positions of all cursors except pExcept open on the table | ||
316 | ** with root-page iRoot. Usually, this is called just before cursor | ||
317 | ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()). | ||
318 | */ | ||
319 | static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ | ||
320 | BtCursor *p; | ||
321 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
322 | assert( pExcept==0 || pExcept->pBt==pBt ); | ||
323 | for(p=pBt->pCursor; p; p=p->pNext){ | ||
324 | if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) && | ||
325 | p->eState==CURSOR_VALID ){ | ||
326 | int rc = saveCursorPosition(p); | ||
327 | if( SQLITE_OK!=rc ){ | ||
328 | return rc; | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | return SQLITE_OK; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | ** Clear the current cursor position. | ||
337 | */ | ||
338 | static void clearCursorPosition(BtCursor *pCur){ | ||
339 | assert( cursorHoldsMutex(pCur) ); | ||
340 | sqlite3_free(pCur->pKey); | ||
341 | pCur->pKey = 0; | ||
342 | pCur->eState = CURSOR_INVALID; | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | ** Restore the cursor to the position it was in (or as close to as possible) | ||
347 | ** when saveCursorPosition() was called. Note that this call deletes the | ||
348 | ** saved position info stored by saveCursorPosition(), so there can be | ||
349 | ** at most one effective restoreOrClearCursorPosition() call after each | ||
350 | ** saveCursorPosition(). | ||
351 | ** | ||
352 | ** If the second argument argument - doSeek - is false, then instead of | ||
353 | ** returning the cursor to it's saved position, any saved position is deleted | ||
354 | ** and the cursor state set to CURSOR_INVALID. | ||
355 | */ | ||
356 | int sqlite3BtreeRestoreOrClearCursorPosition(BtCursor *pCur){ | ||
357 | int rc; | ||
358 | assert( cursorHoldsMutex(pCur) ); | ||
359 | assert( pCur->eState>=CURSOR_REQUIRESEEK ); | ||
360 | if( pCur->eState==CURSOR_FAULT ){ | ||
361 | return pCur->skip; | ||
362 | } | ||
363 | #ifndef SQLITE_OMIT_INCRBLOB | ||
364 | if( pCur->isIncrblobHandle ){ | ||
365 | return SQLITE_ABORT; | ||
366 | } | ||
367 | #endif | ||
368 | pCur->eState = CURSOR_INVALID; | ||
369 | rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skip); | ||
370 | if( rc==SQLITE_OK ){ | ||
371 | sqlite3_free(pCur->pKey); | ||
372 | pCur->pKey = 0; | ||
373 | assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); | ||
374 | } | ||
375 | return rc; | ||
376 | } | ||
377 | |||
378 | #define restoreOrClearCursorPosition(p) \ | ||
379 | (p->eState>=CURSOR_REQUIRESEEK ? \ | ||
380 | sqlite3BtreeRestoreOrClearCursorPosition(p) : \ | ||
381 | SQLITE_OK) | ||
382 | |||
383 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
384 | /* | ||
385 | ** Given a page number of a regular database page, return the page | ||
386 | ** number for the pointer-map page that contains the entry for the | ||
387 | ** input page number. | ||
388 | */ | ||
389 | static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ | ||
390 | int nPagesPerMapPage, iPtrMap, ret; | ||
391 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
392 | nPagesPerMapPage = (pBt->usableSize/5)+1; | ||
393 | iPtrMap = (pgno-2)/nPagesPerMapPage; | ||
394 | ret = (iPtrMap*nPagesPerMapPage) + 2; | ||
395 | if( ret==PENDING_BYTE_PAGE(pBt) ){ | ||
396 | ret++; | ||
397 | } | ||
398 | return ret; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | ** Write an entry into the pointer map. | ||
403 | ** | ||
404 | ** This routine updates the pointer map entry for page number 'key' | ||
405 | ** so that it maps to type 'eType' and parent page number 'pgno'. | ||
406 | ** An error code is returned if something goes wrong, otherwise SQLITE_OK. | ||
407 | */ | ||
408 | static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){ | ||
409 | DbPage *pDbPage; /* The pointer map page */ | ||
410 | u8 *pPtrmap; /* The pointer map data */ | ||
411 | Pgno iPtrmap; /* The pointer map page number */ | ||
412 | int offset; /* Offset in pointer map page */ | ||
413 | int rc; | ||
414 | |||
415 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
416 | /* The master-journal page number must never be used as a pointer map page */ | ||
417 | assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); | ||
418 | |||
419 | assert( pBt->autoVacuum ); | ||
420 | if( key==0 ){ | ||
421 | return SQLITE_CORRUPT_BKPT; | ||
422 | } | ||
423 | iPtrmap = PTRMAP_PAGENO(pBt, key); | ||
424 | rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); | ||
425 | if( rc!=SQLITE_OK ){ | ||
426 | return rc; | ||
427 | } | ||
428 | offset = PTRMAP_PTROFFSET(pBt, key); | ||
429 | pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); | ||
430 | |||
431 | if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ | ||
432 | TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); | ||
433 | rc = sqlite3PagerWrite(pDbPage); | ||
434 | if( rc==SQLITE_OK ){ | ||
435 | pPtrmap[offset] = eType; | ||
436 | put4byte(&pPtrmap[offset+1], parent); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | sqlite3PagerUnref(pDbPage); | ||
441 | return rc; | ||
442 | } | ||
443 | |||
444 | /* | ||
445 | ** Read an entry from the pointer map. | ||
446 | ** | ||
447 | ** This routine retrieves the pointer map entry for page 'key', writing | ||
448 | ** the type and parent page number to *pEType and *pPgno respectively. | ||
449 | ** An error code is returned if something goes wrong, otherwise SQLITE_OK. | ||
450 | */ | ||
451 | static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ | ||
452 | DbPage *pDbPage; /* The pointer map page */ | ||
453 | int iPtrmap; /* Pointer map page index */ | ||
454 | u8 *pPtrmap; /* Pointer map page data */ | ||
455 | int offset; /* Offset of entry in pointer map */ | ||
456 | int rc; | ||
457 | |||
458 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
459 | |||
460 | iPtrmap = PTRMAP_PAGENO(pBt, key); | ||
461 | rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); | ||
462 | if( rc!=0 ){ | ||
463 | return rc; | ||
464 | } | ||
465 | pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); | ||
466 | |||
467 | offset = PTRMAP_PTROFFSET(pBt, key); | ||
468 | assert( pEType!=0 ); | ||
469 | *pEType = pPtrmap[offset]; | ||
470 | if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); | ||
471 | |||
472 | sqlite3PagerUnref(pDbPage); | ||
473 | if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT; | ||
474 | return SQLITE_OK; | ||
475 | } | ||
476 | |||
477 | #endif /* SQLITE_OMIT_AUTOVACUUM */ | ||
478 | |||
479 | /* | ||
480 | ** Given a btree page and a cell index (0 means the first cell on | ||
481 | ** the page, 1 means the second cell, and so forth) return a pointer | ||
482 | ** to the cell content. | ||
483 | ** | ||
484 | ** This routine works only for pages that do not contain overflow cells. | ||
485 | */ | ||
486 | #define findCell(pPage, iCell) \ | ||
487 | ((pPage)->aData + get2byte(&(pPage)->aData[(pPage)->cellOffset+2*(iCell)])) | ||
488 | #ifdef SQLITE_TEST | ||
489 | u8 *sqlite3BtreeFindCell(MemPage *pPage, int iCell){ | ||
490 | assert( iCell>=0 ); | ||
491 | assert( iCell<get2byte(&pPage->aData[pPage->hdrOffset+3]) ); | ||
492 | return findCell(pPage, iCell); | ||
493 | } | ||
494 | #endif | ||
495 | |||
496 | /* | ||
497 | ** This a more complex version of sqlite3BtreeFindCell() that works for | ||
498 | ** pages that do contain overflow cells. See insert | ||
499 | */ | ||
500 | static u8 *findOverflowCell(MemPage *pPage, int iCell){ | ||
501 | int i; | ||
502 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
503 | for(i=pPage->nOverflow-1; i>=0; i--){ | ||
504 | int k; | ||
505 | struct _OvflCell *pOvfl; | ||
506 | pOvfl = &pPage->aOvfl[i]; | ||
507 | k = pOvfl->idx; | ||
508 | if( k<=iCell ){ | ||
509 | if( k==iCell ){ | ||
510 | return pOvfl->pCell; | ||
511 | } | ||
512 | iCell--; | ||
513 | } | ||
514 | } | ||
515 | return findCell(pPage, iCell); | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | ** Parse a cell content block and fill in the CellInfo structure. There | ||
520 | ** are two versions of this function. sqlite3BtreeParseCell() takes a | ||
521 | ** cell index as the second argument and sqlite3BtreeParseCellPtr() | ||
522 | ** takes a pointer to the body of the cell as its second argument. | ||
523 | ** | ||
524 | ** Within this file, the parseCell() macro can be called instead of | ||
525 | ** sqlite3BtreeParseCellPtr(). Using some compilers, this will be faster. | ||
526 | */ | ||
527 | void sqlite3BtreeParseCellPtr( | ||
528 | MemPage *pPage, /* Page containing the cell */ | ||
529 | u8 *pCell, /* Pointer to the cell text. */ | ||
530 | CellInfo *pInfo /* Fill in this structure */ | ||
531 | ){ | ||
532 | int n; /* Number bytes in cell content header */ | ||
533 | u32 nPayload; /* Number of bytes of cell payload */ | ||
534 | |||
535 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
536 | |||
537 | pInfo->pCell = pCell; | ||
538 | assert( pPage->leaf==0 || pPage->leaf==1 ); | ||
539 | n = pPage->childPtrSize; | ||
540 | assert( n==4-4*pPage->leaf ); | ||
541 | if( pPage->hasData ){ | ||
542 | n += getVarint32(&pCell[n], &nPayload); | ||
543 | }else{ | ||
544 | nPayload = 0; | ||
545 | } | ||
546 | pInfo->nData = nPayload; | ||
547 | if( pPage->intKey ){ | ||
548 | n += getVarint(&pCell[n], (u64 *)&pInfo->nKey); | ||
549 | }else{ | ||
550 | u32 x; | ||
551 | n += getVarint32(&pCell[n], &x); | ||
552 | pInfo->nKey = x; | ||
553 | nPayload += x; | ||
554 | } | ||
555 | pInfo->nPayload = nPayload; | ||
556 | pInfo->nHeader = n; | ||
557 | if( nPayload<=pPage->maxLocal ){ | ||
558 | /* This is the (easy) common case where the entire payload fits | ||
559 | ** on the local page. No overflow is required. | ||
560 | */ | ||
561 | int nSize; /* Total size of cell content in bytes */ | ||
562 | pInfo->nLocal = nPayload; | ||
563 | pInfo->iOverflow = 0; | ||
564 | nSize = nPayload + n; | ||
565 | if( nSize<4 ){ | ||
566 | nSize = 4; /* Minimum cell size is 4 */ | ||
567 | } | ||
568 | pInfo->nSize = nSize; | ||
569 | }else{ | ||
570 | /* If the payload will not fit completely on the local page, we have | ||
571 | ** to decide how much to store locally and how much to spill onto | ||
572 | ** overflow pages. The strategy is to minimize the amount of unused | ||
573 | ** space on overflow pages while keeping the amount of local storage | ||
574 | ** in between minLocal and maxLocal. | ||
575 | ** | ||
576 | ** Warning: changing the way overflow payload is distributed in any | ||
577 | ** way will result in an incompatible file format. | ||
578 | */ | ||
579 | int minLocal; /* Minimum amount of payload held locally */ | ||
580 | int maxLocal; /* Maximum amount of payload held locally */ | ||
581 | int surplus; /* Overflow payload available for local storage */ | ||
582 | |||
583 | minLocal = pPage->minLocal; | ||
584 | maxLocal = pPage->maxLocal; | ||
585 | surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4); | ||
586 | if( surplus <= maxLocal ){ | ||
587 | pInfo->nLocal = surplus; | ||
588 | }else{ | ||
589 | pInfo->nLocal = minLocal; | ||
590 | } | ||
591 | pInfo->iOverflow = pInfo->nLocal + n; | ||
592 | pInfo->nSize = pInfo->iOverflow + 4; | ||
593 | } | ||
594 | } | ||
595 | #define parseCell(pPage, iCell, pInfo) \ | ||
596 | sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo)) | ||
597 | void sqlite3BtreeParseCell( | ||
598 | MemPage *pPage, /* Page containing the cell */ | ||
599 | int iCell, /* The cell index. First cell is 0 */ | ||
600 | CellInfo *pInfo /* Fill in this structure */ | ||
601 | ){ | ||
602 | parseCell(pPage, iCell, pInfo); | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | ** Compute the total number of bytes that a Cell needs in the cell | ||
607 | ** data area of the btree-page. The return number includes the cell | ||
608 | ** data header and the local payload, but not any overflow page or | ||
609 | ** the space used by the cell pointer. | ||
610 | */ | ||
611 | #ifndef NDEBUG | ||
612 | static int cellSize(MemPage *pPage, int iCell){ | ||
613 | CellInfo info; | ||
614 | sqlite3BtreeParseCell(pPage, iCell, &info); | ||
615 | return info.nSize; | ||
616 | } | ||
617 | #endif | ||
618 | static int cellSizePtr(MemPage *pPage, u8 *pCell){ | ||
619 | CellInfo info; | ||
620 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
621 | return info.nSize; | ||
622 | } | ||
623 | |||
624 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
625 | /* | ||
626 | ** If the cell pCell, part of page pPage contains a pointer | ||
627 | ** to an overflow page, insert an entry into the pointer-map | ||
628 | ** for the overflow page. | ||
629 | */ | ||
630 | static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){ | ||
631 | if( pCell ){ | ||
632 | CellInfo info; | ||
633 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
634 | assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); | ||
635 | if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){ | ||
636 | Pgno ovfl = get4byte(&pCell[info.iOverflow]); | ||
637 | return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno); | ||
638 | } | ||
639 | } | ||
640 | return SQLITE_OK; | ||
641 | } | ||
642 | /* | ||
643 | ** If the cell with index iCell on page pPage contains a pointer | ||
644 | ** to an overflow page, insert an entry into the pointer-map | ||
645 | ** for the overflow page. | ||
646 | */ | ||
647 | static int ptrmapPutOvfl(MemPage *pPage, int iCell){ | ||
648 | u8 *pCell; | ||
649 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
650 | pCell = findOverflowCell(pPage, iCell); | ||
651 | return ptrmapPutOvflPtr(pPage, pCell); | ||
652 | } | ||
653 | #endif | ||
654 | |||
655 | |||
656 | /* | ||
657 | ** Defragment the page given. All Cells are moved to the | ||
658 | ** end of the page and all free space is collected into one | ||
659 | ** big FreeBlk that occurs in between the header and cell | ||
660 | ** pointer array and the cell content area. | ||
661 | */ | ||
662 | static int defragmentPage(MemPage *pPage){ | ||
663 | int i; /* Loop counter */ | ||
664 | int pc; /* Address of a i-th cell */ | ||
665 | int addr; /* Offset of first byte after cell pointer array */ | ||
666 | int hdr; /* Offset to the page header */ | ||
667 | int size; /* Size of a cell */ | ||
668 | int usableSize; /* Number of usable bytes on a page */ | ||
669 | int cellOffset; /* Offset to the cell pointer array */ | ||
670 | int brk; /* Offset to the cell content area */ | ||
671 | int nCell; /* Number of cells on the page */ | ||
672 | unsigned char *data; /* The page data */ | ||
673 | unsigned char *temp; /* Temp area for cell content */ | ||
674 | |||
675 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
676 | assert( pPage->pBt!=0 ); | ||
677 | assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); | ||
678 | assert( pPage->nOverflow==0 ); | ||
679 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
680 | temp = sqlite3_malloc( pPage->pBt->pageSize ); | ||
681 | if( temp==0 ) return SQLITE_NOMEM; | ||
682 | data = pPage->aData; | ||
683 | hdr = pPage->hdrOffset; | ||
684 | cellOffset = pPage->cellOffset; | ||
685 | nCell = pPage->nCell; | ||
686 | assert( nCell==get2byte(&data[hdr+3]) ); | ||
687 | usableSize = pPage->pBt->usableSize; | ||
688 | brk = get2byte(&data[hdr+5]); | ||
689 | memcpy(&temp[brk], &data[brk], usableSize - brk); | ||
690 | brk = usableSize; | ||
691 | for(i=0; i<nCell; i++){ | ||
692 | u8 *pAddr; /* The i-th cell pointer */ | ||
693 | pAddr = &data[cellOffset + i*2]; | ||
694 | pc = get2byte(pAddr); | ||
695 | assert( pc<pPage->pBt->usableSize ); | ||
696 | size = cellSizePtr(pPage, &temp[pc]); | ||
697 | brk -= size; | ||
698 | memcpy(&data[brk], &temp[pc], size); | ||
699 | put2byte(pAddr, brk); | ||
700 | } | ||
701 | assert( brk>=cellOffset+2*nCell ); | ||
702 | put2byte(&data[hdr+5], brk); | ||
703 | data[hdr+1] = 0; | ||
704 | data[hdr+2] = 0; | ||
705 | data[hdr+7] = 0; | ||
706 | addr = cellOffset+2*nCell; | ||
707 | memset(&data[addr], 0, brk-addr); | ||
708 | sqlite3_free(temp); | ||
709 | return SQLITE_OK; | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | ** Allocate nByte bytes of space on a page. | ||
714 | ** | ||
715 | ** Return the index into pPage->aData[] of the first byte of | ||
716 | ** the new allocation. Or return 0 if there is not enough free | ||
717 | ** space on the page to satisfy the allocation request. | ||
718 | ** | ||
719 | ** If the page contains nBytes of free space but does not contain | ||
720 | ** nBytes of contiguous free space, then this routine automatically | ||
721 | ** calls defragementPage() to consolidate all free space before | ||
722 | ** allocating the new chunk. | ||
723 | */ | ||
724 | static int allocateSpace(MemPage *pPage, int nByte){ | ||
725 | int addr, pc, hdr; | ||
726 | int size; | ||
727 | int nFrag; | ||
728 | int top; | ||
729 | int nCell; | ||
730 | int cellOffset; | ||
731 | unsigned char *data; | ||
732 | |||
733 | data = pPage->aData; | ||
734 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
735 | assert( pPage->pBt ); | ||
736 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
737 | if( nByte<4 ) nByte = 4; | ||
738 | if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0; | ||
739 | pPage->nFree -= nByte; | ||
740 | hdr = pPage->hdrOffset; | ||
741 | |||
742 | nFrag = data[hdr+7]; | ||
743 | if( nFrag<60 ){ | ||
744 | /* Search the freelist looking for a slot big enough to satisfy the | ||
745 | ** space request. */ | ||
746 | addr = hdr+1; | ||
747 | while( (pc = get2byte(&data[addr]))>0 ){ | ||
748 | size = get2byte(&data[pc+2]); | ||
749 | if( size>=nByte ){ | ||
750 | if( size<nByte+4 ){ | ||
751 | memcpy(&data[addr], &data[pc], 2); | ||
752 | data[hdr+7] = nFrag + size - nByte; | ||
753 | return pc; | ||
754 | }else{ | ||
755 | put2byte(&data[pc+2], size-nByte); | ||
756 | return pc + size - nByte; | ||
757 | } | ||
758 | } | ||
759 | addr = pc; | ||
760 | } | ||
761 | } | ||
762 | |||
763 | /* Allocate memory from the gap in between the cell pointer array | ||
764 | ** and the cell content area. | ||
765 | */ | ||
766 | top = get2byte(&data[hdr+5]); | ||
767 | nCell = get2byte(&data[hdr+3]); | ||
768 | cellOffset = pPage->cellOffset; | ||
769 | if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){ | ||
770 | if( defragmentPage(pPage) ) return 0; | ||
771 | top = get2byte(&data[hdr+5]); | ||
772 | } | ||
773 | top -= nByte; | ||
774 | assert( cellOffset + 2*nCell <= top ); | ||
775 | put2byte(&data[hdr+5], top); | ||
776 | return top; | ||
777 | } | ||
778 | |||
779 | /* | ||
780 | ** Return a section of the pPage->aData to the freelist. | ||
781 | ** The first byte of the new free block is pPage->aDisk[start] | ||
782 | ** and the size of the block is "size" bytes. | ||
783 | ** | ||
784 | ** Most of the effort here is involved in coalesing adjacent | ||
785 | ** free blocks into a single big free block. | ||
786 | */ | ||
787 | static void freeSpace(MemPage *pPage, int start, int size){ | ||
788 | int addr, pbegin, hdr; | ||
789 | unsigned char *data = pPage->aData; | ||
790 | |||
791 | assert( pPage->pBt!=0 ); | ||
792 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
793 | assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) ); | ||
794 | assert( (start + size)<=pPage->pBt->usableSize ); | ||
795 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
796 | if( size<4 ) size = 4; | ||
797 | |||
798 | #ifdef SQLITE_SECURE_DELETE | ||
799 | /* Overwrite deleted information with zeros when the SECURE_DELETE | ||
800 | ** option is enabled at compile-time */ | ||
801 | memset(&data[start], 0, size); | ||
802 | #endif | ||
803 | |||
804 | /* Add the space back into the linked list of freeblocks */ | ||
805 | hdr = pPage->hdrOffset; | ||
806 | addr = hdr + 1; | ||
807 | while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){ | ||
808 | assert( pbegin<=pPage->pBt->usableSize-4 ); | ||
809 | assert( pbegin>addr ); | ||
810 | addr = pbegin; | ||
811 | } | ||
812 | assert( pbegin<=pPage->pBt->usableSize-4 ); | ||
813 | assert( pbegin>addr || pbegin==0 ); | ||
814 | put2byte(&data[addr], start); | ||
815 | put2byte(&data[start], pbegin); | ||
816 | put2byte(&data[start+2], size); | ||
817 | pPage->nFree += size; | ||
818 | |||
819 | /* Coalesce adjacent free blocks */ | ||
820 | addr = pPage->hdrOffset + 1; | ||
821 | while( (pbegin = get2byte(&data[addr]))>0 ){ | ||
822 | int pnext, psize; | ||
823 | assert( pbegin>addr ); | ||
824 | assert( pbegin<=pPage->pBt->usableSize-4 ); | ||
825 | pnext = get2byte(&data[pbegin]); | ||
826 | psize = get2byte(&data[pbegin+2]); | ||
827 | if( pbegin + psize + 3 >= pnext && pnext>0 ){ | ||
828 | int frag = pnext - (pbegin+psize); | ||
829 | assert( frag<=data[pPage->hdrOffset+7] ); | ||
830 | data[pPage->hdrOffset+7] -= frag; | ||
831 | put2byte(&data[pbegin], get2byte(&data[pnext])); | ||
832 | put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin); | ||
833 | }else{ | ||
834 | addr = pbegin; | ||
835 | } | ||
836 | } | ||
837 | |||
838 | /* If the cell content area begins with a freeblock, remove it. */ | ||
839 | if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){ | ||
840 | int top; | ||
841 | pbegin = get2byte(&data[hdr+1]); | ||
842 | memcpy(&data[hdr+1], &data[pbegin], 2); | ||
843 | top = get2byte(&data[hdr+5]); | ||
844 | put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2])); | ||
845 | } | ||
846 | } | ||
847 | |||
848 | /* | ||
849 | ** Decode the flags byte (the first byte of the header) for a page | ||
850 | ** and initialize fields of the MemPage structure accordingly. | ||
851 | */ | ||
852 | static void decodeFlags(MemPage *pPage, int flagByte){ | ||
853 | BtShared *pBt; /* A copy of pPage->pBt */ | ||
854 | |||
855 | assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); | ||
856 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
857 | pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0; | ||
858 | pPage->zeroData = (flagByte & PTF_ZERODATA)!=0; | ||
859 | pPage->leaf = (flagByte & PTF_LEAF)!=0; | ||
860 | pPage->childPtrSize = 4*(pPage->leaf==0); | ||
861 | pBt = pPage->pBt; | ||
862 | if( flagByte & PTF_LEAFDATA ){ | ||
863 | pPage->leafData = 1; | ||
864 | pPage->maxLocal = pBt->maxLeaf; | ||
865 | pPage->minLocal = pBt->minLeaf; | ||
866 | }else{ | ||
867 | pPage->leafData = 0; | ||
868 | pPage->maxLocal = pBt->maxLocal; | ||
869 | pPage->minLocal = pBt->minLocal; | ||
870 | } | ||
871 | pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); | ||
872 | } | ||
873 | |||
874 | /* | ||
875 | ** Initialize the auxiliary information for a disk block. | ||
876 | ** | ||
877 | ** The pParent parameter must be a pointer to the MemPage which | ||
878 | ** is the parent of the page being initialized. The root of a | ||
879 | ** BTree has no parent and so for that page, pParent==NULL. | ||
880 | ** | ||
881 | ** Return SQLITE_OK on success. If we see that the page does | ||
882 | ** not contain a well-formed database page, then return | ||
883 | ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not | ||
884 | ** guarantee that the page is well-formed. It only shows that | ||
885 | ** we failed to detect any corruption. | ||
886 | */ | ||
887 | int sqlite3BtreeInitPage( | ||
888 | MemPage *pPage, /* The page to be initialized */ | ||
889 | MemPage *pParent /* The parent. Might be NULL */ | ||
890 | ){ | ||
891 | int pc; /* Address of a freeblock within pPage->aData[] */ | ||
892 | int hdr; /* Offset to beginning of page header */ | ||
893 | u8 *data; /* Equal to pPage->aData */ | ||
894 | BtShared *pBt; /* The main btree structure */ | ||
895 | int usableSize; /* Amount of usable space on each page */ | ||
896 | int cellOffset; /* Offset from start of page to first cell pointer */ | ||
897 | int nFree; /* Number of unused bytes on the page */ | ||
898 | int top; /* First byte of the cell content area */ | ||
899 | |||
900 | pBt = pPage->pBt; | ||
901 | assert( pBt!=0 ); | ||
902 | assert( pParent==0 || pParent->pBt==pBt ); | ||
903 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
904 | assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); | ||
905 | assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); | ||
906 | assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); | ||
907 | if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){ | ||
908 | /* The parent page should never change unless the file is corrupt */ | ||
909 | return SQLITE_CORRUPT_BKPT; | ||
910 | } | ||
911 | if( pPage->isInit ) return SQLITE_OK; | ||
912 | if( pPage->pParent==0 && pParent!=0 ){ | ||
913 | pPage->pParent = pParent; | ||
914 | sqlite3PagerRef(pParent->pDbPage); | ||
915 | } | ||
916 | hdr = pPage->hdrOffset; | ||
917 | data = pPage->aData; | ||
918 | decodeFlags(pPage, data[hdr]); | ||
919 | pPage->nOverflow = 0; | ||
920 | pPage->idxShift = 0; | ||
921 | usableSize = pBt->usableSize; | ||
922 | pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf; | ||
923 | top = get2byte(&data[hdr+5]); | ||
924 | pPage->nCell = get2byte(&data[hdr+3]); | ||
925 | if( pPage->nCell>MX_CELL(pBt) ){ | ||
926 | /* To many cells for a single page. The page must be corrupt */ | ||
927 | return SQLITE_CORRUPT_BKPT; | ||
928 | } | ||
929 | if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){ | ||
930 | /* All pages must have at least one cell, except for root pages */ | ||
931 | return SQLITE_CORRUPT_BKPT; | ||
932 | } | ||
933 | |||
934 | /* Compute the total free space on the page */ | ||
935 | pc = get2byte(&data[hdr+1]); | ||
936 | nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell); | ||
937 | while( pc>0 ){ | ||
938 | int next, size; | ||
939 | if( pc>usableSize-4 ){ | ||
940 | /* Free block is off the page */ | ||
941 | return SQLITE_CORRUPT_BKPT; | ||
942 | } | ||
943 | next = get2byte(&data[pc]); | ||
944 | size = get2byte(&data[pc+2]); | ||
945 | if( next>0 && next<=pc+size+3 ){ | ||
946 | /* Free blocks must be in accending order */ | ||
947 | return SQLITE_CORRUPT_BKPT; | ||
948 | } | ||
949 | nFree += size; | ||
950 | pc = next; | ||
951 | } | ||
952 | pPage->nFree = nFree; | ||
953 | if( nFree>=usableSize ){ | ||
954 | /* Free space cannot exceed total page size */ | ||
955 | return SQLITE_CORRUPT_BKPT; | ||
956 | } | ||
957 | |||
958 | pPage->isInit = 1; | ||
959 | return SQLITE_OK; | ||
960 | } | ||
961 | |||
962 | /* | ||
963 | ** Set up a raw page so that it looks like a database page holding | ||
964 | ** no entries. | ||
965 | */ | ||
966 | static void zeroPage(MemPage *pPage, int flags){ | ||
967 | unsigned char *data = pPage->aData; | ||
968 | BtShared *pBt = pPage->pBt; | ||
969 | int hdr = pPage->hdrOffset; | ||
970 | int first; | ||
971 | |||
972 | assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno ); | ||
973 | assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); | ||
974 | assert( sqlite3PagerGetData(pPage->pDbPage) == data ); | ||
975 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
976 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
977 | memset(&data[hdr], 0, pBt->usableSize - hdr); | ||
978 | data[hdr] = flags; | ||
979 | first = hdr + 8 + 4*((flags&PTF_LEAF)==0); | ||
980 | memset(&data[hdr+1], 0, 4); | ||
981 | data[hdr+7] = 0; | ||
982 | put2byte(&data[hdr+5], pBt->usableSize); | ||
983 | pPage->nFree = pBt->usableSize - first; | ||
984 | decodeFlags(pPage, flags); | ||
985 | pPage->hdrOffset = hdr; | ||
986 | pPage->cellOffset = first; | ||
987 | pPage->nOverflow = 0; | ||
988 | pPage->idxShift = 0; | ||
989 | pPage->nCell = 0; | ||
990 | pPage->isInit = 1; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | ** Get a page from the pager. Initialize the MemPage.pBt and | ||
995 | ** MemPage.aData elements if needed. | ||
996 | ** | ||
997 | ** If the noContent flag is set, it means that we do not care about | ||
998 | ** the content of the page at this time. So do not go to the disk | ||
999 | ** to fetch the content. Just fill in the content with zeros for now. | ||
1000 | ** If in the future we call sqlite3PagerWrite() on this page, that | ||
1001 | ** means we have started to be concerned about content and the disk | ||
1002 | ** read should occur at that point. | ||
1003 | */ | ||
1004 | int sqlite3BtreeGetPage( | ||
1005 | BtShared *pBt, /* The btree */ | ||
1006 | Pgno pgno, /* Number of the page to fetch */ | ||
1007 | MemPage **ppPage, /* Return the page in this parameter */ | ||
1008 | int noContent /* Do not load page content if true */ | ||
1009 | ){ | ||
1010 | int rc; | ||
1011 | MemPage *pPage; | ||
1012 | DbPage *pDbPage; | ||
1013 | |||
1014 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
1015 | rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); | ||
1016 | if( rc ) return rc; | ||
1017 | pPage = (MemPage *)sqlite3PagerGetExtra(pDbPage); | ||
1018 | pPage->aData = sqlite3PagerGetData(pDbPage); | ||
1019 | pPage->pDbPage = pDbPage; | ||
1020 | pPage->pBt = pBt; | ||
1021 | pPage->pgno = pgno; | ||
1022 | pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; | ||
1023 | *ppPage = pPage; | ||
1024 | return SQLITE_OK; | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | ** Get a page from the pager and initialize it. This routine | ||
1029 | ** is just a convenience wrapper around separate calls to | ||
1030 | ** sqlite3BtreeGetPage() and sqlite3BtreeInitPage(). | ||
1031 | */ | ||
1032 | static int getAndInitPage( | ||
1033 | BtShared *pBt, /* The database file */ | ||
1034 | Pgno pgno, /* Number of the page to get */ | ||
1035 | MemPage **ppPage, /* Write the page pointer here */ | ||
1036 | MemPage *pParent /* Parent of the page */ | ||
1037 | ){ | ||
1038 | int rc; | ||
1039 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
1040 | if( pgno==0 ){ | ||
1041 | return SQLITE_CORRUPT_BKPT; | ||
1042 | } | ||
1043 | rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0); | ||
1044 | if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ | ||
1045 | rc = sqlite3BtreeInitPage(*ppPage, pParent); | ||
1046 | } | ||
1047 | return rc; | ||
1048 | } | ||
1049 | |||
1050 | /* | ||
1051 | ** Release a MemPage. This should be called once for each prior | ||
1052 | ** call to sqlite3BtreeGetPage. | ||
1053 | */ | ||
1054 | static void releasePage(MemPage *pPage){ | ||
1055 | if( pPage ){ | ||
1056 | assert( pPage->aData ); | ||
1057 | assert( pPage->pBt ); | ||
1058 | assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); | ||
1059 | assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); | ||
1060 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
1061 | sqlite3PagerUnref(pPage->pDbPage); | ||
1062 | } | ||
1063 | } | ||
1064 | |||
1065 | /* | ||
1066 | ** This routine is called when the reference count for a page | ||
1067 | ** reaches zero. We need to unref the pParent pointer when that | ||
1068 | ** happens. | ||
1069 | */ | ||
1070 | static void pageDestructor(DbPage *pData, int pageSize){ | ||
1071 | MemPage *pPage; | ||
1072 | assert( (pageSize & 7)==0 ); | ||
1073 | pPage = (MemPage *)sqlite3PagerGetExtra(pData); | ||
1074 | assert( pPage->isInit==0 || sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
1075 | if( pPage->pParent ){ | ||
1076 | MemPage *pParent = pPage->pParent; | ||
1077 | assert( pParent->pBt==pPage->pBt ); | ||
1078 | pPage->pParent = 0; | ||
1079 | releasePage(pParent); | ||
1080 | } | ||
1081 | pPage->isInit = 0; | ||
1082 | } | ||
1083 | |||
1084 | /* | ||
1085 | ** During a rollback, when the pager reloads information into the cache | ||
1086 | ** so that the cache is restored to its original state at the start of | ||
1087 | ** the transaction, for each page restored this routine is called. | ||
1088 | ** | ||
1089 | ** This routine needs to reset the extra data section at the end of the | ||
1090 | ** page to agree with the restored data. | ||
1091 | */ | ||
1092 | static void pageReinit(DbPage *pData, int pageSize){ | ||
1093 | MemPage *pPage; | ||
1094 | assert( (pageSize & 7)==0 ); | ||
1095 | pPage = (MemPage *)sqlite3PagerGetExtra(pData); | ||
1096 | if( pPage->isInit ){ | ||
1097 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
1098 | pPage->isInit = 0; | ||
1099 | sqlite3BtreeInitPage(pPage, pPage->pParent); | ||
1100 | } | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | ** Open a database file. | ||
1105 | ** | ||
1106 | ** zFilename is the name of the database file. If zFilename is NULL | ||
1107 | ** a new database with a random name is created. This randomly named | ||
1108 | ** database file will be deleted when sqlite3BtreeClose() is called. | ||
1109 | ** If zFilename is ":memory:" then an in-memory database is created | ||
1110 | ** that is automatically destroyed when it is closed. | ||
1111 | */ | ||
1112 | int sqlite3BtreeOpen( | ||
1113 | const char *zFilename, /* Name of the file containing the BTree database */ | ||
1114 | sqlite3 *pSqlite, /* Associated database handle */ | ||
1115 | Btree **ppBtree, /* Pointer to new Btree object written here */ | ||
1116 | int flags, /* Options */ | ||
1117 | int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ | ||
1118 | ){ | ||
1119 | sqlite3_vfs *pVfs; /* The VFS to use for this btree */ | ||
1120 | BtShared *pBt = 0; /* Shared part of btree structure */ | ||
1121 | Btree *p; /* Handle to return */ | ||
1122 | int rc = SQLITE_OK; | ||
1123 | int nReserve; | ||
1124 | unsigned char zDbHeader[100]; | ||
1125 | |||
1126 | /* Set the variable isMemdb to true for an in-memory database, or | ||
1127 | ** false for a file-based database. This symbol is only required if | ||
1128 | ** either of the shared-data or autovacuum features are compiled | ||
1129 | ** into the library. | ||
1130 | */ | ||
1131 | #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM) | ||
1132 | #ifdef SQLITE_OMIT_MEMORYDB | ||
1133 | const int isMemdb = 0; | ||
1134 | #else | ||
1135 | const int isMemdb = zFilename && !strcmp(zFilename, ":memory:"); | ||
1136 | #endif | ||
1137 | #endif | ||
1138 | |||
1139 | assert( pSqlite!=0 ); | ||
1140 | assert( sqlite3_mutex_held(pSqlite->mutex) ); | ||
1141 | |||
1142 | pVfs = pSqlite->pVfs; | ||
1143 | p = sqlite3MallocZero(sizeof(Btree)); | ||
1144 | if( !p ){ | ||
1145 | return SQLITE_NOMEM; | ||
1146 | } | ||
1147 | p->inTrans = TRANS_NONE; | ||
1148 | p->pSqlite = pSqlite; | ||
1149 | |||
1150 | #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) | ||
1151 | /* | ||
1152 | ** If this Btree is a candidate for shared cache, try to find an | ||
1153 | ** existing BtShared object that we can share with | ||
1154 | */ | ||
1155 | if( (flags & BTREE_PRIVATE)==0 | ||
1156 | && isMemdb==0 | ||
1157 | && (pSqlite->flags & SQLITE_Vtab)==0 | ||
1158 | && zFilename && zFilename[0] | ||
1159 | ){ | ||
1160 | if( sqlite3SharedCacheEnabled ){ | ||
1161 | int nFullPathname = pVfs->mxPathname+1; | ||
1162 | char *zFullPathname = (char *)sqlite3_malloc(nFullPathname); | ||
1163 | sqlite3_mutex *mutexShared; | ||
1164 | p->sharable = 1; | ||
1165 | if( pSqlite ){ | ||
1166 | pSqlite->flags |= SQLITE_SharedCache; | ||
1167 | } | ||
1168 | if( !zFullPathname ){ | ||
1169 | sqlite3_free(p); | ||
1170 | return SQLITE_NOMEM; | ||
1171 | } | ||
1172 | sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname); | ||
1173 | mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER); | ||
1174 | sqlite3_mutex_enter(mutexShared); | ||
1175 | for(pBt=sqlite3SharedCacheList; pBt; pBt=pBt->pNext){ | ||
1176 | assert( pBt->nRef>0 ); | ||
1177 | if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager)) | ||
1178 | && sqlite3PagerVfs(pBt->pPager)==pVfs ){ | ||
1179 | p->pBt = pBt; | ||
1180 | pBt->nRef++; | ||
1181 | break; | ||
1182 | } | ||
1183 | } | ||
1184 | sqlite3_mutex_leave(mutexShared); | ||
1185 | sqlite3_free(zFullPathname); | ||
1186 | } | ||
1187 | #ifdef SQLITE_DEBUG | ||
1188 | else{ | ||
1189 | /* In debug mode, we mark all persistent databases as sharable | ||
1190 | ** even when they are not. This exercises the locking code and | ||
1191 | ** gives more opportunity for asserts(sqlite3_mutex_held()) | ||
1192 | ** statements to find locking problems. | ||
1193 | */ | ||
1194 | p->sharable = 1; | ||
1195 | } | ||
1196 | #endif | ||
1197 | } | ||
1198 | #endif | ||
1199 | if( pBt==0 ){ | ||
1200 | /* | ||
1201 | ** The following asserts make sure that structures used by the btree are | ||
1202 | ** the right size. This is to guard against size changes that result | ||
1203 | ** when compiling on a different architecture. | ||
1204 | */ | ||
1205 | assert( sizeof(i64)==8 || sizeof(i64)==4 ); | ||
1206 | assert( sizeof(u64)==8 || sizeof(u64)==4 ); | ||
1207 | assert( sizeof(u32)==4 ); | ||
1208 | assert( sizeof(u16)==2 ); | ||
1209 | assert( sizeof(Pgno)==4 ); | ||
1210 | |||
1211 | pBt = sqlite3MallocZero( sizeof(*pBt) ); | ||
1212 | if( pBt==0 ){ | ||
1213 | rc = SQLITE_NOMEM; | ||
1214 | goto btree_open_out; | ||
1215 | } | ||
1216 | rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, | ||
1217 | EXTRA_SIZE, flags, vfsFlags); | ||
1218 | if( rc==SQLITE_OK ){ | ||
1219 | rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); | ||
1220 | } | ||
1221 | if( rc!=SQLITE_OK ){ | ||
1222 | goto btree_open_out; | ||
1223 | } | ||
1224 | p->pBt = pBt; | ||
1225 | |||
1226 | sqlite3PagerSetDestructor(pBt->pPager, pageDestructor); | ||
1227 | sqlite3PagerSetReiniter(pBt->pPager, pageReinit); | ||
1228 | pBt->pCursor = 0; | ||
1229 | pBt->pPage1 = 0; | ||
1230 | pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager); | ||
1231 | pBt->pageSize = get2byte(&zDbHeader[16]); | ||
1232 | if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE | ||
1233 | || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ | ||
1234 | pBt->pageSize = 0; | ||
1235 | sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); | ||
1236 | pBt->maxEmbedFrac = 64; /* 25% */ | ||
1237 | pBt->minEmbedFrac = 32; /* 12.5% */ | ||
1238 | pBt->minLeafFrac = 32; /* 12.5% */ | ||
1239 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
1240 | /* If the magic name ":memory:" will create an in-memory database, then | ||
1241 | ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if | ||
1242 | ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if | ||
1243 | ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a | ||
1244 | ** regular file-name. In this case the auto-vacuum applies as per normal. | ||
1245 | */ | ||
1246 | if( zFilename && !isMemdb ){ | ||
1247 | pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); | ||
1248 | pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); | ||
1249 | } | ||
1250 | #endif | ||
1251 | nReserve = 0; | ||
1252 | }else{ | ||
1253 | nReserve = zDbHeader[20]; | ||
1254 | pBt->maxEmbedFrac = zDbHeader[21]; | ||
1255 | pBt->minEmbedFrac = zDbHeader[22]; | ||
1256 | pBt->minLeafFrac = zDbHeader[23]; | ||
1257 | pBt->pageSizeFixed = 1; | ||
1258 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
1259 | pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); | ||
1260 | pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); | ||
1261 | #endif | ||
1262 | } | ||
1263 | pBt->usableSize = pBt->pageSize - nReserve; | ||
1264 | assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ | ||
1265 | sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); | ||
1266 | |||
1267 | #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) | ||
1268 | /* Add the new BtShared object to the linked list sharable BtShareds. | ||
1269 | */ | ||
1270 | if( p->sharable ){ | ||
1271 | sqlite3_mutex *mutexShared; | ||
1272 | pBt->nRef = 1; | ||
1273 | mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER); | ||
1274 | if( SQLITE_THREADSAFE ){ | ||
1275 | pBt->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); | ||
1276 | if( pBt->mutex==0 ){ | ||
1277 | rc = SQLITE_NOMEM; | ||
1278 | pSqlite->mallocFailed = 0; | ||
1279 | goto btree_open_out; | ||
1280 | } | ||
1281 | } | ||
1282 | sqlite3_mutex_enter(mutexShared); | ||
1283 | pBt->pNext = sqlite3SharedCacheList; | ||
1284 | sqlite3SharedCacheList = pBt; | ||
1285 | sqlite3_mutex_leave(mutexShared); | ||
1286 | } | ||
1287 | #endif | ||
1288 | } | ||
1289 | |||
1290 | #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) | ||
1291 | /* If the new Btree uses a sharable pBtShared, then link the new | ||
1292 | ** Btree into the list of all sharable Btrees for the same connection. | ||
1293 | ** The list is kept in ascending order by pBt address. | ||
1294 | */ | ||
1295 | if( p->sharable ){ | ||
1296 | int i; | ||
1297 | Btree *pSib; | ||
1298 | for(i=0; i<pSqlite->nDb; i++){ | ||
1299 | if( (pSib = pSqlite->aDb[i].pBt)!=0 && pSib->sharable ){ | ||
1300 | while( pSib->pPrev ){ pSib = pSib->pPrev; } | ||
1301 | if( p->pBt<pSib->pBt ){ | ||
1302 | p->pNext = pSib; | ||
1303 | p->pPrev = 0; | ||
1304 | pSib->pPrev = p; | ||
1305 | }else{ | ||
1306 | while( pSib->pNext && pSib->pNext->pBt<p->pBt ){ | ||
1307 | pSib = pSib->pNext; | ||
1308 | } | ||
1309 | p->pNext = pSib->pNext; | ||
1310 | p->pPrev = pSib; | ||
1311 | if( p->pNext ){ | ||
1312 | p->pNext->pPrev = p; | ||
1313 | } | ||
1314 | pSib->pNext = p; | ||
1315 | } | ||
1316 | break; | ||
1317 | } | ||
1318 | } | ||
1319 | } | ||
1320 | #endif | ||
1321 | *ppBtree = p; | ||
1322 | |||
1323 | btree_open_out: | ||
1324 | if( rc!=SQLITE_OK ){ | ||
1325 | if( pBt && pBt->pPager ){ | ||
1326 | sqlite3PagerClose(pBt->pPager); | ||
1327 | } | ||
1328 | sqlite3_free(pBt); | ||
1329 | sqlite3_free(p); | ||
1330 | *ppBtree = 0; | ||
1331 | } | ||
1332 | return rc; | ||
1333 | } | ||
1334 | |||
1335 | /* | ||
1336 | ** Decrement the BtShared.nRef counter. When it reaches zero, | ||
1337 | ** remove the BtShared structure from the sharing list. Return | ||
1338 | ** true if the BtShared.nRef counter reaches zero and return | ||
1339 | ** false if it is still positive. | ||
1340 | */ | ||
1341 | static int removeFromSharingList(BtShared *pBt){ | ||
1342 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
1343 | sqlite3_mutex *pMaster; | ||
1344 | BtShared *pList; | ||
1345 | int removed = 0; | ||
1346 | |||
1347 | assert( sqlite3_mutex_notheld(pBt->mutex) ); | ||
1348 | pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER); | ||
1349 | sqlite3_mutex_enter(pMaster); | ||
1350 | pBt->nRef--; | ||
1351 | if( pBt->nRef<=0 ){ | ||
1352 | if( sqlite3SharedCacheList==pBt ){ | ||
1353 | sqlite3SharedCacheList = pBt->pNext; | ||
1354 | }else{ | ||
1355 | pList = sqlite3SharedCacheList; | ||
1356 | while( pList && pList->pNext!=pBt ){ | ||
1357 | pList=pList->pNext; | ||
1358 | } | ||
1359 | if( pList ){ | ||
1360 | pList->pNext = pBt->pNext; | ||
1361 | } | ||
1362 | } | ||
1363 | if( SQLITE_THREADSAFE ){ | ||
1364 | sqlite3_mutex_free(pBt->mutex); | ||
1365 | } | ||
1366 | removed = 1; | ||
1367 | } | ||
1368 | sqlite3_mutex_leave(pMaster); | ||
1369 | return removed; | ||
1370 | #else | ||
1371 | return 1; | ||
1372 | #endif | ||
1373 | } | ||
1374 | |||
1375 | /* | ||
1376 | ** Close an open database and invalidate all cursors. | ||
1377 | */ | ||
1378 | int sqlite3BtreeClose(Btree *p){ | ||
1379 | BtShared *pBt = p->pBt; | ||
1380 | BtCursor *pCur; | ||
1381 | |||
1382 | /* Close all cursors opened via this handle. */ | ||
1383 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
1384 | sqlite3BtreeEnter(p); | ||
1385 | pCur = pBt->pCursor; | ||
1386 | while( pCur ){ | ||
1387 | BtCursor *pTmp = pCur; | ||
1388 | pCur = pCur->pNext; | ||
1389 | if( pTmp->pBtree==p ){ | ||
1390 | sqlite3BtreeCloseCursor(pTmp); | ||
1391 | } | ||
1392 | } | ||
1393 | |||
1394 | /* Rollback any active transaction and free the handle structure. | ||
1395 | ** The call to sqlite3BtreeRollback() drops any table-locks held by | ||
1396 | ** this handle. | ||
1397 | */ | ||
1398 | sqlite3BtreeRollback(p); | ||
1399 | sqlite3BtreeLeave(p); | ||
1400 | |||
1401 | /* If there are still other outstanding references to the shared-btree | ||
1402 | ** structure, return now. The remainder of this procedure cleans | ||
1403 | ** up the shared-btree. | ||
1404 | */ | ||
1405 | assert( p->wantToLock==0 && p->locked==0 ); | ||
1406 | if( !p->sharable || removeFromSharingList(pBt) ){ | ||
1407 | /* The pBt is no longer on the sharing list, so we can access | ||
1408 | ** it without having to hold the mutex. | ||
1409 | ** | ||
1410 | ** Clean out and delete the BtShared object. | ||
1411 | */ | ||
1412 | assert( !pBt->pCursor ); | ||
1413 | sqlite3PagerClose(pBt->pPager); | ||
1414 | if( pBt->xFreeSchema && pBt->pSchema ){ | ||
1415 | pBt->xFreeSchema(pBt->pSchema); | ||
1416 | } | ||
1417 | sqlite3_free(pBt->pSchema); | ||
1418 | sqlite3_free(pBt); | ||
1419 | } | ||
1420 | |||
1421 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
1422 | assert( p->wantToLock==0 ); | ||
1423 | assert( p->locked==0 ); | ||
1424 | if( p->pPrev ) p->pPrev->pNext = p->pNext; | ||
1425 | if( p->pNext ) p->pNext->pPrev = p->pPrev; | ||
1426 | #endif | ||
1427 | |||
1428 | sqlite3_free(p); | ||
1429 | return SQLITE_OK; | ||
1430 | } | ||
1431 | |||
1432 | /* | ||
1433 | ** Change the busy handler callback function. | ||
1434 | */ | ||
1435 | int sqlite3BtreeSetBusyHandler(Btree *p, BusyHandler *pHandler){ | ||
1436 | BtShared *pBt = p->pBt; | ||
1437 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
1438 | sqlite3BtreeEnter(p); | ||
1439 | pBt->pBusyHandler = pHandler; | ||
1440 | sqlite3PagerSetBusyhandler(pBt->pPager, pHandler); | ||
1441 | sqlite3BtreeLeave(p); | ||
1442 | return SQLITE_OK; | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | ** Change the limit on the number of pages allowed in the cache. | ||
1447 | ** | ||
1448 | ** The maximum number of cache pages is set to the absolute | ||
1449 | ** value of mxPage. If mxPage is negative, the pager will | ||
1450 | ** operate asynchronously - it will not stop to do fsync()s | ||
1451 | ** to insure data is written to the disk surface before | ||
1452 | ** continuing. Transactions still work if synchronous is off, | ||
1453 | ** and the database cannot be corrupted if this program | ||
1454 | ** crashes. But if the operating system crashes or there is | ||
1455 | ** an abrupt power failure when synchronous is off, the database | ||
1456 | ** could be left in an inconsistent and unrecoverable state. | ||
1457 | ** Synchronous is on by default so database corruption is not | ||
1458 | ** normally a worry. | ||
1459 | */ | ||
1460 | int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ | ||
1461 | BtShared *pBt = p->pBt; | ||
1462 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
1463 | sqlite3BtreeEnter(p); | ||
1464 | sqlite3PagerSetCachesize(pBt->pPager, mxPage); | ||
1465 | sqlite3BtreeLeave(p); | ||
1466 | return SQLITE_OK; | ||
1467 | } | ||
1468 | |||
1469 | /* | ||
1470 | ** Change the way data is synced to disk in order to increase or decrease | ||
1471 | ** how well the database resists damage due to OS crashes and power | ||
1472 | ** failures. Level 1 is the same as asynchronous (no syncs() occur and | ||
1473 | ** there is a high probability of damage) Level 2 is the default. There | ||
1474 | ** is a very low but non-zero probability of damage. Level 3 reduces the | ||
1475 | ** probability of damage to near zero but with a write performance reduction. | ||
1476 | */ | ||
1477 | #ifndef SQLITE_OMIT_PAGER_PRAGMAS | ||
1478 | int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){ | ||
1479 | BtShared *pBt = p->pBt; | ||
1480 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
1481 | sqlite3BtreeEnter(p); | ||
1482 | sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync); | ||
1483 | sqlite3BtreeLeave(p); | ||
1484 | return SQLITE_OK; | ||
1485 | } | ||
1486 | #endif | ||
1487 | |||
1488 | /* | ||
1489 | ** Return TRUE if the given btree is set to safety level 1. In other | ||
1490 | ** words, return TRUE if no sync() occurs on the disk files. | ||
1491 | */ | ||
1492 | int sqlite3BtreeSyncDisabled(Btree *p){ | ||
1493 | BtShared *pBt = p->pBt; | ||
1494 | int rc; | ||
1495 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
1496 | sqlite3BtreeEnter(p); | ||
1497 | assert( pBt && pBt->pPager ); | ||
1498 | rc = sqlite3PagerNosync(pBt->pPager); | ||
1499 | sqlite3BtreeLeave(p); | ||
1500 | return rc; | ||
1501 | } | ||
1502 | |||
1503 | #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) | ||
1504 | /* | ||
1505 | ** Change the default pages size and the number of reserved bytes per page. | ||
1506 | ** | ||
1507 | ** The page size must be a power of 2 between 512 and 65536. If the page | ||
1508 | ** size supplied does not meet this constraint then the page size is not | ||
1509 | ** changed. | ||
1510 | ** | ||
1511 | ** Page sizes are constrained to be a power of two so that the region | ||
1512 | ** of the database file used for locking (beginning at PENDING_BYTE, | ||
1513 | ** the first byte past the 1GB boundary, 0x40000000) needs to occur | ||
1514 | ** at the beginning of a page. | ||
1515 | ** | ||
1516 | ** If parameter nReserve is less than zero, then the number of reserved | ||
1517 | ** bytes per page is left unchanged. | ||
1518 | */ | ||
1519 | int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){ | ||
1520 | int rc = SQLITE_OK; | ||
1521 | BtShared *pBt = p->pBt; | ||
1522 | sqlite3BtreeEnter(p); | ||
1523 | if( pBt->pageSizeFixed ){ | ||
1524 | sqlite3BtreeLeave(p); | ||
1525 | return SQLITE_READONLY; | ||
1526 | } | ||
1527 | if( nReserve<0 ){ | ||
1528 | nReserve = pBt->pageSize - pBt->usableSize; | ||
1529 | } | ||
1530 | if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && | ||
1531 | ((pageSize-1)&pageSize)==0 ){ | ||
1532 | assert( (pageSize & 7)==0 ); | ||
1533 | assert( !pBt->pPage1 && !pBt->pCursor ); | ||
1534 | pBt->pageSize = pageSize; | ||
1535 | rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize); | ||
1536 | } | ||
1537 | pBt->usableSize = pBt->pageSize - nReserve; | ||
1538 | sqlite3BtreeLeave(p); | ||
1539 | return rc; | ||
1540 | } | ||
1541 | |||
1542 | /* | ||
1543 | ** Return the currently defined page size | ||
1544 | */ | ||
1545 | int sqlite3BtreeGetPageSize(Btree *p){ | ||
1546 | return p->pBt->pageSize; | ||
1547 | } | ||
1548 | int sqlite3BtreeGetReserve(Btree *p){ | ||
1549 | int n; | ||
1550 | sqlite3BtreeEnter(p); | ||
1551 | n = p->pBt->pageSize - p->pBt->usableSize; | ||
1552 | sqlite3BtreeLeave(p); | ||
1553 | return n; | ||
1554 | } | ||
1555 | |||
1556 | /* | ||
1557 | ** Set the maximum page count for a database if mxPage is positive. | ||
1558 | ** No changes are made if mxPage is 0 or negative. | ||
1559 | ** Regardless of the value of mxPage, return the maximum page count. | ||
1560 | */ | ||
1561 | int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ | ||
1562 | int n; | ||
1563 | sqlite3BtreeEnter(p); | ||
1564 | n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); | ||
1565 | sqlite3BtreeLeave(p); | ||
1566 | return n; | ||
1567 | } | ||
1568 | #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */ | ||
1569 | |||
1570 | /* | ||
1571 | ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' | ||
1572 | ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it | ||
1573 | ** is disabled. The default value for the auto-vacuum property is | ||
1574 | ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. | ||
1575 | */ | ||
1576 | int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ | ||
1577 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
1578 | return SQLITE_READONLY; | ||
1579 | #else | ||
1580 | BtShared *pBt = p->pBt; | ||
1581 | int rc = SQLITE_OK; | ||
1582 | int av = (autoVacuum?1:0); | ||
1583 | |||
1584 | sqlite3BtreeEnter(p); | ||
1585 | if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){ | ||
1586 | rc = SQLITE_READONLY; | ||
1587 | }else{ | ||
1588 | pBt->autoVacuum = av; | ||
1589 | } | ||
1590 | sqlite3BtreeLeave(p); | ||
1591 | return rc; | ||
1592 | #endif | ||
1593 | } | ||
1594 | |||
1595 | /* | ||
1596 | ** Return the value of the 'auto-vacuum' property. If auto-vacuum is | ||
1597 | ** enabled 1 is returned. Otherwise 0. | ||
1598 | */ | ||
1599 | int sqlite3BtreeGetAutoVacuum(Btree *p){ | ||
1600 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
1601 | return BTREE_AUTOVACUUM_NONE; | ||
1602 | #else | ||
1603 | int rc; | ||
1604 | sqlite3BtreeEnter(p); | ||
1605 | rc = ( | ||
1606 | (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: | ||
1607 | (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: | ||
1608 | BTREE_AUTOVACUUM_INCR | ||
1609 | ); | ||
1610 | sqlite3BtreeLeave(p); | ||
1611 | return rc; | ||
1612 | #endif | ||
1613 | } | ||
1614 | |||
1615 | |||
1616 | /* | ||
1617 | ** Get a reference to pPage1 of the database file. This will | ||
1618 | ** also acquire a readlock on that file. | ||
1619 | ** | ||
1620 | ** SQLITE_OK is returned on success. If the file is not a | ||
1621 | ** well-formed database file, then SQLITE_CORRUPT is returned. | ||
1622 | ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM | ||
1623 | ** is returned if we run out of memory. | ||
1624 | */ | ||
1625 | static int lockBtree(BtShared *pBt){ | ||
1626 | int rc, pageSize; | ||
1627 | MemPage *pPage1; | ||
1628 | |||
1629 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
1630 | if( pBt->pPage1 ) return SQLITE_OK; | ||
1631 | rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0); | ||
1632 | if( rc!=SQLITE_OK ) return rc; | ||
1633 | |||
1634 | |||
1635 | /* Do some checking to help insure the file we opened really is | ||
1636 | ** a valid database file. | ||
1637 | */ | ||
1638 | rc = SQLITE_NOTADB; | ||
1639 | if( sqlite3PagerPagecount(pBt->pPager)>0 ){ | ||
1640 | u8 *page1 = pPage1->aData; | ||
1641 | if( memcmp(page1, zMagicHeader, 16)!=0 ){ | ||
1642 | goto page1_init_failed; | ||
1643 | } | ||
1644 | if( page1[18]>1 ){ | ||
1645 | pBt->readOnly = 1; | ||
1646 | } | ||
1647 | if( page1[19]>1 ){ | ||
1648 | goto page1_init_failed; | ||
1649 | } | ||
1650 | pageSize = get2byte(&page1[16]); | ||
1651 | if( ((pageSize-1)&pageSize)!=0 || pageSize<512 || | ||
1652 | (SQLITE_MAX_PAGE_SIZE<32768 && pageSize>SQLITE_MAX_PAGE_SIZE) | ||
1653 | ){ | ||
1654 | goto page1_init_failed; | ||
1655 | } | ||
1656 | assert( (pageSize & 7)==0 ); | ||
1657 | pBt->pageSize = pageSize; | ||
1658 | pBt->usableSize = pageSize - page1[20]; | ||
1659 | if( pBt->usableSize<500 ){ | ||
1660 | goto page1_init_failed; | ||
1661 | } | ||
1662 | pBt->maxEmbedFrac = page1[21]; | ||
1663 | pBt->minEmbedFrac = page1[22]; | ||
1664 | pBt->minLeafFrac = page1[23]; | ||
1665 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
1666 | pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); | ||
1667 | pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); | ||
1668 | #endif | ||
1669 | } | ||
1670 | |||
1671 | /* maxLocal is the maximum amount of payload to store locally for | ||
1672 | ** a cell. Make sure it is small enough so that at least minFanout | ||
1673 | ** cells can will fit on one page. We assume a 10-byte page header. | ||
1674 | ** Besides the payload, the cell must store: | ||
1675 | ** 2-byte pointer to the cell | ||
1676 | ** 4-byte child pointer | ||
1677 | ** 9-byte nKey value | ||
1678 | ** 4-byte nData value | ||
1679 | ** 4-byte overflow page pointer | ||
1680 | ** So a cell consists of a 2-byte poiner, a header which is as much as | ||
1681 | ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow | ||
1682 | ** page pointer. | ||
1683 | */ | ||
1684 | pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23; | ||
1685 | pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23; | ||
1686 | pBt->maxLeaf = pBt->usableSize - 35; | ||
1687 | pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23; | ||
1688 | if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){ | ||
1689 | goto page1_init_failed; | ||
1690 | } | ||
1691 | assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); | ||
1692 | pBt->pPage1 = pPage1; | ||
1693 | return SQLITE_OK; | ||
1694 | |||
1695 | page1_init_failed: | ||
1696 | releasePage(pPage1); | ||
1697 | pBt->pPage1 = 0; | ||
1698 | return rc; | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | ** This routine works like lockBtree() except that it also invokes the | ||
1703 | ** busy callback if there is lock contention. | ||
1704 | */ | ||
1705 | static int lockBtreeWithRetry(Btree *pRef){ | ||
1706 | int rc = SQLITE_OK; | ||
1707 | |||
1708 | assert( sqlite3BtreeHoldsMutex(pRef) ); | ||
1709 | if( pRef->inTrans==TRANS_NONE ){ | ||
1710 | u8 inTransaction = pRef->pBt->inTransaction; | ||
1711 | btreeIntegrity(pRef); | ||
1712 | rc = sqlite3BtreeBeginTrans(pRef, 0); | ||
1713 | pRef->pBt->inTransaction = inTransaction; | ||
1714 | pRef->inTrans = TRANS_NONE; | ||
1715 | if( rc==SQLITE_OK ){ | ||
1716 | pRef->pBt->nTransaction--; | ||
1717 | } | ||
1718 | btreeIntegrity(pRef); | ||
1719 | } | ||
1720 | return rc; | ||
1721 | } | ||
1722 | |||
1723 | |||
1724 | /* | ||
1725 | ** If there are no outstanding cursors and we are not in the middle | ||
1726 | ** of a transaction but there is a read lock on the database, then | ||
1727 | ** this routine unrefs the first page of the database file which | ||
1728 | ** has the effect of releasing the read lock. | ||
1729 | ** | ||
1730 | ** If there are any outstanding cursors, this routine is a no-op. | ||
1731 | ** | ||
1732 | ** If there is a transaction in progress, this routine is a no-op. | ||
1733 | */ | ||
1734 | static void unlockBtreeIfUnused(BtShared *pBt){ | ||
1735 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
1736 | if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){ | ||
1737 | if( sqlite3PagerRefcount(pBt->pPager)>=1 ){ | ||
1738 | if( pBt->pPage1->aData==0 ){ | ||
1739 | MemPage *pPage = pBt->pPage1; | ||
1740 | pPage->aData = sqlite3PagerGetData(pPage->pDbPage); | ||
1741 | pPage->pBt = pBt; | ||
1742 | pPage->pgno = 1; | ||
1743 | } | ||
1744 | releasePage(pBt->pPage1); | ||
1745 | } | ||
1746 | pBt->pPage1 = 0; | ||
1747 | pBt->inStmt = 0; | ||
1748 | } | ||
1749 | } | ||
1750 | |||
1751 | /* | ||
1752 | ** Create a new database by initializing the first page of the | ||
1753 | ** file. | ||
1754 | */ | ||
1755 | static int newDatabase(BtShared *pBt){ | ||
1756 | MemPage *pP1; | ||
1757 | unsigned char *data; | ||
1758 | int rc; | ||
1759 | |||
1760 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
1761 | if( sqlite3PagerPagecount(pBt->pPager)>0 ) return SQLITE_OK; | ||
1762 | pP1 = pBt->pPage1; | ||
1763 | assert( pP1!=0 ); | ||
1764 | data = pP1->aData; | ||
1765 | rc = sqlite3PagerWrite(pP1->pDbPage); | ||
1766 | if( rc ) return rc; | ||
1767 | memcpy(data, zMagicHeader, sizeof(zMagicHeader)); | ||
1768 | assert( sizeof(zMagicHeader)==16 ); | ||
1769 | put2byte(&data[16], pBt->pageSize); | ||
1770 | data[18] = 1; | ||
1771 | data[19] = 1; | ||
1772 | data[20] = pBt->pageSize - pBt->usableSize; | ||
1773 | data[21] = pBt->maxEmbedFrac; | ||
1774 | data[22] = pBt->minEmbedFrac; | ||
1775 | data[23] = pBt->minLeafFrac; | ||
1776 | memset(&data[24], 0, 100-24); | ||
1777 | zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); | ||
1778 | pBt->pageSizeFixed = 1; | ||
1779 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
1780 | assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); | ||
1781 | assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); | ||
1782 | put4byte(&data[36 + 4*4], pBt->autoVacuum); | ||
1783 | put4byte(&data[36 + 7*4], pBt->incrVacuum); | ||
1784 | #endif | ||
1785 | return SQLITE_OK; | ||
1786 | } | ||
1787 | |||
1788 | /* | ||
1789 | ** Attempt to start a new transaction. A write-transaction | ||
1790 | ** is started if the second argument is nonzero, otherwise a read- | ||
1791 | ** transaction. If the second argument is 2 or more and exclusive | ||
1792 | ** transaction is started, meaning that no other process is allowed | ||
1793 | ** to access the database. A preexisting transaction may not be | ||
1794 | ** upgraded to exclusive by calling this routine a second time - the | ||
1795 | ** exclusivity flag only works for a new transaction. | ||
1796 | ** | ||
1797 | ** A write-transaction must be started before attempting any | ||
1798 | ** changes to the database. None of the following routines | ||
1799 | ** will work unless a transaction is started first: | ||
1800 | ** | ||
1801 | ** sqlite3BtreeCreateTable() | ||
1802 | ** sqlite3BtreeCreateIndex() | ||
1803 | ** sqlite3BtreeClearTable() | ||
1804 | ** sqlite3BtreeDropTable() | ||
1805 | ** sqlite3BtreeInsert() | ||
1806 | ** sqlite3BtreeDelete() | ||
1807 | ** sqlite3BtreeUpdateMeta() | ||
1808 | ** | ||
1809 | ** If an initial attempt to acquire the lock fails because of lock contention | ||
1810 | ** and the database was previously unlocked, then invoke the busy handler | ||
1811 | ** if there is one. But if there was previously a read-lock, do not | ||
1812 | ** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is | ||
1813 | ** returned when there is already a read-lock in order to avoid a deadlock. | ||
1814 | ** | ||
1815 | ** Suppose there are two processes A and B. A has a read lock and B has | ||
1816 | ** a reserved lock. B tries to promote to exclusive but is blocked because | ||
1817 | ** of A's read lock. A tries to promote to reserved but is blocked by B. | ||
1818 | ** One or the other of the two processes must give way or there can be | ||
1819 | ** no progress. By returning SQLITE_BUSY and not invoking the busy callback | ||
1820 | ** when A already has a read lock, we encourage A to give up and let B | ||
1821 | ** proceed. | ||
1822 | */ | ||
1823 | int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | ||
1824 | BtShared *pBt = p->pBt; | ||
1825 | int rc = SQLITE_OK; | ||
1826 | |||
1827 | sqlite3BtreeEnter(p); | ||
1828 | btreeIntegrity(p); | ||
1829 | |||
1830 | /* If the btree is already in a write-transaction, or it | ||
1831 | ** is already in a read-transaction and a read-transaction | ||
1832 | ** is requested, this is a no-op. | ||
1833 | */ | ||
1834 | if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ | ||
1835 | goto trans_begun; | ||
1836 | } | ||
1837 | |||
1838 | /* Write transactions are not possible on a read-only database */ | ||
1839 | if( pBt->readOnly && wrflag ){ | ||
1840 | rc = SQLITE_READONLY; | ||
1841 | goto trans_begun; | ||
1842 | } | ||
1843 | |||
1844 | /* If another database handle has already opened a write transaction | ||
1845 | ** on this shared-btree structure and a second write transaction is | ||
1846 | ** requested, return SQLITE_BUSY. | ||
1847 | */ | ||
1848 | if( pBt->inTransaction==TRANS_WRITE && wrflag ){ | ||
1849 | rc = SQLITE_BUSY; | ||
1850 | goto trans_begun; | ||
1851 | } | ||
1852 | |||
1853 | do { | ||
1854 | if( pBt->pPage1==0 ){ | ||
1855 | rc = lockBtree(pBt); | ||
1856 | } | ||
1857 | |||
1858 | if( rc==SQLITE_OK && wrflag ){ | ||
1859 | if( pBt->readOnly ){ | ||
1860 | rc = SQLITE_READONLY; | ||
1861 | }else{ | ||
1862 | rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1); | ||
1863 | if( rc==SQLITE_OK ){ | ||
1864 | rc = newDatabase(pBt); | ||
1865 | } | ||
1866 | } | ||
1867 | } | ||
1868 | |||
1869 | if( rc==SQLITE_OK ){ | ||
1870 | if( wrflag ) pBt->inStmt = 0; | ||
1871 | }else{ | ||
1872 | unlockBtreeIfUnused(pBt); | ||
1873 | } | ||
1874 | }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && | ||
1875 | sqlite3InvokeBusyHandler(pBt->pBusyHandler) ); | ||
1876 | |||
1877 | if( rc==SQLITE_OK ){ | ||
1878 | if( p->inTrans==TRANS_NONE ){ | ||
1879 | pBt->nTransaction++; | ||
1880 | } | ||
1881 | p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); | ||
1882 | if( p->inTrans>pBt->inTransaction ){ | ||
1883 | pBt->inTransaction = p->inTrans; | ||
1884 | } | ||
1885 | } | ||
1886 | |||
1887 | |||
1888 | trans_begun: | ||
1889 | btreeIntegrity(p); | ||
1890 | sqlite3BtreeLeave(p); | ||
1891 | return rc; | ||
1892 | } | ||
1893 | |||
1894 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
1895 | |||
1896 | /* | ||
1897 | ** Set the pointer-map entries for all children of page pPage. Also, if | ||
1898 | ** pPage contains cells that point to overflow pages, set the pointer | ||
1899 | ** map entries for the overflow pages as well. | ||
1900 | */ | ||
1901 | static int setChildPtrmaps(MemPage *pPage){ | ||
1902 | int i; /* Counter variable */ | ||
1903 | int nCell; /* Number of cells in page pPage */ | ||
1904 | int rc; /* Return code */ | ||
1905 | BtShared *pBt = pPage->pBt; | ||
1906 | int isInitOrig = pPage->isInit; | ||
1907 | Pgno pgno = pPage->pgno; | ||
1908 | |||
1909 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
1910 | rc = sqlite3BtreeInitPage(pPage, pPage->pParent); | ||
1911 | if( rc!=SQLITE_OK ){ | ||
1912 | goto set_child_ptrmaps_out; | ||
1913 | } | ||
1914 | nCell = pPage->nCell; | ||
1915 | |||
1916 | for(i=0; i<nCell; i++){ | ||
1917 | u8 *pCell = findCell(pPage, i); | ||
1918 | |||
1919 | rc = ptrmapPutOvflPtr(pPage, pCell); | ||
1920 | if( rc!=SQLITE_OK ){ | ||
1921 | goto set_child_ptrmaps_out; | ||
1922 | } | ||
1923 | |||
1924 | if( !pPage->leaf ){ | ||
1925 | Pgno childPgno = get4byte(pCell); | ||
1926 | rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno); | ||
1927 | if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out; | ||
1928 | } | ||
1929 | } | ||
1930 | |||
1931 | if( !pPage->leaf ){ | ||
1932 | Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); | ||
1933 | rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno); | ||
1934 | } | ||
1935 | |||
1936 | set_child_ptrmaps_out: | ||
1937 | pPage->isInit = isInitOrig; | ||
1938 | return rc; | ||
1939 | } | ||
1940 | |||
1941 | /* | ||
1942 | ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow | ||
1943 | ** page, is a pointer to page iFrom. Modify this pointer so that it points to | ||
1944 | ** iTo. Parameter eType describes the type of pointer to be modified, as | ||
1945 | ** follows: | ||
1946 | ** | ||
1947 | ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child | ||
1948 | ** page of pPage. | ||
1949 | ** | ||
1950 | ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow | ||
1951 | ** page pointed to by one of the cells on pPage. | ||
1952 | ** | ||
1953 | ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next | ||
1954 | ** overflow page in the list. | ||
1955 | */ | ||
1956 | static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ | ||
1957 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
1958 | if( eType==PTRMAP_OVERFLOW2 ){ | ||
1959 | /* The pointer is always the first 4 bytes of the page in this case. */ | ||
1960 | if( get4byte(pPage->aData)!=iFrom ){ | ||
1961 | return SQLITE_CORRUPT_BKPT; | ||
1962 | } | ||
1963 | put4byte(pPage->aData, iTo); | ||
1964 | }else{ | ||
1965 | int isInitOrig = pPage->isInit; | ||
1966 | int i; | ||
1967 | int nCell; | ||
1968 | |||
1969 | sqlite3BtreeInitPage(pPage, 0); | ||
1970 | nCell = pPage->nCell; | ||
1971 | |||
1972 | for(i=0; i<nCell; i++){ | ||
1973 | u8 *pCell = findCell(pPage, i); | ||
1974 | if( eType==PTRMAP_OVERFLOW1 ){ | ||
1975 | CellInfo info; | ||
1976 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
1977 | if( info.iOverflow ){ | ||
1978 | if( iFrom==get4byte(&pCell[info.iOverflow]) ){ | ||
1979 | put4byte(&pCell[info.iOverflow], iTo); | ||
1980 | break; | ||
1981 | } | ||
1982 | } | ||
1983 | }else{ | ||
1984 | if( get4byte(pCell)==iFrom ){ | ||
1985 | put4byte(pCell, iTo); | ||
1986 | break; | ||
1987 | } | ||
1988 | } | ||
1989 | } | ||
1990 | |||
1991 | if( i==nCell ){ | ||
1992 | if( eType!=PTRMAP_BTREE || | ||
1993 | get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ | ||
1994 | return SQLITE_CORRUPT_BKPT; | ||
1995 | } | ||
1996 | put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); | ||
1997 | } | ||
1998 | |||
1999 | pPage->isInit = isInitOrig; | ||
2000 | } | ||
2001 | return SQLITE_OK; | ||
2002 | } | ||
2003 | |||
2004 | |||
2005 | /* | ||
2006 | ** Move the open database page pDbPage to location iFreePage in the | ||
2007 | ** database. The pDbPage reference remains valid. | ||
2008 | */ | ||
2009 | static int relocatePage( | ||
2010 | BtShared *pBt, /* Btree */ | ||
2011 | MemPage *pDbPage, /* Open page to move */ | ||
2012 | u8 eType, /* Pointer map 'type' entry for pDbPage */ | ||
2013 | Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ | ||
2014 | Pgno iFreePage /* The location to move pDbPage to */ | ||
2015 | ){ | ||
2016 | MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ | ||
2017 | Pgno iDbPage = pDbPage->pgno; | ||
2018 | Pager *pPager = pBt->pPager; | ||
2019 | int rc; | ||
2020 | |||
2021 | assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || | ||
2022 | eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); | ||
2023 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
2024 | assert( pDbPage->pBt==pBt ); | ||
2025 | |||
2026 | /* Move page iDbPage from it's current location to page number iFreePage */ | ||
2027 | TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", | ||
2028 | iDbPage, iFreePage, iPtrPage, eType)); | ||
2029 | rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage); | ||
2030 | if( rc!=SQLITE_OK ){ | ||
2031 | return rc; | ||
2032 | } | ||
2033 | pDbPage->pgno = iFreePage; | ||
2034 | |||
2035 | /* If pDbPage was a btree-page, then it may have child pages and/or cells | ||
2036 | ** that point to overflow pages. The pointer map entries for all these | ||
2037 | ** pages need to be changed. | ||
2038 | ** | ||
2039 | ** If pDbPage is an overflow page, then the first 4 bytes may store a | ||
2040 | ** pointer to a subsequent overflow page. If this is the case, then | ||
2041 | ** the pointer map needs to be updated for the subsequent overflow page. | ||
2042 | */ | ||
2043 | if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ | ||
2044 | rc = setChildPtrmaps(pDbPage); | ||
2045 | if( rc!=SQLITE_OK ){ | ||
2046 | return rc; | ||
2047 | } | ||
2048 | }else{ | ||
2049 | Pgno nextOvfl = get4byte(pDbPage->aData); | ||
2050 | if( nextOvfl!=0 ){ | ||
2051 | rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage); | ||
2052 | if( rc!=SQLITE_OK ){ | ||
2053 | return rc; | ||
2054 | } | ||
2055 | } | ||
2056 | } | ||
2057 | |||
2058 | /* Fix the database pointer on page iPtrPage that pointed at iDbPage so | ||
2059 | ** that it points at iFreePage. Also fix the pointer map entry for | ||
2060 | ** iPtrPage. | ||
2061 | */ | ||
2062 | if( eType!=PTRMAP_ROOTPAGE ){ | ||
2063 | rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0); | ||
2064 | if( rc!=SQLITE_OK ){ | ||
2065 | return rc; | ||
2066 | } | ||
2067 | rc = sqlite3PagerWrite(pPtrPage->pDbPage); | ||
2068 | if( rc!=SQLITE_OK ){ | ||
2069 | releasePage(pPtrPage); | ||
2070 | return rc; | ||
2071 | } | ||
2072 | rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); | ||
2073 | releasePage(pPtrPage); | ||
2074 | if( rc==SQLITE_OK ){ | ||
2075 | rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage); | ||
2076 | } | ||
2077 | } | ||
2078 | return rc; | ||
2079 | } | ||
2080 | |||
2081 | /* Forward declaration required by incrVacuumStep(). */ | ||
2082 | static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); | ||
2083 | |||
2084 | /* | ||
2085 | ** Perform a single step of an incremental-vacuum. If successful, | ||
2086 | ** return SQLITE_OK. If there is no work to do (and therefore no | ||
2087 | ** point in calling this function again), return SQLITE_DONE. | ||
2088 | ** | ||
2089 | ** More specificly, this function attempts to re-organize the | ||
2090 | ** database so that the last page of the file currently in use | ||
2091 | ** is no longer in use. | ||
2092 | ** | ||
2093 | ** If the nFin parameter is non-zero, the implementation assumes | ||
2094 | ** that the caller will keep calling incrVacuumStep() until | ||
2095 | ** it returns SQLITE_DONE or an error, and that nFin is the | ||
2096 | ** number of pages the database file will contain after this | ||
2097 | ** process is complete. | ||
2098 | */ | ||
2099 | static int incrVacuumStep(BtShared *pBt, Pgno nFin){ | ||
2100 | Pgno iLastPg; /* Last page in the database */ | ||
2101 | Pgno nFreeList; /* Number of pages still on the free-list */ | ||
2102 | |||
2103 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
2104 | iLastPg = pBt->nTrunc; | ||
2105 | if( iLastPg==0 ){ | ||
2106 | iLastPg = sqlite3PagerPagecount(pBt->pPager); | ||
2107 | } | ||
2108 | |||
2109 | if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ | ||
2110 | int rc; | ||
2111 | u8 eType; | ||
2112 | Pgno iPtrPage; | ||
2113 | |||
2114 | nFreeList = get4byte(&pBt->pPage1->aData[36]); | ||
2115 | if( nFreeList==0 || nFin==iLastPg ){ | ||
2116 | return SQLITE_DONE; | ||
2117 | } | ||
2118 | |||
2119 | rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); | ||
2120 | if( rc!=SQLITE_OK ){ | ||
2121 | return rc; | ||
2122 | } | ||
2123 | if( eType==PTRMAP_ROOTPAGE ){ | ||
2124 | return SQLITE_CORRUPT_BKPT; | ||
2125 | } | ||
2126 | |||
2127 | if( eType==PTRMAP_FREEPAGE ){ | ||
2128 | if( nFin==0 ){ | ||
2129 | /* Remove the page from the files free-list. This is not required | ||
2130 | ** if nFin is non-zero. In that case, the free-list will be | ||
2131 | ** truncated to zero after this function returns, so it doesn't | ||
2132 | ** matter if it still contains some garbage entries. | ||
2133 | */ | ||
2134 | Pgno iFreePg; | ||
2135 | MemPage *pFreePg; | ||
2136 | rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1); | ||
2137 | if( rc!=SQLITE_OK ){ | ||
2138 | return rc; | ||
2139 | } | ||
2140 | assert( iFreePg==iLastPg ); | ||
2141 | releasePage(pFreePg); | ||
2142 | } | ||
2143 | } else { | ||
2144 | Pgno iFreePg; /* Index of free page to move pLastPg to */ | ||
2145 | MemPage *pLastPg; | ||
2146 | |||
2147 | rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0); | ||
2148 | if( rc!=SQLITE_OK ){ | ||
2149 | return rc; | ||
2150 | } | ||
2151 | |||
2152 | /* If nFin is zero, this loop runs exactly once and page pLastPg | ||
2153 | ** is swapped with the first free page pulled off the free list. | ||
2154 | ** | ||
2155 | ** On the other hand, if nFin is greater than zero, then keep | ||
2156 | ** looping until a free-page located within the first nFin pages | ||
2157 | ** of the file is found. | ||
2158 | */ | ||
2159 | do { | ||
2160 | MemPage *pFreePg; | ||
2161 | rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0); | ||
2162 | if( rc!=SQLITE_OK ){ | ||
2163 | releasePage(pLastPg); | ||
2164 | return rc; | ||
2165 | } | ||
2166 | releasePage(pFreePg); | ||
2167 | }while( nFin!=0 && iFreePg>nFin ); | ||
2168 | assert( iFreePg<iLastPg ); | ||
2169 | |||
2170 | rc = sqlite3PagerWrite(pLastPg->pDbPage); | ||
2171 | if( rc!=SQLITE_OK ){ | ||
2172 | return rc; | ||
2173 | } | ||
2174 | rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg); | ||
2175 | releasePage(pLastPg); | ||
2176 | if( rc!=SQLITE_OK ){ | ||
2177 | return rc; | ||
2178 | } | ||
2179 | } | ||
2180 | } | ||
2181 | |||
2182 | pBt->nTrunc = iLastPg - 1; | ||
2183 | while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){ | ||
2184 | pBt->nTrunc--; | ||
2185 | } | ||
2186 | return SQLITE_OK; | ||
2187 | } | ||
2188 | |||
2189 | /* | ||
2190 | ** A write-transaction must be opened before calling this function. | ||
2191 | ** It performs a single unit of work towards an incremental vacuum. | ||
2192 | ** | ||
2193 | ** If the incremental vacuum is finished after this function has run, | ||
2194 | ** SQLITE_DONE is returned. If it is not finished, but no error occured, | ||
2195 | ** SQLITE_OK is returned. Otherwise an SQLite error code. | ||
2196 | */ | ||
2197 | int sqlite3BtreeIncrVacuum(Btree *p){ | ||
2198 | int rc; | ||
2199 | BtShared *pBt = p->pBt; | ||
2200 | |||
2201 | sqlite3BtreeEnter(p); | ||
2202 | assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); | ||
2203 | if( !pBt->autoVacuum ){ | ||
2204 | rc = SQLITE_DONE; | ||
2205 | }else{ | ||
2206 | invalidateAllOverflowCache(pBt); | ||
2207 | rc = incrVacuumStep(pBt, 0); | ||
2208 | } | ||
2209 | sqlite3BtreeLeave(p); | ||
2210 | return rc; | ||
2211 | } | ||
2212 | |||
2213 | /* | ||
2214 | ** This routine is called prior to sqlite3PagerCommit when a transaction | ||
2215 | ** is commited for an auto-vacuum database. | ||
2216 | ** | ||
2217 | ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages | ||
2218 | ** the database file should be truncated to during the commit process. | ||
2219 | ** i.e. the database has been reorganized so that only the first *pnTrunc | ||
2220 | ** pages are in use. | ||
2221 | */ | ||
2222 | static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){ | ||
2223 | int rc = SQLITE_OK; | ||
2224 | Pager *pPager = pBt->pPager; | ||
2225 | #ifndef NDEBUG | ||
2226 | int nRef = sqlite3PagerRefcount(pPager); | ||
2227 | #endif | ||
2228 | |||
2229 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
2230 | invalidateAllOverflowCache(pBt); | ||
2231 | assert(pBt->autoVacuum); | ||
2232 | if( !pBt->incrVacuum ){ | ||
2233 | Pgno nFin = 0; | ||
2234 | |||
2235 | if( pBt->nTrunc==0 ){ | ||
2236 | Pgno nFree; | ||
2237 | Pgno nPtrmap; | ||
2238 | const int pgsz = pBt->pageSize; | ||
2239 | Pgno nOrig = sqlite3PagerPagecount(pBt->pPager); | ||
2240 | |||
2241 | if( PTRMAP_ISPAGE(pBt, nOrig) ){ | ||
2242 | return SQLITE_CORRUPT_BKPT; | ||
2243 | } | ||
2244 | if( nOrig==PENDING_BYTE_PAGE(pBt) ){ | ||
2245 | nOrig--; | ||
2246 | } | ||
2247 | nFree = get4byte(&pBt->pPage1->aData[36]); | ||
2248 | nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5); | ||
2249 | nFin = nOrig - nFree - nPtrmap; | ||
2250 | if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){ | ||
2251 | nFin--; | ||
2252 | } | ||
2253 | while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ | ||
2254 | nFin--; | ||
2255 | } | ||
2256 | } | ||
2257 | |||
2258 | while( rc==SQLITE_OK ){ | ||
2259 | rc = incrVacuumStep(pBt, nFin); | ||
2260 | } | ||
2261 | if( rc==SQLITE_DONE ){ | ||
2262 | assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc); | ||
2263 | rc = SQLITE_OK; | ||
2264 | if( pBt->nTrunc ){ | ||
2265 | rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | ||
2266 | put4byte(&pBt->pPage1->aData[32], 0); | ||
2267 | put4byte(&pBt->pPage1->aData[36], 0); | ||
2268 | pBt->nTrunc = nFin; | ||
2269 | } | ||
2270 | } | ||
2271 | if( rc!=SQLITE_OK ){ | ||
2272 | sqlite3PagerRollback(pPager); | ||
2273 | } | ||
2274 | } | ||
2275 | |||
2276 | if( rc==SQLITE_OK ){ | ||
2277 | *pnTrunc = pBt->nTrunc; | ||
2278 | pBt->nTrunc = 0; | ||
2279 | } | ||
2280 | assert( nRef==sqlite3PagerRefcount(pPager) ); | ||
2281 | return rc; | ||
2282 | } | ||
2283 | |||
2284 | #endif | ||
2285 | |||
2286 | /* | ||
2287 | ** This routine does the first phase of a two-phase commit. This routine | ||
2288 | ** causes a rollback journal to be created (if it does not already exist) | ||
2289 | ** and populated with enough information so that if a power loss occurs | ||
2290 | ** the database can be restored to its original state by playing back | ||
2291 | ** the journal. Then the contents of the journal are flushed out to | ||
2292 | ** the disk. After the journal is safely on oxide, the changes to the | ||
2293 | ** database are written into the database file and flushed to oxide. | ||
2294 | ** At the end of this call, the rollback journal still exists on the | ||
2295 | ** disk and we are still holding all locks, so the transaction has not | ||
2296 | ** committed. See sqlite3BtreeCommit() for the second phase of the | ||
2297 | ** commit process. | ||
2298 | ** | ||
2299 | ** This call is a no-op if no write-transaction is currently active on pBt. | ||
2300 | ** | ||
2301 | ** Otherwise, sync the database file for the btree pBt. zMaster points to | ||
2302 | ** the name of a master journal file that should be written into the | ||
2303 | ** individual journal file, or is NULL, indicating no master journal file | ||
2304 | ** (single database transaction). | ||
2305 | ** | ||
2306 | ** When this is called, the master journal should already have been | ||
2307 | ** created, populated with this journal pointer and synced to disk. | ||
2308 | ** | ||
2309 | ** Once this is routine has returned, the only thing required to commit | ||
2310 | ** the write-transaction for this database file is to delete the journal. | ||
2311 | */ | ||
2312 | int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ | ||
2313 | int rc = SQLITE_OK; | ||
2314 | if( p->inTrans==TRANS_WRITE ){ | ||
2315 | BtShared *pBt = p->pBt; | ||
2316 | Pgno nTrunc = 0; | ||
2317 | sqlite3BtreeEnter(p); | ||
2318 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
2319 | if( pBt->autoVacuum ){ | ||
2320 | rc = autoVacuumCommit(pBt, &nTrunc); | ||
2321 | if( rc!=SQLITE_OK ){ | ||
2322 | sqlite3BtreeLeave(p); | ||
2323 | return rc; | ||
2324 | } | ||
2325 | } | ||
2326 | #endif | ||
2327 | rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc); | ||
2328 | sqlite3BtreeLeave(p); | ||
2329 | } | ||
2330 | return rc; | ||
2331 | } | ||
2332 | |||
2333 | /* | ||
2334 | ** Commit the transaction currently in progress. | ||
2335 | ** | ||
2336 | ** This routine implements the second phase of a 2-phase commit. The | ||
2337 | ** sqlite3BtreeSync() routine does the first phase and should be invoked | ||
2338 | ** prior to calling this routine. The sqlite3BtreeSync() routine did | ||
2339 | ** all the work of writing information out to disk and flushing the | ||
2340 | ** contents so that they are written onto the disk platter. All this | ||
2341 | ** routine has to do is delete or truncate the rollback journal | ||
2342 | ** (which causes the transaction to commit) and drop locks. | ||
2343 | ** | ||
2344 | ** This will release the write lock on the database file. If there | ||
2345 | ** are no active cursors, it also releases the read lock. | ||
2346 | */ | ||
2347 | int sqlite3BtreeCommitPhaseTwo(Btree *p){ | ||
2348 | BtShared *pBt = p->pBt; | ||
2349 | |||
2350 | sqlite3BtreeEnter(p); | ||
2351 | btreeIntegrity(p); | ||
2352 | |||
2353 | /* If the handle has a write-transaction open, commit the shared-btrees | ||
2354 | ** transaction and set the shared state to TRANS_READ. | ||
2355 | */ | ||
2356 | if( p->inTrans==TRANS_WRITE ){ | ||
2357 | int rc; | ||
2358 | assert( pBt->inTransaction==TRANS_WRITE ); | ||
2359 | assert( pBt->nTransaction>0 ); | ||
2360 | rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); | ||
2361 | if( rc!=SQLITE_OK ){ | ||
2362 | sqlite3BtreeLeave(p); | ||
2363 | return rc; | ||
2364 | } | ||
2365 | pBt->inTransaction = TRANS_READ; | ||
2366 | pBt->inStmt = 0; | ||
2367 | } | ||
2368 | unlockAllTables(p); | ||
2369 | |||
2370 | /* If the handle has any kind of transaction open, decrement the transaction | ||
2371 | ** count of the shared btree. If the transaction count reaches 0, set | ||
2372 | ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below | ||
2373 | ** will unlock the pager. | ||
2374 | */ | ||
2375 | if( p->inTrans!=TRANS_NONE ){ | ||
2376 | pBt->nTransaction--; | ||
2377 | if( 0==pBt->nTransaction ){ | ||
2378 | pBt->inTransaction = TRANS_NONE; | ||
2379 | } | ||
2380 | } | ||
2381 | |||
2382 | /* Set the handles current transaction state to TRANS_NONE and unlock | ||
2383 | ** the pager if this call closed the only read or write transaction. | ||
2384 | */ | ||
2385 | p->inTrans = TRANS_NONE; | ||
2386 | unlockBtreeIfUnused(pBt); | ||
2387 | |||
2388 | btreeIntegrity(p); | ||
2389 | sqlite3BtreeLeave(p); | ||
2390 | return SQLITE_OK; | ||
2391 | } | ||
2392 | |||
2393 | /* | ||
2394 | ** Do both phases of a commit. | ||
2395 | */ | ||
2396 | int sqlite3BtreeCommit(Btree *p){ | ||
2397 | int rc; | ||
2398 | sqlite3BtreeEnter(p); | ||
2399 | rc = sqlite3BtreeCommitPhaseOne(p, 0); | ||
2400 | if( rc==SQLITE_OK ){ | ||
2401 | rc = sqlite3BtreeCommitPhaseTwo(p); | ||
2402 | } | ||
2403 | sqlite3BtreeLeave(p); | ||
2404 | return rc; | ||
2405 | } | ||
2406 | |||
2407 | #ifndef NDEBUG | ||
2408 | /* | ||
2409 | ** Return the number of write-cursors open on this handle. This is for use | ||
2410 | ** in assert() expressions, so it is only compiled if NDEBUG is not | ||
2411 | ** defined. | ||
2412 | ** | ||
2413 | ** For the purposes of this routine, a write-cursor is any cursor that | ||
2414 | ** is capable of writing to the databse. That means the cursor was | ||
2415 | ** originally opened for writing and the cursor has not be disabled | ||
2416 | ** by having its state changed to CURSOR_FAULT. | ||
2417 | */ | ||
2418 | static int countWriteCursors(BtShared *pBt){ | ||
2419 | BtCursor *pCur; | ||
2420 | int r = 0; | ||
2421 | for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ | ||
2422 | if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++; | ||
2423 | } | ||
2424 | return r; | ||
2425 | } | ||
2426 | #endif | ||
2427 | |||
2428 | /* | ||
2429 | ** This routine sets the state to CURSOR_FAULT and the error | ||
2430 | ** code to errCode for every cursor on BtShared that pBtree | ||
2431 | ** references. | ||
2432 | ** | ||
2433 | ** Every cursor is tripped, including cursors that belong | ||
2434 | ** to other database connections that happen to be sharing | ||
2435 | ** the cache with pBtree. | ||
2436 | ** | ||
2437 | ** This routine gets called when a rollback occurs. | ||
2438 | ** All cursors using the same cache must be tripped | ||
2439 | ** to prevent them from trying to use the btree after | ||
2440 | ** the rollback. The rollback may have deleted tables | ||
2441 | ** or moved root pages, so it is not sufficient to | ||
2442 | ** save the state of the cursor. The cursor must be | ||
2443 | ** invalidated. | ||
2444 | */ | ||
2445 | void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){ | ||
2446 | BtCursor *p; | ||
2447 | sqlite3BtreeEnter(pBtree); | ||
2448 | for(p=pBtree->pBt->pCursor; p; p=p->pNext){ | ||
2449 | clearCursorPosition(p); | ||
2450 | p->eState = CURSOR_FAULT; | ||
2451 | p->skip = errCode; | ||
2452 | } | ||
2453 | sqlite3BtreeLeave(pBtree); | ||
2454 | } | ||
2455 | |||
2456 | /* | ||
2457 | ** Rollback the transaction in progress. All cursors will be | ||
2458 | ** invalided by this operation. Any attempt to use a cursor | ||
2459 | ** that was open at the beginning of this operation will result | ||
2460 | ** in an error. | ||
2461 | ** | ||
2462 | ** This will release the write lock on the database file. If there | ||
2463 | ** are no active cursors, it also releases the read lock. | ||
2464 | */ | ||
2465 | int sqlite3BtreeRollback(Btree *p){ | ||
2466 | int rc; | ||
2467 | BtShared *pBt = p->pBt; | ||
2468 | MemPage *pPage1; | ||
2469 | |||
2470 | sqlite3BtreeEnter(p); | ||
2471 | rc = saveAllCursors(pBt, 0, 0); | ||
2472 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
2473 | if( rc!=SQLITE_OK ){ | ||
2474 | /* This is a horrible situation. An IO or malloc() error occured whilst | ||
2475 | ** trying to save cursor positions. If this is an automatic rollback (as | ||
2476 | ** the result of a constraint, malloc() failure or IO error) then | ||
2477 | ** the cache may be internally inconsistent (not contain valid trees) so | ||
2478 | ** we cannot simply return the error to the caller. Instead, abort | ||
2479 | ** all queries that may be using any of the cursors that failed to save. | ||
2480 | */ | ||
2481 | sqlite3BtreeTripAllCursors(p, rc); | ||
2482 | } | ||
2483 | #endif | ||
2484 | btreeIntegrity(p); | ||
2485 | unlockAllTables(p); | ||
2486 | |||
2487 | if( p->inTrans==TRANS_WRITE ){ | ||
2488 | int rc2; | ||
2489 | |||
2490 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
2491 | pBt->nTrunc = 0; | ||
2492 | #endif | ||
2493 | |||
2494 | assert( TRANS_WRITE==pBt->inTransaction ); | ||
2495 | rc2 = sqlite3PagerRollback(pBt->pPager); | ||
2496 | if( rc2!=SQLITE_OK ){ | ||
2497 | rc = rc2; | ||
2498 | } | ||
2499 | |||
2500 | /* The rollback may have destroyed the pPage1->aData value. So | ||
2501 | ** call sqlite3BtreeGetPage() on page 1 again to make | ||
2502 | ** sure pPage1->aData is set correctly. */ | ||
2503 | if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ | ||
2504 | releasePage(pPage1); | ||
2505 | } | ||
2506 | assert( countWriteCursors(pBt)==0 ); | ||
2507 | pBt->inTransaction = TRANS_READ; | ||
2508 | } | ||
2509 | |||
2510 | if( p->inTrans!=TRANS_NONE ){ | ||
2511 | assert( pBt->nTransaction>0 ); | ||
2512 | pBt->nTransaction--; | ||
2513 | if( 0==pBt->nTransaction ){ | ||
2514 | pBt->inTransaction = TRANS_NONE; | ||
2515 | } | ||
2516 | } | ||
2517 | |||
2518 | p->inTrans = TRANS_NONE; | ||
2519 | pBt->inStmt = 0; | ||
2520 | unlockBtreeIfUnused(pBt); | ||
2521 | |||
2522 | btreeIntegrity(p); | ||
2523 | sqlite3BtreeLeave(p); | ||
2524 | return rc; | ||
2525 | } | ||
2526 | |||
2527 | /* | ||
2528 | ** Start a statement subtransaction. The subtransaction can | ||
2529 | ** can be rolled back independently of the main transaction. | ||
2530 | ** You must start a transaction before starting a subtransaction. | ||
2531 | ** The subtransaction is ended automatically if the main transaction | ||
2532 | ** commits or rolls back. | ||
2533 | ** | ||
2534 | ** Only one subtransaction may be active at a time. It is an error to try | ||
2535 | ** to start a new subtransaction if another subtransaction is already active. | ||
2536 | ** | ||
2537 | ** Statement subtransactions are used around individual SQL statements | ||
2538 | ** that are contained within a BEGIN...COMMIT block. If a constraint | ||
2539 | ** error occurs within the statement, the effect of that one statement | ||
2540 | ** can be rolled back without having to rollback the entire transaction. | ||
2541 | */ | ||
2542 | int sqlite3BtreeBeginStmt(Btree *p){ | ||
2543 | int rc; | ||
2544 | BtShared *pBt = p->pBt; | ||
2545 | sqlite3BtreeEnter(p); | ||
2546 | if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){ | ||
2547 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
2548 | }else{ | ||
2549 | assert( pBt->inTransaction==TRANS_WRITE ); | ||
2550 | rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager); | ||
2551 | pBt->inStmt = 1; | ||
2552 | } | ||
2553 | sqlite3BtreeLeave(p); | ||
2554 | return rc; | ||
2555 | } | ||
2556 | |||
2557 | |||
2558 | /* | ||
2559 | ** Commit the statment subtransaction currently in progress. If no | ||
2560 | ** subtransaction is active, this is a no-op. | ||
2561 | */ | ||
2562 | int sqlite3BtreeCommitStmt(Btree *p){ | ||
2563 | int rc; | ||
2564 | BtShared *pBt = p->pBt; | ||
2565 | sqlite3BtreeEnter(p); | ||
2566 | if( pBt->inStmt && !pBt->readOnly ){ | ||
2567 | rc = sqlite3PagerStmtCommit(pBt->pPager); | ||
2568 | }else{ | ||
2569 | rc = SQLITE_OK; | ||
2570 | } | ||
2571 | pBt->inStmt = 0; | ||
2572 | sqlite3BtreeLeave(p); | ||
2573 | return rc; | ||
2574 | } | ||
2575 | |||
2576 | /* | ||
2577 | ** Rollback the active statement subtransaction. If no subtransaction | ||
2578 | ** is active this routine is a no-op. | ||
2579 | ** | ||
2580 | ** All cursors will be invalidated by this operation. Any attempt | ||
2581 | ** to use a cursor that was open at the beginning of this operation | ||
2582 | ** will result in an error. | ||
2583 | */ | ||
2584 | int sqlite3BtreeRollbackStmt(Btree *p){ | ||
2585 | int rc = SQLITE_OK; | ||
2586 | BtShared *pBt = p->pBt; | ||
2587 | sqlite3BtreeEnter(p); | ||
2588 | if( pBt->inStmt && !pBt->readOnly ){ | ||
2589 | rc = sqlite3PagerStmtRollback(pBt->pPager); | ||
2590 | assert( countWriteCursors(pBt)==0 ); | ||
2591 | pBt->inStmt = 0; | ||
2592 | } | ||
2593 | sqlite3BtreeLeave(p); | ||
2594 | return rc; | ||
2595 | } | ||
2596 | |||
2597 | /* | ||
2598 | ** Default key comparison function to be used if no comparison function | ||
2599 | ** is specified on the sqlite3BtreeCursor() call. | ||
2600 | */ | ||
2601 | static int dfltCompare( | ||
2602 | void *NotUsed, /* User data is not used */ | ||
2603 | int n1, const void *p1, /* First key to compare */ | ||
2604 | int n2, const void *p2 /* Second key to compare */ | ||
2605 | ){ | ||
2606 | int c; | ||
2607 | c = memcmp(p1, p2, n1<n2 ? n1 : n2); | ||
2608 | if( c==0 ){ | ||
2609 | c = n1 - n2; | ||
2610 | } | ||
2611 | return c; | ||
2612 | } | ||
2613 | |||
2614 | /* | ||
2615 | ** Create a new cursor for the BTree whose root is on the page | ||
2616 | ** iTable. The act of acquiring a cursor gets a read lock on | ||
2617 | ** the database file. | ||
2618 | ** | ||
2619 | ** If wrFlag==0, then the cursor can only be used for reading. | ||
2620 | ** If wrFlag==1, then the cursor can be used for reading or for | ||
2621 | ** writing if other conditions for writing are also met. These | ||
2622 | ** are the conditions that must be met in order for writing to | ||
2623 | ** be allowed: | ||
2624 | ** | ||
2625 | ** 1: The cursor must have been opened with wrFlag==1 | ||
2626 | ** | ||
2627 | ** 2: Other database connections that share the same pager cache | ||
2628 | ** but which are not in the READ_UNCOMMITTED state may not have | ||
2629 | ** cursors open with wrFlag==0 on the same table. Otherwise | ||
2630 | ** the changes made by this write cursor would be visible to | ||
2631 | ** the read cursors in the other database connection. | ||
2632 | ** | ||
2633 | ** 3: The database must be writable (not on read-only media) | ||
2634 | ** | ||
2635 | ** 4: There must be an active transaction. | ||
2636 | ** | ||
2637 | ** No checking is done to make sure that page iTable really is the | ||
2638 | ** root page of a b-tree. If it is not, then the cursor acquired | ||
2639 | ** will not work correctly. | ||
2640 | ** | ||
2641 | ** The comparison function must be logically the same for every cursor | ||
2642 | ** on a particular table. Changing the comparison function will result | ||
2643 | ** in incorrect operations. If the comparison function is NULL, a | ||
2644 | ** default comparison function is used. The comparison function is | ||
2645 | ** always ignored for INTKEY tables. | ||
2646 | */ | ||
2647 | static int btreeCursor( | ||
2648 | Btree *p, /* The btree */ | ||
2649 | int iTable, /* Root page of table to open */ | ||
2650 | int wrFlag, /* 1 to write. 0 read-only */ | ||
2651 | int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */ | ||
2652 | void *pArg, /* First arg to xCompare() */ | ||
2653 | BtCursor **ppCur /* Write new cursor here */ | ||
2654 | ){ | ||
2655 | int rc; | ||
2656 | BtCursor *pCur; | ||
2657 | BtShared *pBt = p->pBt; | ||
2658 | |||
2659 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
2660 | *ppCur = 0; | ||
2661 | if( wrFlag ){ | ||
2662 | if( pBt->readOnly ){ | ||
2663 | return SQLITE_READONLY; | ||
2664 | } | ||
2665 | if( checkReadLocks(p, iTable, 0) ){ | ||
2666 | return SQLITE_LOCKED; | ||
2667 | } | ||
2668 | } | ||
2669 | |||
2670 | if( pBt->pPage1==0 ){ | ||
2671 | rc = lockBtreeWithRetry(p); | ||
2672 | if( rc!=SQLITE_OK ){ | ||
2673 | return rc; | ||
2674 | } | ||
2675 | if( pBt->readOnly && wrFlag ){ | ||
2676 | return SQLITE_READONLY; | ||
2677 | } | ||
2678 | } | ||
2679 | pCur = sqlite3MallocZero( sizeof(*pCur) ); | ||
2680 | if( pCur==0 ){ | ||
2681 | rc = SQLITE_NOMEM; | ||
2682 | goto create_cursor_exception; | ||
2683 | } | ||
2684 | pCur->pgnoRoot = (Pgno)iTable; | ||
2685 | if( iTable==1 && sqlite3PagerPagecount(pBt->pPager)==0 ){ | ||
2686 | rc = SQLITE_EMPTY; | ||
2687 | goto create_cursor_exception; | ||
2688 | } | ||
2689 | rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0); | ||
2690 | if( rc!=SQLITE_OK ){ | ||
2691 | goto create_cursor_exception; | ||
2692 | } | ||
2693 | |||
2694 | /* Now that no other errors can occur, finish filling in the BtCursor | ||
2695 | ** variables, link the cursor into the BtShared list and set *ppCur (the | ||
2696 | ** output argument to this function). | ||
2697 | */ | ||
2698 | pCur->xCompare = xCmp ? xCmp : dfltCompare; | ||
2699 | pCur->pArg = pArg; | ||
2700 | pCur->pBtree = p; | ||
2701 | pCur->pBt = pBt; | ||
2702 | pCur->wrFlag = wrFlag; | ||
2703 | pCur->pNext = pBt->pCursor; | ||
2704 | if( pCur->pNext ){ | ||
2705 | pCur->pNext->pPrev = pCur; | ||
2706 | } | ||
2707 | pBt->pCursor = pCur; | ||
2708 | pCur->eState = CURSOR_INVALID; | ||
2709 | *ppCur = pCur; | ||
2710 | |||
2711 | return SQLITE_OK; | ||
2712 | |||
2713 | create_cursor_exception: | ||
2714 | if( pCur ){ | ||
2715 | releasePage(pCur->pPage); | ||
2716 | sqlite3_free(pCur); | ||
2717 | } | ||
2718 | unlockBtreeIfUnused(pBt); | ||
2719 | return rc; | ||
2720 | } | ||
2721 | int sqlite3BtreeCursor( | ||
2722 | Btree *p, /* The btree */ | ||
2723 | int iTable, /* Root page of table to open */ | ||
2724 | int wrFlag, /* 1 to write. 0 read-only */ | ||
2725 | int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */ | ||
2726 | void *pArg, /* First arg to xCompare() */ | ||
2727 | BtCursor **ppCur /* Write new cursor here */ | ||
2728 | ){ | ||
2729 | int rc; | ||
2730 | sqlite3BtreeEnter(p); | ||
2731 | rc = btreeCursor(p, iTable, wrFlag, xCmp, pArg, ppCur); | ||
2732 | sqlite3BtreeLeave(p); | ||
2733 | return rc; | ||
2734 | } | ||
2735 | |||
2736 | |||
2737 | /* | ||
2738 | ** Close a cursor. The read lock on the database file is released | ||
2739 | ** when the last cursor is closed. | ||
2740 | */ | ||
2741 | int sqlite3BtreeCloseCursor(BtCursor *pCur){ | ||
2742 | BtShared *pBt = pCur->pBt; | ||
2743 | Btree *pBtree = pCur->pBtree; | ||
2744 | |||
2745 | sqlite3BtreeEnter(pBtree); | ||
2746 | clearCursorPosition(pCur); | ||
2747 | if( pCur->pPrev ){ | ||
2748 | pCur->pPrev->pNext = pCur->pNext; | ||
2749 | }else{ | ||
2750 | pBt->pCursor = pCur->pNext; | ||
2751 | } | ||
2752 | if( pCur->pNext ){ | ||
2753 | pCur->pNext->pPrev = pCur->pPrev; | ||
2754 | } | ||
2755 | releasePage(pCur->pPage); | ||
2756 | unlockBtreeIfUnused(pBt); | ||
2757 | invalidateOverflowCache(pCur); | ||
2758 | sqlite3_free(pCur); | ||
2759 | sqlite3BtreeLeave(pBtree); | ||
2760 | return SQLITE_OK; | ||
2761 | } | ||
2762 | |||
2763 | /* | ||
2764 | ** Make a temporary cursor by filling in the fields of pTempCur. | ||
2765 | ** The temporary cursor is not on the cursor list for the Btree. | ||
2766 | */ | ||
2767 | void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){ | ||
2768 | assert( cursorHoldsMutex(pCur) ); | ||
2769 | memcpy(pTempCur, pCur, sizeof(*pCur)); | ||
2770 | pTempCur->pNext = 0; | ||
2771 | pTempCur->pPrev = 0; | ||
2772 | if( pTempCur->pPage ){ | ||
2773 | sqlite3PagerRef(pTempCur->pPage->pDbPage); | ||
2774 | } | ||
2775 | } | ||
2776 | |||
2777 | /* | ||
2778 | ** Delete a temporary cursor such as was made by the CreateTemporaryCursor() | ||
2779 | ** function above. | ||
2780 | */ | ||
2781 | void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){ | ||
2782 | assert( cursorHoldsMutex(pCur) ); | ||
2783 | if( pCur->pPage ){ | ||
2784 | sqlite3PagerUnref(pCur->pPage->pDbPage); | ||
2785 | } | ||
2786 | } | ||
2787 | |||
2788 | /* | ||
2789 | ** Make sure the BtCursor* given in the argument has a valid | ||
2790 | ** BtCursor.info structure. If it is not already valid, call | ||
2791 | ** sqlite3BtreeParseCell() to fill it in. | ||
2792 | ** | ||
2793 | ** BtCursor.info is a cache of the information in the current cell. | ||
2794 | ** Using this cache reduces the number of calls to sqlite3BtreeParseCell(). | ||
2795 | ** | ||
2796 | ** 2007-06-25: There is a bug in some versions of MSVC that cause the | ||
2797 | ** compiler to crash when getCellInfo() is implemented as a macro. | ||
2798 | ** But there is a measureable speed advantage to using the macro on gcc | ||
2799 | ** (when less compiler optimizations like -Os or -O0 are used and the | ||
2800 | ** compiler is not doing agressive inlining.) So we use a real function | ||
2801 | ** for MSVC and a macro for everything else. Ticket #2457. | ||
2802 | */ | ||
2803 | #ifndef NDEBUG | ||
2804 | static void assertCellInfo(BtCursor *pCur){ | ||
2805 | CellInfo info; | ||
2806 | memset(&info, 0, sizeof(info)); | ||
2807 | sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info); | ||
2808 | assert( memcmp(&info, &pCur->info, sizeof(info))==0 ); | ||
2809 | } | ||
2810 | #else | ||
2811 | #define assertCellInfo(x) | ||
2812 | #endif | ||
2813 | #ifdef _MSC_VER | ||
2814 | /* Use a real function in MSVC to work around bugs in that compiler. */ | ||
2815 | static void getCellInfo(BtCursor *pCur){ | ||
2816 | if( pCur->info.nSize==0 ){ | ||
2817 | sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info); | ||
2818 | }else{ | ||
2819 | assertCellInfo(pCur); | ||
2820 | } | ||
2821 | } | ||
2822 | #else /* if not _MSC_VER */ | ||
2823 | /* Use a macro in all other compilers so that the function is inlined */ | ||
2824 | #define getCellInfo(pCur) \ | ||
2825 | if( pCur->info.nSize==0 ){ \ | ||
2826 | sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info); \ | ||
2827 | }else{ \ | ||
2828 | assertCellInfo(pCur); \ | ||
2829 | } | ||
2830 | #endif /* _MSC_VER */ | ||
2831 | |||
2832 | /* | ||
2833 | ** Set *pSize to the size of the buffer needed to hold the value of | ||
2834 | ** the key for the current entry. If the cursor is not pointing | ||
2835 | ** to a valid entry, *pSize is set to 0. | ||
2836 | ** | ||
2837 | ** For a table with the INTKEY flag set, this routine returns the key | ||
2838 | ** itself, not the number of bytes in the key. | ||
2839 | */ | ||
2840 | int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ | ||
2841 | int rc; | ||
2842 | |||
2843 | assert( cursorHoldsMutex(pCur) ); | ||
2844 | rc = restoreOrClearCursorPosition(pCur); | ||
2845 | if( rc==SQLITE_OK ){ | ||
2846 | assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); | ||
2847 | if( pCur->eState==CURSOR_INVALID ){ | ||
2848 | *pSize = 0; | ||
2849 | }else{ | ||
2850 | getCellInfo(pCur); | ||
2851 | *pSize = pCur->info.nKey; | ||
2852 | } | ||
2853 | } | ||
2854 | return rc; | ||
2855 | } | ||
2856 | |||
2857 | /* | ||
2858 | ** Set *pSize to the number of bytes of data in the entry the | ||
2859 | ** cursor currently points to. Always return SQLITE_OK. | ||
2860 | ** Failure is not possible. If the cursor is not currently | ||
2861 | ** pointing to an entry (which can happen, for example, if | ||
2862 | ** the database is empty) then *pSize is set to 0. | ||
2863 | */ | ||
2864 | int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ | ||
2865 | int rc; | ||
2866 | |||
2867 | assert( cursorHoldsMutex(pCur) ); | ||
2868 | rc = restoreOrClearCursorPosition(pCur); | ||
2869 | if( rc==SQLITE_OK ){ | ||
2870 | assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); | ||
2871 | if( pCur->eState==CURSOR_INVALID ){ | ||
2872 | /* Not pointing at a valid entry - set *pSize to 0. */ | ||
2873 | *pSize = 0; | ||
2874 | }else{ | ||
2875 | getCellInfo(pCur); | ||
2876 | *pSize = pCur->info.nData; | ||
2877 | } | ||
2878 | } | ||
2879 | return rc; | ||
2880 | } | ||
2881 | |||
2882 | /* | ||
2883 | ** Given the page number of an overflow page in the database (parameter | ||
2884 | ** ovfl), this function finds the page number of the next page in the | ||
2885 | ** linked list of overflow pages. If possible, it uses the auto-vacuum | ||
2886 | ** pointer-map data instead of reading the content of page ovfl to do so. | ||
2887 | ** | ||
2888 | ** If an error occurs an SQLite error code is returned. Otherwise: | ||
2889 | ** | ||
2890 | ** Unless pPgnoNext is NULL, the page number of the next overflow | ||
2891 | ** page in the linked list is written to *pPgnoNext. If page ovfl | ||
2892 | ** is the last page in it's linked list, *pPgnoNext is set to zero. | ||
2893 | ** | ||
2894 | ** If ppPage is not NULL, *ppPage is set to the MemPage* handle | ||
2895 | ** for page ovfl. The underlying pager page may have been requested | ||
2896 | ** with the noContent flag set, so the page data accessable via | ||
2897 | ** this handle may not be trusted. | ||
2898 | */ | ||
2899 | static int getOverflowPage( | ||
2900 | BtShared *pBt, | ||
2901 | Pgno ovfl, /* Overflow page */ | ||
2902 | MemPage **ppPage, /* OUT: MemPage handle */ | ||
2903 | Pgno *pPgnoNext /* OUT: Next overflow page number */ | ||
2904 | ){ | ||
2905 | Pgno next = 0; | ||
2906 | int rc; | ||
2907 | |||
2908 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
2909 | /* One of these must not be NULL. Otherwise, why call this function? */ | ||
2910 | assert(ppPage || pPgnoNext); | ||
2911 | |||
2912 | /* If pPgnoNext is NULL, then this function is being called to obtain | ||
2913 | ** a MemPage* reference only. No page-data is required in this case. | ||
2914 | */ | ||
2915 | if( !pPgnoNext ){ | ||
2916 | return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1); | ||
2917 | } | ||
2918 | |||
2919 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
2920 | /* Try to find the next page in the overflow list using the | ||
2921 | ** autovacuum pointer-map pages. Guess that the next page in | ||
2922 | ** the overflow list is page number (ovfl+1). If that guess turns | ||
2923 | ** out to be wrong, fall back to loading the data of page | ||
2924 | ** number ovfl to determine the next page number. | ||
2925 | */ | ||
2926 | if( pBt->autoVacuum ){ | ||
2927 | Pgno pgno; | ||
2928 | Pgno iGuess = ovfl+1; | ||
2929 | u8 eType; | ||
2930 | |||
2931 | while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ | ||
2932 | iGuess++; | ||
2933 | } | ||
2934 | |||
2935 | if( iGuess<=sqlite3PagerPagecount(pBt->pPager) ){ | ||
2936 | rc = ptrmapGet(pBt, iGuess, &eType, &pgno); | ||
2937 | if( rc!=SQLITE_OK ){ | ||
2938 | return rc; | ||
2939 | } | ||
2940 | if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ | ||
2941 | next = iGuess; | ||
2942 | } | ||
2943 | } | ||
2944 | } | ||
2945 | #endif | ||
2946 | |||
2947 | if( next==0 || ppPage ){ | ||
2948 | MemPage *pPage = 0; | ||
2949 | |||
2950 | rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0); | ||
2951 | assert(rc==SQLITE_OK || pPage==0); | ||
2952 | if( next==0 && rc==SQLITE_OK ){ | ||
2953 | next = get4byte(pPage->aData); | ||
2954 | } | ||
2955 | |||
2956 | if( ppPage ){ | ||
2957 | *ppPage = pPage; | ||
2958 | }else{ | ||
2959 | releasePage(pPage); | ||
2960 | } | ||
2961 | } | ||
2962 | *pPgnoNext = next; | ||
2963 | |||
2964 | return rc; | ||
2965 | } | ||
2966 | |||
2967 | /* | ||
2968 | ** Copy data from a buffer to a page, or from a page to a buffer. | ||
2969 | ** | ||
2970 | ** pPayload is a pointer to data stored on database page pDbPage. | ||
2971 | ** If argument eOp is false, then nByte bytes of data are copied | ||
2972 | ** from pPayload to the buffer pointed at by pBuf. If eOp is true, | ||
2973 | ** then sqlite3PagerWrite() is called on pDbPage and nByte bytes | ||
2974 | ** of data are copied from the buffer pBuf to pPayload. | ||
2975 | ** | ||
2976 | ** SQLITE_OK is returned on success, otherwise an error code. | ||
2977 | */ | ||
2978 | static int copyPayload( | ||
2979 | void *pPayload, /* Pointer to page data */ | ||
2980 | void *pBuf, /* Pointer to buffer */ | ||
2981 | int nByte, /* Number of bytes to copy */ | ||
2982 | int eOp, /* 0 -> copy from page, 1 -> copy to page */ | ||
2983 | DbPage *pDbPage /* Page containing pPayload */ | ||
2984 | ){ | ||
2985 | if( eOp ){ | ||
2986 | /* Copy data from buffer to page (a write operation) */ | ||
2987 | int rc = sqlite3PagerWrite(pDbPage); | ||
2988 | if( rc!=SQLITE_OK ){ | ||
2989 | return rc; | ||
2990 | } | ||
2991 | memcpy(pPayload, pBuf, nByte); | ||
2992 | }else{ | ||
2993 | /* Copy data from page to buffer (a read operation) */ | ||
2994 | memcpy(pBuf, pPayload, nByte); | ||
2995 | } | ||
2996 | return SQLITE_OK; | ||
2997 | } | ||
2998 | |||
2999 | /* | ||
3000 | ** This function is used to read or overwrite payload information | ||
3001 | ** for the entry that the pCur cursor is pointing to. If the eOp | ||
3002 | ** parameter is 0, this is a read operation (data copied into | ||
3003 | ** buffer pBuf). If it is non-zero, a write (data copied from | ||
3004 | ** buffer pBuf). | ||
3005 | ** | ||
3006 | ** A total of "amt" bytes are read or written beginning at "offset". | ||
3007 | ** Data is read to or from the buffer pBuf. | ||
3008 | ** | ||
3009 | ** This routine does not make a distinction between key and data. | ||
3010 | ** It just reads or writes bytes from the payload area. Data might | ||
3011 | ** appear on the main page or be scattered out on multiple overflow | ||
3012 | ** pages. | ||
3013 | ** | ||
3014 | ** If the BtCursor.isIncrblobHandle flag is set, and the current | ||
3015 | ** cursor entry uses one or more overflow pages, this function | ||
3016 | ** allocates space for and lazily popluates the overflow page-list | ||
3017 | ** cache array (BtCursor.aOverflow). Subsequent calls use this | ||
3018 | ** cache to make seeking to the supplied offset more efficient. | ||
3019 | ** | ||
3020 | ** Once an overflow page-list cache has been allocated, it may be | ||
3021 | ** invalidated if some other cursor writes to the same table, or if | ||
3022 | ** the cursor is moved to a different row. Additionally, in auto-vacuum | ||
3023 | ** mode, the following events may invalidate an overflow page-list cache. | ||
3024 | ** | ||
3025 | ** * An incremental vacuum, | ||
3026 | ** * A commit in auto_vacuum="full" mode, | ||
3027 | ** * Creating a table (may require moving an overflow page). | ||
3028 | */ | ||
3029 | static int accessPayload( | ||
3030 | BtCursor *pCur, /* Cursor pointing to entry to read from */ | ||
3031 | int offset, /* Begin reading this far into payload */ | ||
3032 | int amt, /* Read this many bytes */ | ||
3033 | unsigned char *pBuf, /* Write the bytes into this buffer */ | ||
3034 | int skipKey, /* offset begins at data if this is true */ | ||
3035 | int eOp /* zero to read. non-zero to write. */ | ||
3036 | ){ | ||
3037 | unsigned char *aPayload; | ||
3038 | int rc = SQLITE_OK; | ||
3039 | u32 nKey; | ||
3040 | int iIdx = 0; | ||
3041 | MemPage *pPage = pCur->pPage; /* Btree page of current cursor entry */ | ||
3042 | BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ | ||
3043 | |||
3044 | assert( pPage ); | ||
3045 | assert( pCur->eState==CURSOR_VALID ); | ||
3046 | assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); | ||
3047 | assert( offset>=0 ); | ||
3048 | assert( cursorHoldsMutex(pCur) ); | ||
3049 | |||
3050 | getCellInfo(pCur); | ||
3051 | aPayload = pCur->info.pCell + pCur->info.nHeader; | ||
3052 | nKey = (pPage->intKey ? 0 : pCur->info.nKey); | ||
3053 | |||
3054 | if( skipKey ){ | ||
3055 | offset += nKey; | ||
3056 | } | ||
3057 | if( offset+amt > nKey+pCur->info.nData ){ | ||
3058 | /* Trying to read or write past the end of the data is an error */ | ||
3059 | return SQLITE_ERROR; | ||
3060 | } | ||
3061 | |||
3062 | /* Check if data must be read/written to/from the btree page itself. */ | ||
3063 | if( offset<pCur->info.nLocal ){ | ||
3064 | int a = amt; | ||
3065 | if( a+offset>pCur->info.nLocal ){ | ||
3066 | a = pCur->info.nLocal - offset; | ||
3067 | } | ||
3068 | rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage); | ||
3069 | offset = 0; | ||
3070 | pBuf += a; | ||
3071 | amt -= a; | ||
3072 | }else{ | ||
3073 | offset -= pCur->info.nLocal; | ||
3074 | } | ||
3075 | |||
3076 | if( rc==SQLITE_OK && amt>0 ){ | ||
3077 | const int ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ | ||
3078 | Pgno nextPage; | ||
3079 | |||
3080 | nextPage = get4byte(&aPayload[pCur->info.nLocal]); | ||
3081 | |||
3082 | #ifndef SQLITE_OMIT_INCRBLOB | ||
3083 | /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[] | ||
3084 | ** has not been allocated, allocate it now. The array is sized at | ||
3085 | ** one entry for each overflow page in the overflow chain. The | ||
3086 | ** page number of the first overflow page is stored in aOverflow[0], | ||
3087 | ** etc. A value of 0 in the aOverflow[] array means "not yet known" | ||
3088 | ** (the cache is lazily populated). | ||
3089 | */ | ||
3090 | if( pCur->isIncrblobHandle && !pCur->aOverflow ){ | ||
3091 | int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; | ||
3092 | pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl); | ||
3093 | if( nOvfl && !pCur->aOverflow ){ | ||
3094 | rc = SQLITE_NOMEM; | ||
3095 | } | ||
3096 | } | ||
3097 | |||
3098 | /* If the overflow page-list cache has been allocated and the | ||
3099 | ** entry for the first required overflow page is valid, skip | ||
3100 | ** directly to it. | ||
3101 | */ | ||
3102 | if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){ | ||
3103 | iIdx = (offset/ovflSize); | ||
3104 | nextPage = pCur->aOverflow[iIdx]; | ||
3105 | offset = (offset%ovflSize); | ||
3106 | } | ||
3107 | #endif | ||
3108 | |||
3109 | for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ | ||
3110 | |||
3111 | #ifndef SQLITE_OMIT_INCRBLOB | ||
3112 | /* If required, populate the overflow page-list cache. */ | ||
3113 | if( pCur->aOverflow ){ | ||
3114 | assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); | ||
3115 | pCur->aOverflow[iIdx] = nextPage; | ||
3116 | } | ||
3117 | #endif | ||
3118 | |||
3119 | if( offset>=ovflSize ){ | ||
3120 | /* The only reason to read this page is to obtain the page | ||
3121 | ** number for the next page in the overflow chain. The page | ||
3122 | ** data is not required. So first try to lookup the overflow | ||
3123 | ** page-list cache, if any, then fall back to the getOverflowPage() | ||
3124 | ** function. | ||
3125 | */ | ||
3126 | #ifndef SQLITE_OMIT_INCRBLOB | ||
3127 | if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){ | ||
3128 | nextPage = pCur->aOverflow[iIdx+1]; | ||
3129 | } else | ||
3130 | #endif | ||
3131 | rc = getOverflowPage(pBt, nextPage, 0, &nextPage); | ||
3132 | offset -= ovflSize; | ||
3133 | }else{ | ||
3134 | /* Need to read this page properly. It contains some of the | ||
3135 | ** range of data that is being read (eOp==0) or written (eOp!=0). | ||
3136 | */ | ||
3137 | DbPage *pDbPage; | ||
3138 | int a = amt; | ||
3139 | rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage); | ||
3140 | if( rc==SQLITE_OK ){ | ||
3141 | aPayload = sqlite3PagerGetData(pDbPage); | ||
3142 | nextPage = get4byte(aPayload); | ||
3143 | if( a + offset > ovflSize ){ | ||
3144 | a = ovflSize - offset; | ||
3145 | } | ||
3146 | rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); | ||
3147 | sqlite3PagerUnref(pDbPage); | ||
3148 | offset = 0; | ||
3149 | amt -= a; | ||
3150 | pBuf += a; | ||
3151 | } | ||
3152 | } | ||
3153 | } | ||
3154 | } | ||
3155 | |||
3156 | if( rc==SQLITE_OK && amt>0 ){ | ||
3157 | return SQLITE_CORRUPT_BKPT; | ||
3158 | } | ||
3159 | return rc; | ||
3160 | } | ||
3161 | |||
3162 | /* | ||
3163 | ** Read part of the key associated with cursor pCur. Exactly | ||
3164 | ** "amt" bytes will be transfered into pBuf[]. The transfer | ||
3165 | ** begins at "offset". | ||
3166 | ** | ||
3167 | ** Return SQLITE_OK on success or an error code if anything goes | ||
3168 | ** wrong. An error is returned if "offset+amt" is larger than | ||
3169 | ** the available payload. | ||
3170 | */ | ||
3171 | int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ | ||
3172 | int rc; | ||
3173 | |||
3174 | assert( cursorHoldsMutex(pCur) ); | ||
3175 | rc = restoreOrClearCursorPosition(pCur); | ||
3176 | if( rc==SQLITE_OK ){ | ||
3177 | assert( pCur->eState==CURSOR_VALID ); | ||
3178 | assert( pCur->pPage!=0 ); | ||
3179 | if( pCur->pPage->intKey ){ | ||
3180 | return SQLITE_CORRUPT_BKPT; | ||
3181 | } | ||
3182 | assert( pCur->pPage->intKey==0 ); | ||
3183 | assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell ); | ||
3184 | rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0); | ||
3185 | } | ||
3186 | return rc; | ||
3187 | } | ||
3188 | |||
3189 | /* | ||
3190 | ** Read part of the data associated with cursor pCur. Exactly | ||
3191 | ** "amt" bytes will be transfered into pBuf[]. The transfer | ||
3192 | ** begins at "offset". | ||
3193 | ** | ||
3194 | ** Return SQLITE_OK on success or an error code if anything goes | ||
3195 | ** wrong. An error is returned if "offset+amt" is larger than | ||
3196 | ** the available payload. | ||
3197 | */ | ||
3198 | int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ | ||
3199 | int rc; | ||
3200 | |||
3201 | assert( cursorHoldsMutex(pCur) ); | ||
3202 | rc = restoreOrClearCursorPosition(pCur); | ||
3203 | if( rc==SQLITE_OK ){ | ||
3204 | assert( pCur->eState==CURSOR_VALID ); | ||
3205 | assert( pCur->pPage!=0 ); | ||
3206 | assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell ); | ||
3207 | rc = accessPayload(pCur, offset, amt, pBuf, 1, 0); | ||
3208 | } | ||
3209 | return rc; | ||
3210 | } | ||
3211 | |||
3212 | /* | ||
3213 | ** Return a pointer to payload information from the entry that the | ||
3214 | ** pCur cursor is pointing to. The pointer is to the beginning of | ||
3215 | ** the key if skipKey==0 and it points to the beginning of data if | ||
3216 | ** skipKey==1. The number of bytes of available key/data is written | ||
3217 | ** into *pAmt. If *pAmt==0, then the value returned will not be | ||
3218 | ** a valid pointer. | ||
3219 | ** | ||
3220 | ** This routine is an optimization. It is common for the entire key | ||
3221 | ** and data to fit on the local page and for there to be no overflow | ||
3222 | ** pages. When that is so, this routine can be used to access the | ||
3223 | ** key and data without making a copy. If the key and/or data spills | ||
3224 | ** onto overflow pages, then accessPayload() must be used to reassembly | ||
3225 | ** the key/data and copy it into a preallocated buffer. | ||
3226 | ** | ||
3227 | ** The pointer returned by this routine looks directly into the cached | ||
3228 | ** page of the database. The data might change or move the next time | ||
3229 | ** any btree routine is called. | ||
3230 | */ | ||
3231 | static const unsigned char *fetchPayload( | ||
3232 | BtCursor *pCur, /* Cursor pointing to entry to read from */ | ||
3233 | int *pAmt, /* Write the number of available bytes here */ | ||
3234 | int skipKey /* read beginning at data if this is true */ | ||
3235 | ){ | ||
3236 | unsigned char *aPayload; | ||
3237 | MemPage *pPage; | ||
3238 | u32 nKey; | ||
3239 | int nLocal; | ||
3240 | |||
3241 | assert( pCur!=0 && pCur->pPage!=0 ); | ||
3242 | assert( pCur->eState==CURSOR_VALID ); | ||
3243 | assert( cursorHoldsMutex(pCur) ); | ||
3244 | pPage = pCur->pPage; | ||
3245 | assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); | ||
3246 | getCellInfo(pCur); | ||
3247 | aPayload = pCur->info.pCell; | ||
3248 | aPayload += pCur->info.nHeader; | ||
3249 | if( pPage->intKey ){ | ||
3250 | nKey = 0; | ||
3251 | }else{ | ||
3252 | nKey = pCur->info.nKey; | ||
3253 | } | ||
3254 | if( skipKey ){ | ||
3255 | aPayload += nKey; | ||
3256 | nLocal = pCur->info.nLocal - nKey; | ||
3257 | }else{ | ||
3258 | nLocal = pCur->info.nLocal; | ||
3259 | if( nLocal>nKey ){ | ||
3260 | nLocal = nKey; | ||
3261 | } | ||
3262 | } | ||
3263 | *pAmt = nLocal; | ||
3264 | return aPayload; | ||
3265 | } | ||
3266 | |||
3267 | |||
3268 | /* | ||
3269 | ** For the entry that cursor pCur is point to, return as | ||
3270 | ** many bytes of the key or data as are available on the local | ||
3271 | ** b-tree page. Write the number of available bytes into *pAmt. | ||
3272 | ** | ||
3273 | ** The pointer returned is ephemeral. The key/data may move | ||
3274 | ** or be destroyed on the next call to any Btree routine, | ||
3275 | ** including calls from other threads against the same cache. | ||
3276 | ** Hence, a mutex on the BtShared should be held prior to calling | ||
3277 | ** this routine. | ||
3278 | ** | ||
3279 | ** These routines is used to get quick access to key and data | ||
3280 | ** in the common case where no overflow pages are used. | ||
3281 | */ | ||
3282 | const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){ | ||
3283 | assert( cursorHoldsMutex(pCur) ); | ||
3284 | if( pCur->eState==CURSOR_VALID ){ | ||
3285 | return (const void*)fetchPayload(pCur, pAmt, 0); | ||
3286 | } | ||
3287 | return 0; | ||
3288 | } | ||
3289 | const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){ | ||
3290 | assert( cursorHoldsMutex(pCur) ); | ||
3291 | if( pCur->eState==CURSOR_VALID ){ | ||
3292 | return (const void*)fetchPayload(pCur, pAmt, 1); | ||
3293 | } | ||
3294 | return 0; | ||
3295 | } | ||
3296 | |||
3297 | |||
3298 | /* | ||
3299 | ** Move the cursor down to a new child page. The newPgno argument is the | ||
3300 | ** page number of the child page to move to. | ||
3301 | */ | ||
3302 | static int moveToChild(BtCursor *pCur, u32 newPgno){ | ||
3303 | int rc; | ||
3304 | MemPage *pNewPage; | ||
3305 | MemPage *pOldPage; | ||
3306 | BtShared *pBt = pCur->pBt; | ||
3307 | |||
3308 | assert( cursorHoldsMutex(pCur) ); | ||
3309 | assert( pCur->eState==CURSOR_VALID ); | ||
3310 | rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage); | ||
3311 | if( rc ) return rc; | ||
3312 | pNewPage->idxParent = pCur->idx; | ||
3313 | pOldPage = pCur->pPage; | ||
3314 | pOldPage->idxShift = 0; | ||
3315 | releasePage(pOldPage); | ||
3316 | pCur->pPage = pNewPage; | ||
3317 | pCur->idx = 0; | ||
3318 | pCur->info.nSize = 0; | ||
3319 | if( pNewPage->nCell<1 ){ | ||
3320 | return SQLITE_CORRUPT_BKPT; | ||
3321 | } | ||
3322 | return SQLITE_OK; | ||
3323 | } | ||
3324 | |||
3325 | /* | ||
3326 | ** Return true if the page is the virtual root of its table. | ||
3327 | ** | ||
3328 | ** The virtual root page is the root page for most tables. But | ||
3329 | ** for the table rooted on page 1, sometime the real root page | ||
3330 | ** is empty except for the right-pointer. In such cases the | ||
3331 | ** virtual root page is the page that the right-pointer of page | ||
3332 | ** 1 is pointing to. | ||
3333 | */ | ||
3334 | int sqlite3BtreeIsRootPage(MemPage *pPage){ | ||
3335 | MemPage *pParent; | ||
3336 | |||
3337 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
3338 | pParent = pPage->pParent; | ||
3339 | if( pParent==0 ) return 1; | ||
3340 | if( pParent->pgno>1 ) return 0; | ||
3341 | if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1; | ||
3342 | return 0; | ||
3343 | } | ||
3344 | |||
3345 | /* | ||
3346 | ** Move the cursor up to the parent page. | ||
3347 | ** | ||
3348 | ** pCur->idx is set to the cell index that contains the pointer | ||
3349 | ** to the page we are coming from. If we are coming from the | ||
3350 | ** right-most child page then pCur->idx is set to one more than | ||
3351 | ** the largest cell index. | ||
3352 | */ | ||
3353 | void sqlite3BtreeMoveToParent(BtCursor *pCur){ | ||
3354 | MemPage *pParent; | ||
3355 | MemPage *pPage; | ||
3356 | int idxParent; | ||
3357 | |||
3358 | assert( cursorHoldsMutex(pCur) ); | ||
3359 | assert( pCur->eState==CURSOR_VALID ); | ||
3360 | pPage = pCur->pPage; | ||
3361 | assert( pPage!=0 ); | ||
3362 | assert( !sqlite3BtreeIsRootPage(pPage) ); | ||
3363 | pParent = pPage->pParent; | ||
3364 | assert( pParent!=0 ); | ||
3365 | idxParent = pPage->idxParent; | ||
3366 | sqlite3PagerRef(pParent->pDbPage); | ||
3367 | releasePage(pPage); | ||
3368 | pCur->pPage = pParent; | ||
3369 | pCur->info.nSize = 0; | ||
3370 | assert( pParent->idxShift==0 ); | ||
3371 | pCur->idx = idxParent; | ||
3372 | } | ||
3373 | |||
3374 | /* | ||
3375 | ** Move the cursor to the root page | ||
3376 | */ | ||
3377 | static int moveToRoot(BtCursor *pCur){ | ||
3378 | MemPage *pRoot; | ||
3379 | int rc = SQLITE_OK; | ||
3380 | Btree *p = pCur->pBtree; | ||
3381 | BtShared *pBt = p->pBt; | ||
3382 | |||
3383 | assert( cursorHoldsMutex(pCur) ); | ||
3384 | assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); | ||
3385 | assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); | ||
3386 | assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); | ||
3387 | if( pCur->eState>=CURSOR_REQUIRESEEK ){ | ||
3388 | if( pCur->eState==CURSOR_FAULT ){ | ||
3389 | return pCur->skip; | ||
3390 | } | ||
3391 | clearCursorPosition(pCur); | ||
3392 | } | ||
3393 | pRoot = pCur->pPage; | ||
3394 | if( pRoot && pRoot->pgno==pCur->pgnoRoot ){ | ||
3395 | assert( pRoot->isInit ); | ||
3396 | }else{ | ||
3397 | if( | ||
3398 | SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0)) | ||
3399 | ){ | ||
3400 | pCur->eState = CURSOR_INVALID; | ||
3401 | return rc; | ||
3402 | } | ||
3403 | releasePage(pCur->pPage); | ||
3404 | pCur->pPage = pRoot; | ||
3405 | } | ||
3406 | pCur->idx = 0; | ||
3407 | pCur->info.nSize = 0; | ||
3408 | if( pRoot->nCell==0 && !pRoot->leaf ){ | ||
3409 | Pgno subpage; | ||
3410 | assert( pRoot->pgno==1 ); | ||
3411 | subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); | ||
3412 | assert( subpage>0 ); | ||
3413 | pCur->eState = CURSOR_VALID; | ||
3414 | rc = moveToChild(pCur, subpage); | ||
3415 | } | ||
3416 | pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID); | ||
3417 | return rc; | ||
3418 | } | ||
3419 | |||
3420 | /* | ||
3421 | ** Move the cursor down to the left-most leaf entry beneath the | ||
3422 | ** entry to which it is currently pointing. | ||
3423 | ** | ||
3424 | ** The left-most leaf is the one with the smallest key - the first | ||
3425 | ** in ascending order. | ||
3426 | */ | ||
3427 | static int moveToLeftmost(BtCursor *pCur){ | ||
3428 | Pgno pgno; | ||
3429 | int rc = SQLITE_OK; | ||
3430 | MemPage *pPage; | ||
3431 | |||
3432 | assert( cursorHoldsMutex(pCur) ); | ||
3433 | assert( pCur->eState==CURSOR_VALID ); | ||
3434 | while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){ | ||
3435 | assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); | ||
3436 | pgno = get4byte(findCell(pPage, pCur->idx)); | ||
3437 | rc = moveToChild(pCur, pgno); | ||
3438 | } | ||
3439 | return rc; | ||
3440 | } | ||
3441 | |||
3442 | /* | ||
3443 | ** Move the cursor down to the right-most leaf entry beneath the | ||
3444 | ** page to which it is currently pointing. Notice the difference | ||
3445 | ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() | ||
3446 | ** finds the left-most entry beneath the *entry* whereas moveToRightmost() | ||
3447 | ** finds the right-most entry beneath the *page*. | ||
3448 | ** | ||
3449 | ** The right-most entry is the one with the largest key - the last | ||
3450 | ** key in ascending order. | ||
3451 | */ | ||
3452 | static int moveToRightmost(BtCursor *pCur){ | ||
3453 | Pgno pgno; | ||
3454 | int rc = SQLITE_OK; | ||
3455 | MemPage *pPage; | ||
3456 | |||
3457 | assert( cursorHoldsMutex(pCur) ); | ||
3458 | assert( pCur->eState==CURSOR_VALID ); | ||
3459 | while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){ | ||
3460 | pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); | ||
3461 | pCur->idx = pPage->nCell; | ||
3462 | rc = moveToChild(pCur, pgno); | ||
3463 | } | ||
3464 | if( rc==SQLITE_OK ){ | ||
3465 | pCur->idx = pPage->nCell - 1; | ||
3466 | pCur->info.nSize = 0; | ||
3467 | } | ||
3468 | return SQLITE_OK; | ||
3469 | } | ||
3470 | |||
3471 | /* Move the cursor to the first entry in the table. Return SQLITE_OK | ||
3472 | ** on success. Set *pRes to 0 if the cursor actually points to something | ||
3473 | ** or set *pRes to 1 if the table is empty. | ||
3474 | */ | ||
3475 | int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ | ||
3476 | int rc; | ||
3477 | |||
3478 | assert( cursorHoldsMutex(pCur) ); | ||
3479 | assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) ); | ||
3480 | rc = moveToRoot(pCur); | ||
3481 | if( rc==SQLITE_OK ){ | ||
3482 | if( pCur->eState==CURSOR_INVALID ){ | ||
3483 | assert( pCur->pPage->nCell==0 ); | ||
3484 | *pRes = 1; | ||
3485 | rc = SQLITE_OK; | ||
3486 | }else{ | ||
3487 | assert( pCur->pPage->nCell>0 ); | ||
3488 | *pRes = 0; | ||
3489 | rc = moveToLeftmost(pCur); | ||
3490 | } | ||
3491 | } | ||
3492 | return rc; | ||
3493 | } | ||
3494 | |||
3495 | /* Move the cursor to the last entry in the table. Return SQLITE_OK | ||
3496 | ** on success. Set *pRes to 0 if the cursor actually points to something | ||
3497 | ** or set *pRes to 1 if the table is empty. | ||
3498 | */ | ||
3499 | int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ | ||
3500 | int rc; | ||
3501 | |||
3502 | assert( cursorHoldsMutex(pCur) ); | ||
3503 | assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) ); | ||
3504 | rc = moveToRoot(pCur); | ||
3505 | if( rc==SQLITE_OK ){ | ||
3506 | if( CURSOR_INVALID==pCur->eState ){ | ||
3507 | assert( pCur->pPage->nCell==0 ); | ||
3508 | *pRes = 1; | ||
3509 | }else{ | ||
3510 | assert( pCur->eState==CURSOR_VALID ); | ||
3511 | *pRes = 0; | ||
3512 | rc = moveToRightmost(pCur); | ||
3513 | } | ||
3514 | } | ||
3515 | return rc; | ||
3516 | } | ||
3517 | |||
3518 | /* Move the cursor so that it points to an entry near pKey/nKey. | ||
3519 | ** Return a success code. | ||
3520 | ** | ||
3521 | ** For INTKEY tables, only the nKey parameter is used. pKey is | ||
3522 | ** ignored. For other tables, nKey is the number of bytes of data | ||
3523 | ** in pKey. The comparison function specified when the cursor was | ||
3524 | ** created is used to compare keys. | ||
3525 | ** | ||
3526 | ** If an exact match is not found, then the cursor is always | ||
3527 | ** left pointing at a leaf page which would hold the entry if it | ||
3528 | ** were present. The cursor might point to an entry that comes | ||
3529 | ** before or after the key. | ||
3530 | ** | ||
3531 | ** The result of comparing the key with the entry to which the | ||
3532 | ** cursor is written to *pRes if pRes!=NULL. The meaning of | ||
3533 | ** this value is as follows: | ||
3534 | ** | ||
3535 | ** *pRes<0 The cursor is left pointing at an entry that | ||
3536 | ** is smaller than pKey or if the table is empty | ||
3537 | ** and the cursor is therefore left point to nothing. | ||
3538 | ** | ||
3539 | ** *pRes==0 The cursor is left pointing at an entry that | ||
3540 | ** exactly matches pKey. | ||
3541 | ** | ||
3542 | ** *pRes>0 The cursor is left pointing at an entry that | ||
3543 | ** is larger than pKey. | ||
3544 | ** | ||
3545 | */ | ||
3546 | int sqlite3BtreeMoveto( | ||
3547 | BtCursor *pCur, /* The cursor to be moved */ | ||
3548 | const void *pKey, /* The key content for indices. Not used by tables */ | ||
3549 | i64 nKey, /* Size of pKey. Or the key for tables */ | ||
3550 | int biasRight, /* If true, bias the search to the high end */ | ||
3551 | int *pRes /* Search result flag */ | ||
3552 | ){ | ||
3553 | int rc; | ||
3554 | |||
3555 | assert( cursorHoldsMutex(pCur) ); | ||
3556 | assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) ); | ||
3557 | rc = moveToRoot(pCur); | ||
3558 | if( rc ){ | ||
3559 | return rc; | ||
3560 | } | ||
3561 | assert( pCur->pPage ); | ||
3562 | assert( pCur->pPage->isInit ); | ||
3563 | if( pCur->eState==CURSOR_INVALID ){ | ||
3564 | *pRes = -1; | ||
3565 | assert( pCur->pPage->nCell==0 ); | ||
3566 | return SQLITE_OK; | ||
3567 | } | ||
3568 | for(;;){ | ||
3569 | int lwr, upr; | ||
3570 | Pgno chldPg; | ||
3571 | MemPage *pPage = pCur->pPage; | ||
3572 | int c = -1; /* pRes return if table is empty must be -1 */ | ||
3573 | lwr = 0; | ||
3574 | upr = pPage->nCell-1; | ||
3575 | if( !pPage->intKey && pKey==0 ){ | ||
3576 | return SQLITE_CORRUPT_BKPT; | ||
3577 | } | ||
3578 | if( biasRight ){ | ||
3579 | pCur->idx = upr; | ||
3580 | }else{ | ||
3581 | pCur->idx = (upr+lwr)/2; | ||
3582 | } | ||
3583 | if( lwr<=upr ) for(;;){ | ||
3584 | void *pCellKey; | ||
3585 | i64 nCellKey; | ||
3586 | pCur->info.nSize = 0; | ||
3587 | if( pPage->intKey ){ | ||
3588 | u8 *pCell; | ||
3589 | pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize; | ||
3590 | if( pPage->hasData ){ | ||
3591 | u32 dummy; | ||
3592 | pCell += getVarint32(pCell, &dummy); | ||
3593 | } | ||
3594 | getVarint(pCell, (u64 *)&nCellKey); | ||
3595 | if( nCellKey<nKey ){ | ||
3596 | c = -1; | ||
3597 | }else if( nCellKey>nKey ){ | ||
3598 | c = +1; | ||
3599 | }else{ | ||
3600 | c = 0; | ||
3601 | } | ||
3602 | }else{ | ||
3603 | int available; | ||
3604 | pCellKey = (void *)fetchPayload(pCur, &available, 0); | ||
3605 | nCellKey = pCur->info.nKey; | ||
3606 | if( available>=nCellKey ){ | ||
3607 | c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey); | ||
3608 | }else{ | ||
3609 | pCellKey = sqlite3_malloc( nCellKey ); | ||
3610 | if( pCellKey==0 ) return SQLITE_NOMEM; | ||
3611 | rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey); | ||
3612 | c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey); | ||
3613 | sqlite3_free(pCellKey); | ||
3614 | if( rc ){ | ||
3615 | return rc; | ||
3616 | } | ||
3617 | } | ||
3618 | } | ||
3619 | if( c==0 ){ | ||
3620 | if( pPage->leafData && !pPage->leaf ){ | ||
3621 | lwr = pCur->idx; | ||
3622 | upr = lwr - 1; | ||
3623 | break; | ||
3624 | }else{ | ||
3625 | if( pRes ) *pRes = 0; | ||
3626 | return SQLITE_OK; | ||
3627 | } | ||
3628 | } | ||
3629 | if( c<0 ){ | ||
3630 | lwr = pCur->idx+1; | ||
3631 | }else{ | ||
3632 | upr = pCur->idx-1; | ||
3633 | } | ||
3634 | if( lwr>upr ){ | ||
3635 | break; | ||
3636 | } | ||
3637 | pCur->idx = (lwr+upr)/2; | ||
3638 | } | ||
3639 | assert( lwr==upr+1 ); | ||
3640 | assert( pPage->isInit ); | ||
3641 | if( pPage->leaf ){ | ||
3642 | chldPg = 0; | ||
3643 | }else if( lwr>=pPage->nCell ){ | ||
3644 | chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); | ||
3645 | }else{ | ||
3646 | chldPg = get4byte(findCell(pPage, lwr)); | ||
3647 | } | ||
3648 | if( chldPg==0 ){ | ||
3649 | assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell ); | ||
3650 | if( pRes ) *pRes = c; | ||
3651 | return SQLITE_OK; | ||
3652 | } | ||
3653 | pCur->idx = lwr; | ||
3654 | pCur->info.nSize = 0; | ||
3655 | rc = moveToChild(pCur, chldPg); | ||
3656 | if( rc ){ | ||
3657 | return rc; | ||
3658 | } | ||
3659 | } | ||
3660 | /* NOT REACHED */ | ||
3661 | } | ||
3662 | |||
3663 | |||
3664 | /* | ||
3665 | ** Return TRUE if the cursor is not pointing at an entry of the table. | ||
3666 | ** | ||
3667 | ** TRUE will be returned after a call to sqlite3BtreeNext() moves | ||
3668 | ** past the last entry in the table or sqlite3BtreePrev() moves past | ||
3669 | ** the first entry. TRUE is also returned if the table is empty. | ||
3670 | */ | ||
3671 | int sqlite3BtreeEof(BtCursor *pCur){ | ||
3672 | /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries | ||
3673 | ** have been deleted? This API will need to change to return an error code | ||
3674 | ** as well as the boolean result value. | ||
3675 | */ | ||
3676 | return (CURSOR_VALID!=pCur->eState); | ||
3677 | } | ||
3678 | |||
3679 | /* | ||
3680 | ** Return the database connection handle for a cursor. | ||
3681 | */ | ||
3682 | sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){ | ||
3683 | assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) ); | ||
3684 | return pCur->pBtree->pSqlite; | ||
3685 | } | ||
3686 | |||
3687 | /* | ||
3688 | ** Advance the cursor to the next entry in the database. If | ||
3689 | ** successful then set *pRes=0. If the cursor | ||
3690 | ** was already pointing to the last entry in the database before | ||
3691 | ** this routine was called, then set *pRes=1. | ||
3692 | */ | ||
3693 | static int btreeNext(BtCursor *pCur, int *pRes){ | ||
3694 | int rc; | ||
3695 | MemPage *pPage; | ||
3696 | |||
3697 | assert( cursorHoldsMutex(pCur) ); | ||
3698 | rc = restoreOrClearCursorPosition(pCur); | ||
3699 | if( rc!=SQLITE_OK ){ | ||
3700 | return rc; | ||
3701 | } | ||
3702 | assert( pRes!=0 ); | ||
3703 | pPage = pCur->pPage; | ||
3704 | if( CURSOR_INVALID==pCur->eState ){ | ||
3705 | *pRes = 1; | ||
3706 | return SQLITE_OK; | ||
3707 | } | ||
3708 | if( pCur->skip>0 ){ | ||
3709 | pCur->skip = 0; | ||
3710 | *pRes = 0; | ||
3711 | return SQLITE_OK; | ||
3712 | } | ||
3713 | pCur->skip = 0; | ||
3714 | |||
3715 | assert( pPage->isInit ); | ||
3716 | assert( pCur->idx<pPage->nCell ); | ||
3717 | |||
3718 | pCur->idx++; | ||
3719 | pCur->info.nSize = 0; | ||
3720 | if( pCur->idx>=pPage->nCell ){ | ||
3721 | if( !pPage->leaf ){ | ||
3722 | rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); | ||
3723 | if( rc ) return rc; | ||
3724 | rc = moveToLeftmost(pCur); | ||
3725 | *pRes = 0; | ||
3726 | return rc; | ||
3727 | } | ||
3728 | do{ | ||
3729 | if( sqlite3BtreeIsRootPage(pPage) ){ | ||
3730 | *pRes = 1; | ||
3731 | pCur->eState = CURSOR_INVALID; | ||
3732 | return SQLITE_OK; | ||
3733 | } | ||
3734 | sqlite3BtreeMoveToParent(pCur); | ||
3735 | pPage = pCur->pPage; | ||
3736 | }while( pCur->idx>=pPage->nCell ); | ||
3737 | *pRes = 0; | ||
3738 | if( pPage->leafData ){ | ||
3739 | rc = sqlite3BtreeNext(pCur, pRes); | ||
3740 | }else{ | ||
3741 | rc = SQLITE_OK; | ||
3742 | } | ||
3743 | return rc; | ||
3744 | } | ||
3745 | *pRes = 0; | ||
3746 | if( pPage->leaf ){ | ||
3747 | return SQLITE_OK; | ||
3748 | } | ||
3749 | rc = moveToLeftmost(pCur); | ||
3750 | return rc; | ||
3751 | } | ||
3752 | int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ | ||
3753 | int rc; | ||
3754 | assert( cursorHoldsMutex(pCur) ); | ||
3755 | rc = btreeNext(pCur, pRes); | ||
3756 | return rc; | ||
3757 | } | ||
3758 | |||
3759 | |||
3760 | /* | ||
3761 | ** Step the cursor to the back to the previous entry in the database. If | ||
3762 | ** successful then set *pRes=0. If the cursor | ||
3763 | ** was already pointing to the first entry in the database before | ||
3764 | ** this routine was called, then set *pRes=1. | ||
3765 | */ | ||
3766 | static int btreePrevious(BtCursor *pCur, int *pRes){ | ||
3767 | int rc; | ||
3768 | Pgno pgno; | ||
3769 | MemPage *pPage; | ||
3770 | |||
3771 | assert( cursorHoldsMutex(pCur) ); | ||
3772 | rc = restoreOrClearCursorPosition(pCur); | ||
3773 | if( rc!=SQLITE_OK ){ | ||
3774 | return rc; | ||
3775 | } | ||
3776 | if( CURSOR_INVALID==pCur->eState ){ | ||
3777 | *pRes = 1; | ||
3778 | return SQLITE_OK; | ||
3779 | } | ||
3780 | if( pCur->skip<0 ){ | ||
3781 | pCur->skip = 0; | ||
3782 | *pRes = 0; | ||
3783 | return SQLITE_OK; | ||
3784 | } | ||
3785 | pCur->skip = 0; | ||
3786 | |||
3787 | pPage = pCur->pPage; | ||
3788 | assert( pPage->isInit ); | ||
3789 | assert( pCur->idx>=0 ); | ||
3790 | if( !pPage->leaf ){ | ||
3791 | pgno = get4byte( findCell(pPage, pCur->idx) ); | ||
3792 | rc = moveToChild(pCur, pgno); | ||
3793 | if( rc ){ | ||
3794 | return rc; | ||
3795 | } | ||
3796 | rc = moveToRightmost(pCur); | ||
3797 | }else{ | ||
3798 | while( pCur->idx==0 ){ | ||
3799 | if( sqlite3BtreeIsRootPage(pPage) ){ | ||
3800 | pCur->eState = CURSOR_INVALID; | ||
3801 | *pRes = 1; | ||
3802 | return SQLITE_OK; | ||
3803 | } | ||
3804 | sqlite3BtreeMoveToParent(pCur); | ||
3805 | pPage = pCur->pPage; | ||
3806 | } | ||
3807 | pCur->idx--; | ||
3808 | pCur->info.nSize = 0; | ||
3809 | if( pPage->leafData && !pPage->leaf ){ | ||
3810 | rc = sqlite3BtreePrevious(pCur, pRes); | ||
3811 | }else{ | ||
3812 | rc = SQLITE_OK; | ||
3813 | } | ||
3814 | } | ||
3815 | *pRes = 0; | ||
3816 | return rc; | ||
3817 | } | ||
3818 | int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | ||
3819 | int rc; | ||
3820 | assert( cursorHoldsMutex(pCur) ); | ||
3821 | rc = btreePrevious(pCur, pRes); | ||
3822 | return rc; | ||
3823 | } | ||
3824 | |||
3825 | /* | ||
3826 | ** Allocate a new page from the database file. | ||
3827 | ** | ||
3828 | ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() | ||
3829 | ** has already been called on the new page.) The new page has also | ||
3830 | ** been referenced and the calling routine is responsible for calling | ||
3831 | ** sqlite3PagerUnref() on the new page when it is done. | ||
3832 | ** | ||
3833 | ** SQLITE_OK is returned on success. Any other return value indicates | ||
3834 | ** an error. *ppPage and *pPgno are undefined in the event of an error. | ||
3835 | ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned. | ||
3836 | ** | ||
3837 | ** If the "nearby" parameter is not 0, then a (feeble) effort is made to | ||
3838 | ** locate a page close to the page number "nearby". This can be used in an | ||
3839 | ** attempt to keep related pages close to each other in the database file, | ||
3840 | ** which in turn can make database access faster. | ||
3841 | ** | ||
3842 | ** If the "exact" parameter is not 0, and the page-number nearby exists | ||
3843 | ** anywhere on the free-list, then it is guarenteed to be returned. This | ||
3844 | ** is only used by auto-vacuum databases when allocating a new table. | ||
3845 | */ | ||
3846 | static int allocateBtreePage( | ||
3847 | BtShared *pBt, | ||
3848 | MemPage **ppPage, | ||
3849 | Pgno *pPgno, | ||
3850 | Pgno nearby, | ||
3851 | u8 exact | ||
3852 | ){ | ||
3853 | MemPage *pPage1; | ||
3854 | int rc; | ||
3855 | int n; /* Number of pages on the freelist */ | ||
3856 | int k; /* Number of leaves on the trunk of the freelist */ | ||
3857 | MemPage *pTrunk = 0; | ||
3858 | MemPage *pPrevTrunk = 0; | ||
3859 | |||
3860 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
3861 | pPage1 = pBt->pPage1; | ||
3862 | n = get4byte(&pPage1->aData[36]); | ||
3863 | if( n>0 ){ | ||
3864 | /* There are pages on the freelist. Reuse one of those pages. */ | ||
3865 | Pgno iTrunk; | ||
3866 | u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ | ||
3867 | |||
3868 | /* If the 'exact' parameter was true and a query of the pointer-map | ||
3869 | ** shows that the page 'nearby' is somewhere on the free-list, then | ||
3870 | ** the entire-list will be searched for that page. | ||
3871 | */ | ||
3872 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
3873 | if( exact && nearby<=sqlite3PagerPagecount(pBt->pPager) ){ | ||
3874 | u8 eType; | ||
3875 | assert( nearby>0 ); | ||
3876 | assert( pBt->autoVacuum ); | ||
3877 | rc = ptrmapGet(pBt, nearby, &eType, 0); | ||
3878 | if( rc ) return rc; | ||
3879 | if( eType==PTRMAP_FREEPAGE ){ | ||
3880 | searchList = 1; | ||
3881 | } | ||
3882 | *pPgno = nearby; | ||
3883 | } | ||
3884 | #endif | ||
3885 | |||
3886 | /* Decrement the free-list count by 1. Set iTrunk to the index of the | ||
3887 | ** first free-list trunk page. iPrevTrunk is initially 1. | ||
3888 | */ | ||
3889 | rc = sqlite3PagerWrite(pPage1->pDbPage); | ||
3890 | if( rc ) return rc; | ||
3891 | put4byte(&pPage1->aData[36], n-1); | ||
3892 | |||
3893 | /* The code within this loop is run only once if the 'searchList' variable | ||
3894 | ** is not true. Otherwise, it runs once for each trunk-page on the | ||
3895 | ** free-list until the page 'nearby' is located. | ||
3896 | */ | ||
3897 | do { | ||
3898 | pPrevTrunk = pTrunk; | ||
3899 | if( pPrevTrunk ){ | ||
3900 | iTrunk = get4byte(&pPrevTrunk->aData[0]); | ||
3901 | }else{ | ||
3902 | iTrunk = get4byte(&pPage1->aData[32]); | ||
3903 | } | ||
3904 | rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0); | ||
3905 | if( rc ){ | ||
3906 | pTrunk = 0; | ||
3907 | goto end_allocate_page; | ||
3908 | } | ||
3909 | |||
3910 | k = get4byte(&pTrunk->aData[4]); | ||
3911 | if( k==0 && !searchList ){ | ||
3912 | /* The trunk has no leaves and the list is not being searched. | ||
3913 | ** So extract the trunk page itself and use it as the newly | ||
3914 | ** allocated page */ | ||
3915 | assert( pPrevTrunk==0 ); | ||
3916 | rc = sqlite3PagerWrite(pTrunk->pDbPage); | ||
3917 | if( rc ){ | ||
3918 | goto end_allocate_page; | ||
3919 | } | ||
3920 | *pPgno = iTrunk; | ||
3921 | memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); | ||
3922 | *ppPage = pTrunk; | ||
3923 | pTrunk = 0; | ||
3924 | TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); | ||
3925 | }else if( k>pBt->usableSize/4 - 8 ){ | ||
3926 | /* Value of k is out of range. Database corruption */ | ||
3927 | rc = SQLITE_CORRUPT_BKPT; | ||
3928 | goto end_allocate_page; | ||
3929 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
3930 | }else if( searchList && nearby==iTrunk ){ | ||
3931 | /* The list is being searched and this trunk page is the page | ||
3932 | ** to allocate, regardless of whether it has leaves. | ||
3933 | */ | ||
3934 | assert( *pPgno==iTrunk ); | ||
3935 | *ppPage = pTrunk; | ||
3936 | searchList = 0; | ||
3937 | rc = sqlite3PagerWrite(pTrunk->pDbPage); | ||
3938 | if( rc ){ | ||
3939 | goto end_allocate_page; | ||
3940 | } | ||
3941 | if( k==0 ){ | ||
3942 | if( !pPrevTrunk ){ | ||
3943 | memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); | ||
3944 | }else{ | ||
3945 | memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); | ||
3946 | } | ||
3947 | }else{ | ||
3948 | /* The trunk page is required by the caller but it contains | ||
3949 | ** pointers to free-list leaves. The first leaf becomes a trunk | ||
3950 | ** page in this case. | ||
3951 | */ | ||
3952 | MemPage *pNewTrunk; | ||
3953 | Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); | ||
3954 | rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); | ||
3955 | if( rc!=SQLITE_OK ){ | ||
3956 | goto end_allocate_page; | ||
3957 | } | ||
3958 | rc = sqlite3PagerWrite(pNewTrunk->pDbPage); | ||
3959 | if( rc!=SQLITE_OK ){ | ||
3960 | releasePage(pNewTrunk); | ||
3961 | goto end_allocate_page; | ||
3962 | } | ||
3963 | memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); | ||
3964 | put4byte(&pNewTrunk->aData[4], k-1); | ||
3965 | memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); | ||
3966 | releasePage(pNewTrunk); | ||
3967 | if( !pPrevTrunk ){ | ||
3968 | put4byte(&pPage1->aData[32], iNewTrunk); | ||
3969 | }else{ | ||
3970 | rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); | ||
3971 | if( rc ){ | ||
3972 | goto end_allocate_page; | ||
3973 | } | ||
3974 | put4byte(&pPrevTrunk->aData[0], iNewTrunk); | ||
3975 | } | ||
3976 | } | ||
3977 | pTrunk = 0; | ||
3978 | TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); | ||
3979 | #endif | ||
3980 | }else{ | ||
3981 | /* Extract a leaf from the trunk */ | ||
3982 | int closest; | ||
3983 | Pgno iPage; | ||
3984 | unsigned char *aData = pTrunk->aData; | ||
3985 | rc = sqlite3PagerWrite(pTrunk->pDbPage); | ||
3986 | if( rc ){ | ||
3987 | goto end_allocate_page; | ||
3988 | } | ||
3989 | if( nearby>0 ){ | ||
3990 | int i, dist; | ||
3991 | closest = 0; | ||
3992 | dist = get4byte(&aData[8]) - nearby; | ||
3993 | if( dist<0 ) dist = -dist; | ||
3994 | for(i=1; i<k; i++){ | ||
3995 | int d2 = get4byte(&aData[8+i*4]) - nearby; | ||
3996 | if( d2<0 ) d2 = -d2; | ||
3997 | if( d2<dist ){ | ||
3998 | closest = i; | ||
3999 | dist = d2; | ||
4000 | } | ||
4001 | } | ||
4002 | }else{ | ||
4003 | closest = 0; | ||
4004 | } | ||
4005 | |||
4006 | iPage = get4byte(&aData[8+closest*4]); | ||
4007 | if( !searchList || iPage==nearby ){ | ||
4008 | *pPgno = iPage; | ||
4009 | if( *pPgno>sqlite3PagerPagecount(pBt->pPager) ){ | ||
4010 | /* Free page off the end of the file */ | ||
4011 | return SQLITE_CORRUPT_BKPT; | ||
4012 | } | ||
4013 | TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" | ||
4014 | ": %d more free pages\n", | ||
4015 | *pPgno, closest+1, k, pTrunk->pgno, n-1)); | ||
4016 | if( closest<k-1 ){ | ||
4017 | memcpy(&aData[8+closest*4], &aData[4+k*4], 4); | ||
4018 | } | ||
4019 | put4byte(&aData[4], k-1); | ||
4020 | rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1); | ||
4021 | if( rc==SQLITE_OK ){ | ||
4022 | sqlite3PagerDontRollback((*ppPage)->pDbPage); | ||
4023 | rc = sqlite3PagerWrite((*ppPage)->pDbPage); | ||
4024 | if( rc!=SQLITE_OK ){ | ||
4025 | releasePage(*ppPage); | ||
4026 | } | ||
4027 | } | ||
4028 | searchList = 0; | ||
4029 | } | ||
4030 | } | ||
4031 | releasePage(pPrevTrunk); | ||
4032 | pPrevTrunk = 0; | ||
4033 | }while( searchList ); | ||
4034 | }else{ | ||
4035 | /* There are no pages on the freelist, so create a new page at the | ||
4036 | ** end of the file */ | ||
4037 | *pPgno = sqlite3PagerPagecount(pBt->pPager) + 1; | ||
4038 | |||
4039 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4040 | if( pBt->nTrunc ){ | ||
4041 | /* An incr-vacuum has already run within this transaction. So the | ||
4042 | ** page to allocate is not from the physical end of the file, but | ||
4043 | ** at pBt->nTrunc. | ||
4044 | */ | ||
4045 | *pPgno = pBt->nTrunc+1; | ||
4046 | if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ | ||
4047 | (*pPgno)++; | ||
4048 | } | ||
4049 | } | ||
4050 | if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){ | ||
4051 | /* If *pPgno refers to a pointer-map page, allocate two new pages | ||
4052 | ** at the end of the file instead of one. The first allocated page | ||
4053 | ** becomes a new pointer-map page, the second is used by the caller. | ||
4054 | */ | ||
4055 | TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno)); | ||
4056 | assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); | ||
4057 | (*pPgno)++; | ||
4058 | } | ||
4059 | if( pBt->nTrunc ){ | ||
4060 | pBt->nTrunc = *pPgno; | ||
4061 | } | ||
4062 | #endif | ||
4063 | |||
4064 | assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); | ||
4065 | rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0); | ||
4066 | if( rc ) return rc; | ||
4067 | rc = sqlite3PagerWrite((*ppPage)->pDbPage); | ||
4068 | if( rc!=SQLITE_OK ){ | ||
4069 | releasePage(*ppPage); | ||
4070 | } | ||
4071 | TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); | ||
4072 | } | ||
4073 | |||
4074 | assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); | ||
4075 | |||
4076 | end_allocate_page: | ||
4077 | releasePage(pTrunk); | ||
4078 | releasePage(pPrevTrunk); | ||
4079 | return rc; | ||
4080 | } | ||
4081 | |||
4082 | /* | ||
4083 | ** Add a page of the database file to the freelist. | ||
4084 | ** | ||
4085 | ** sqlite3PagerUnref() is NOT called for pPage. | ||
4086 | */ | ||
4087 | static int freePage(MemPage *pPage){ | ||
4088 | BtShared *pBt = pPage->pBt; | ||
4089 | MemPage *pPage1 = pBt->pPage1; | ||
4090 | int rc, n, k; | ||
4091 | |||
4092 | /* Prepare the page for freeing */ | ||
4093 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4094 | assert( pPage->pgno>1 ); | ||
4095 | pPage->isInit = 0; | ||
4096 | releasePage(pPage->pParent); | ||
4097 | pPage->pParent = 0; | ||
4098 | |||
4099 | /* Increment the free page count on pPage1 */ | ||
4100 | rc = sqlite3PagerWrite(pPage1->pDbPage); | ||
4101 | if( rc ) return rc; | ||
4102 | n = get4byte(&pPage1->aData[36]); | ||
4103 | put4byte(&pPage1->aData[36], n+1); | ||
4104 | |||
4105 | #ifdef SQLITE_SECURE_DELETE | ||
4106 | /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then | ||
4107 | ** always fully overwrite deleted information with zeros. | ||
4108 | */ | ||
4109 | rc = sqlite3PagerWrite(pPage->pDbPage); | ||
4110 | if( rc ) return rc; | ||
4111 | memset(pPage->aData, 0, pPage->pBt->pageSize); | ||
4112 | #endif | ||
4113 | |||
4114 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4115 | /* If the database supports auto-vacuum, write an entry in the pointer-map | ||
4116 | ** to indicate that the page is free. | ||
4117 | */ | ||
4118 | if( pBt->autoVacuum ){ | ||
4119 | rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0); | ||
4120 | if( rc ) return rc; | ||
4121 | } | ||
4122 | #endif | ||
4123 | |||
4124 | if( n==0 ){ | ||
4125 | /* This is the first free page */ | ||
4126 | rc = sqlite3PagerWrite(pPage->pDbPage); | ||
4127 | if( rc ) return rc; | ||
4128 | memset(pPage->aData, 0, 8); | ||
4129 | put4byte(&pPage1->aData[32], pPage->pgno); | ||
4130 | TRACE(("FREE-PAGE: %d first\n", pPage->pgno)); | ||
4131 | }else{ | ||
4132 | /* Other free pages already exist. Retrive the first trunk page | ||
4133 | ** of the freelist and find out how many leaves it has. */ | ||
4134 | MemPage *pTrunk; | ||
4135 | rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0); | ||
4136 | if( rc ) return rc; | ||
4137 | k = get4byte(&pTrunk->aData[4]); | ||
4138 | if( k>=pBt->usableSize/4 - 8 ){ | ||
4139 | /* The trunk is full. Turn the page being freed into a new | ||
4140 | ** trunk page with no leaves. */ | ||
4141 | rc = sqlite3PagerWrite(pPage->pDbPage); | ||
4142 | if( rc==SQLITE_OK ){ | ||
4143 | put4byte(pPage->aData, pTrunk->pgno); | ||
4144 | put4byte(&pPage->aData[4], 0); | ||
4145 | put4byte(&pPage1->aData[32], pPage->pgno); | ||
4146 | TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", | ||
4147 | pPage->pgno, pTrunk->pgno)); | ||
4148 | } | ||
4149 | }else if( k<0 ){ | ||
4150 | rc = SQLITE_CORRUPT; | ||
4151 | }else{ | ||
4152 | /* Add the newly freed page as a leaf on the current trunk */ | ||
4153 | rc = sqlite3PagerWrite(pTrunk->pDbPage); | ||
4154 | if( rc==SQLITE_OK ){ | ||
4155 | put4byte(&pTrunk->aData[4], k+1); | ||
4156 | put4byte(&pTrunk->aData[8+k*4], pPage->pgno); | ||
4157 | #ifndef SQLITE_SECURE_DELETE | ||
4158 | sqlite3PagerDontWrite(pPage->pDbPage); | ||
4159 | #endif | ||
4160 | } | ||
4161 | TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); | ||
4162 | } | ||
4163 | releasePage(pTrunk); | ||
4164 | } | ||
4165 | return rc; | ||
4166 | } | ||
4167 | |||
4168 | /* | ||
4169 | ** Free any overflow pages associated with the given Cell. | ||
4170 | */ | ||
4171 | static int clearCell(MemPage *pPage, unsigned char *pCell){ | ||
4172 | BtShared *pBt = pPage->pBt; | ||
4173 | CellInfo info; | ||
4174 | Pgno ovflPgno; | ||
4175 | int rc; | ||
4176 | int nOvfl; | ||
4177 | int ovflPageSize; | ||
4178 | |||
4179 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4180 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
4181 | if( info.iOverflow==0 ){ | ||
4182 | return SQLITE_OK; /* No overflow pages. Return without doing anything */ | ||
4183 | } | ||
4184 | ovflPgno = get4byte(&pCell[info.iOverflow]); | ||
4185 | ovflPageSize = pBt->usableSize - 4; | ||
4186 | nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; | ||
4187 | assert( ovflPgno==0 || nOvfl>0 ); | ||
4188 | while( nOvfl-- ){ | ||
4189 | MemPage *pOvfl; | ||
4190 | if( ovflPgno==0 || ovflPgno>sqlite3PagerPagecount(pBt->pPager) ){ | ||
4191 | return SQLITE_CORRUPT_BKPT; | ||
4192 | } | ||
4193 | |||
4194 | rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno); | ||
4195 | if( rc ) return rc; | ||
4196 | rc = freePage(pOvfl); | ||
4197 | sqlite3PagerUnref(pOvfl->pDbPage); | ||
4198 | if( rc ) return rc; | ||
4199 | } | ||
4200 | return SQLITE_OK; | ||
4201 | } | ||
4202 | |||
4203 | /* | ||
4204 | ** Create the byte sequence used to represent a cell on page pPage | ||
4205 | ** and write that byte sequence into pCell[]. Overflow pages are | ||
4206 | ** allocated and filled in as necessary. The calling procedure | ||
4207 | ** is responsible for making sure sufficient space has been allocated | ||
4208 | ** for pCell[]. | ||
4209 | ** | ||
4210 | ** Note that pCell does not necessary need to point to the pPage->aData | ||
4211 | ** area. pCell might point to some temporary storage. The cell will | ||
4212 | ** be constructed in this temporary area then copied into pPage->aData | ||
4213 | ** later. | ||
4214 | */ | ||
4215 | static int fillInCell( | ||
4216 | MemPage *pPage, /* The page that contains the cell */ | ||
4217 | unsigned char *pCell, /* Complete text of the cell */ | ||
4218 | const void *pKey, i64 nKey, /* The key */ | ||
4219 | const void *pData,int nData, /* The data */ | ||
4220 | int nZero, /* Extra zero bytes to append to pData */ | ||
4221 | int *pnSize /* Write cell size here */ | ||
4222 | ){ | ||
4223 | int nPayload; | ||
4224 | const u8 *pSrc; | ||
4225 | int nSrc, n, rc; | ||
4226 | int spaceLeft; | ||
4227 | MemPage *pOvfl = 0; | ||
4228 | MemPage *pToRelease = 0; | ||
4229 | unsigned char *pPrior; | ||
4230 | unsigned char *pPayload; | ||
4231 | BtShared *pBt = pPage->pBt; | ||
4232 | Pgno pgnoOvfl = 0; | ||
4233 | int nHeader; | ||
4234 | CellInfo info; | ||
4235 | |||
4236 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4237 | |||
4238 | /* Fill in the header. */ | ||
4239 | nHeader = 0; | ||
4240 | if( !pPage->leaf ){ | ||
4241 | nHeader += 4; | ||
4242 | } | ||
4243 | if( pPage->hasData ){ | ||
4244 | nHeader += putVarint(&pCell[nHeader], nData+nZero); | ||
4245 | }else{ | ||
4246 | nData = nZero = 0; | ||
4247 | } | ||
4248 | nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); | ||
4249 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
4250 | assert( info.nHeader==nHeader ); | ||
4251 | assert( info.nKey==nKey ); | ||
4252 | assert( info.nData==nData+nZero ); | ||
4253 | |||
4254 | /* Fill in the payload */ | ||
4255 | nPayload = nData + nZero; | ||
4256 | if( pPage->intKey ){ | ||
4257 | pSrc = pData; | ||
4258 | nSrc = nData; | ||
4259 | nData = 0; | ||
4260 | }else{ | ||
4261 | nPayload += nKey; | ||
4262 | pSrc = pKey; | ||
4263 | nSrc = nKey; | ||
4264 | } | ||
4265 | *pnSize = info.nSize; | ||
4266 | spaceLeft = info.nLocal; | ||
4267 | pPayload = &pCell[nHeader]; | ||
4268 | pPrior = &pCell[info.iOverflow]; | ||
4269 | |||
4270 | while( nPayload>0 ){ | ||
4271 | if( spaceLeft==0 ){ | ||
4272 | int isExact = 0; | ||
4273 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4274 | Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ | ||
4275 | if( pBt->autoVacuum ){ | ||
4276 | do{ | ||
4277 | pgnoOvfl++; | ||
4278 | } while( | ||
4279 | PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) | ||
4280 | ); | ||
4281 | if( pgnoOvfl>1 ){ | ||
4282 | /* isExact = 1; */ | ||
4283 | } | ||
4284 | } | ||
4285 | #endif | ||
4286 | rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact); | ||
4287 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4288 | /* If the database supports auto-vacuum, and the second or subsequent | ||
4289 | ** overflow page is being allocated, add an entry to the pointer-map | ||
4290 | ** for that page now. | ||
4291 | ** | ||
4292 | ** If this is the first overflow page, then write a partial entry | ||
4293 | ** to the pointer-map. If we write nothing to this pointer-map slot, | ||
4294 | ** then the optimistic overflow chain processing in clearCell() | ||
4295 | ** may misinterpret the uninitialised values and delete the | ||
4296 | ** wrong pages from the database. | ||
4297 | */ | ||
4298 | if( pBt->autoVacuum && rc==SQLITE_OK ){ | ||
4299 | u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); | ||
4300 | rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap); | ||
4301 | if( rc ){ | ||
4302 | releasePage(pOvfl); | ||
4303 | } | ||
4304 | } | ||
4305 | #endif | ||
4306 | if( rc ){ | ||
4307 | releasePage(pToRelease); | ||
4308 | return rc; | ||
4309 | } | ||
4310 | put4byte(pPrior, pgnoOvfl); | ||
4311 | releasePage(pToRelease); | ||
4312 | pToRelease = pOvfl; | ||
4313 | pPrior = pOvfl->aData; | ||
4314 | put4byte(pPrior, 0); | ||
4315 | pPayload = &pOvfl->aData[4]; | ||
4316 | spaceLeft = pBt->usableSize - 4; | ||
4317 | } | ||
4318 | n = nPayload; | ||
4319 | if( n>spaceLeft ) n = spaceLeft; | ||
4320 | if( nSrc>0 ){ | ||
4321 | if( n>nSrc ) n = nSrc; | ||
4322 | assert( pSrc ); | ||
4323 | memcpy(pPayload, pSrc, n); | ||
4324 | }else{ | ||
4325 | memset(pPayload, 0, n); | ||
4326 | } | ||
4327 | nPayload -= n; | ||
4328 | pPayload += n; | ||
4329 | pSrc += n; | ||
4330 | nSrc -= n; | ||
4331 | spaceLeft -= n; | ||
4332 | if( nSrc==0 ){ | ||
4333 | nSrc = nData; | ||
4334 | pSrc = pData; | ||
4335 | } | ||
4336 | } | ||
4337 | releasePage(pToRelease); | ||
4338 | return SQLITE_OK; | ||
4339 | } | ||
4340 | |||
4341 | /* | ||
4342 | ** Change the MemPage.pParent pointer on the page whose number is | ||
4343 | ** given in the second argument so that MemPage.pParent holds the | ||
4344 | ** pointer in the third argument. | ||
4345 | */ | ||
4346 | static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){ | ||
4347 | MemPage *pThis; | ||
4348 | DbPage *pDbPage; | ||
4349 | |||
4350 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
4351 | assert( pNewParent!=0 ); | ||
4352 | if( pgno==0 ) return SQLITE_OK; | ||
4353 | assert( pBt->pPager!=0 ); | ||
4354 | pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); | ||
4355 | if( pDbPage ){ | ||
4356 | pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage); | ||
4357 | if( pThis->isInit ){ | ||
4358 | assert( pThis->aData==sqlite3PagerGetData(pDbPage) ); | ||
4359 | if( pThis->pParent!=pNewParent ){ | ||
4360 | if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage); | ||
4361 | pThis->pParent = pNewParent; | ||
4362 | sqlite3PagerRef(pNewParent->pDbPage); | ||
4363 | } | ||
4364 | pThis->idxParent = idx; | ||
4365 | } | ||
4366 | sqlite3PagerUnref(pDbPage); | ||
4367 | } | ||
4368 | |||
4369 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4370 | if( pBt->autoVacuum ){ | ||
4371 | return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno); | ||
4372 | } | ||
4373 | #endif | ||
4374 | return SQLITE_OK; | ||
4375 | } | ||
4376 | |||
4377 | |||
4378 | |||
4379 | /* | ||
4380 | ** Change the pParent pointer of all children of pPage to point back | ||
4381 | ** to pPage. | ||
4382 | ** | ||
4383 | ** In other words, for every child of pPage, invoke reparentPage() | ||
4384 | ** to make sure that each child knows that pPage is its parent. | ||
4385 | ** | ||
4386 | ** This routine gets called after you memcpy() one page into | ||
4387 | ** another. | ||
4388 | */ | ||
4389 | static int reparentChildPages(MemPage *pPage){ | ||
4390 | int i; | ||
4391 | BtShared *pBt = pPage->pBt; | ||
4392 | int rc = SQLITE_OK; | ||
4393 | |||
4394 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4395 | if( pPage->leaf ) return SQLITE_OK; | ||
4396 | |||
4397 | for(i=0; i<pPage->nCell; i++){ | ||
4398 | u8 *pCell = findCell(pPage, i); | ||
4399 | if( !pPage->leaf ){ | ||
4400 | rc = reparentPage(pBt, get4byte(pCell), pPage, i); | ||
4401 | if( rc!=SQLITE_OK ) return rc; | ||
4402 | } | ||
4403 | } | ||
4404 | if( !pPage->leaf ){ | ||
4405 | rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]), | ||
4406 | pPage, i); | ||
4407 | pPage->idxShift = 0; | ||
4408 | } | ||
4409 | return rc; | ||
4410 | } | ||
4411 | |||
4412 | /* | ||
4413 | ** Remove the i-th cell from pPage. This routine effects pPage only. | ||
4414 | ** The cell content is not freed or deallocated. It is assumed that | ||
4415 | ** the cell content has been copied someplace else. This routine just | ||
4416 | ** removes the reference to the cell from pPage. | ||
4417 | ** | ||
4418 | ** "sz" must be the number of bytes in the cell. | ||
4419 | */ | ||
4420 | static void dropCell(MemPage *pPage, int idx, int sz){ | ||
4421 | int i; /* Loop counter */ | ||
4422 | int pc; /* Offset to cell content of cell being deleted */ | ||
4423 | u8 *data; /* pPage->aData */ | ||
4424 | u8 *ptr; /* Used to move bytes around within data[] */ | ||
4425 | |||
4426 | assert( idx>=0 && idx<pPage->nCell ); | ||
4427 | assert( sz==cellSize(pPage, idx) ); | ||
4428 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
4429 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4430 | data = pPage->aData; | ||
4431 | ptr = &data[pPage->cellOffset + 2*idx]; | ||
4432 | pc = get2byte(ptr); | ||
4433 | assert( pc>10 && pc+sz<=pPage->pBt->usableSize ); | ||
4434 | freeSpace(pPage, pc, sz); | ||
4435 | for(i=idx+1; i<pPage->nCell; i++, ptr+=2){ | ||
4436 | ptr[0] = ptr[2]; | ||
4437 | ptr[1] = ptr[3]; | ||
4438 | } | ||
4439 | pPage->nCell--; | ||
4440 | put2byte(&data[pPage->hdrOffset+3], pPage->nCell); | ||
4441 | pPage->nFree += 2; | ||
4442 | pPage->idxShift = 1; | ||
4443 | } | ||
4444 | |||
4445 | /* | ||
4446 | ** Insert a new cell on pPage at cell index "i". pCell points to the | ||
4447 | ** content of the cell. | ||
4448 | ** | ||
4449 | ** If the cell content will fit on the page, then put it there. If it | ||
4450 | ** will not fit, then make a copy of the cell content into pTemp if | ||
4451 | ** pTemp is not null. Regardless of pTemp, allocate a new entry | ||
4452 | ** in pPage->aOvfl[] and make it point to the cell content (either | ||
4453 | ** in pTemp or the original pCell) and also record its index. | ||
4454 | ** Allocating a new entry in pPage->aCell[] implies that | ||
4455 | ** pPage->nOverflow is incremented. | ||
4456 | ** | ||
4457 | ** If nSkip is non-zero, then do not copy the first nSkip bytes of the | ||
4458 | ** cell. The caller will overwrite them after this function returns. If | ||
4459 | ** nSkip is non-zero, then pCell may not point to an invalid memory location | ||
4460 | ** (but pCell+nSkip is always valid). | ||
4461 | */ | ||
4462 | static int insertCell( | ||
4463 | MemPage *pPage, /* Page into which we are copying */ | ||
4464 | int i, /* New cell becomes the i-th cell of the page */ | ||
4465 | u8 *pCell, /* Content of the new cell */ | ||
4466 | int sz, /* Bytes of content in pCell */ | ||
4467 | u8 *pTemp, /* Temp storage space for pCell, if needed */ | ||
4468 | u8 nSkip /* Do not write the first nSkip bytes of the cell */ | ||
4469 | ){ | ||
4470 | int idx; /* Where to write new cell content in data[] */ | ||
4471 | int j; /* Loop counter */ | ||
4472 | int top; /* First byte of content for any cell in data[] */ | ||
4473 | int end; /* First byte past the last cell pointer in data[] */ | ||
4474 | int ins; /* Index in data[] where new cell pointer is inserted */ | ||
4475 | int hdr; /* Offset into data[] of the page header */ | ||
4476 | int cellOffset; /* Address of first cell pointer in data[] */ | ||
4477 | u8 *data; /* The content of the whole page */ | ||
4478 | u8 *ptr; /* Used for moving information around in data[] */ | ||
4479 | |||
4480 | assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); | ||
4481 | assert( sz==cellSizePtr(pPage, pCell) ); | ||
4482 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4483 | if( pPage->nOverflow || sz+2>pPage->nFree ){ | ||
4484 | if( pTemp ){ | ||
4485 | memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip); | ||
4486 | pCell = pTemp; | ||
4487 | } | ||
4488 | j = pPage->nOverflow++; | ||
4489 | assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) ); | ||
4490 | pPage->aOvfl[j].pCell = pCell; | ||
4491 | pPage->aOvfl[j].idx = i; | ||
4492 | pPage->nFree = 0; | ||
4493 | }else{ | ||
4494 | int rc = sqlite3PagerWrite(pPage->pDbPage); | ||
4495 | if( rc!=SQLITE_OK ){ | ||
4496 | return rc; | ||
4497 | } | ||
4498 | assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | ||
4499 | data = pPage->aData; | ||
4500 | hdr = pPage->hdrOffset; | ||
4501 | top = get2byte(&data[hdr+5]); | ||
4502 | cellOffset = pPage->cellOffset; | ||
4503 | end = cellOffset + 2*pPage->nCell + 2; | ||
4504 | ins = cellOffset + 2*i; | ||
4505 | if( end > top - sz ){ | ||
4506 | rc = defragmentPage(pPage); | ||
4507 | if( rc!=SQLITE_OK ) return rc; | ||
4508 | top = get2byte(&data[hdr+5]); | ||
4509 | assert( end + sz <= top ); | ||
4510 | } | ||
4511 | idx = allocateSpace(pPage, sz); | ||
4512 | assert( idx>0 ); | ||
4513 | assert( end <= get2byte(&data[hdr+5]) ); | ||
4514 | pPage->nCell++; | ||
4515 | pPage->nFree -= 2; | ||
4516 | memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip); | ||
4517 | for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){ | ||
4518 | ptr[0] = ptr[-2]; | ||
4519 | ptr[1] = ptr[-1]; | ||
4520 | } | ||
4521 | put2byte(&data[ins], idx); | ||
4522 | put2byte(&data[hdr+3], pPage->nCell); | ||
4523 | pPage->idxShift = 1; | ||
4524 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4525 | if( pPage->pBt->autoVacuum ){ | ||
4526 | /* The cell may contain a pointer to an overflow page. If so, write | ||
4527 | ** the entry for the overflow page into the pointer map. | ||
4528 | */ | ||
4529 | CellInfo info; | ||
4530 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
4531 | assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); | ||
4532 | if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){ | ||
4533 | Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); | ||
4534 | rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno); | ||
4535 | if( rc!=SQLITE_OK ) return rc; | ||
4536 | } | ||
4537 | } | ||
4538 | #endif | ||
4539 | } | ||
4540 | |||
4541 | return SQLITE_OK; | ||
4542 | } | ||
4543 | |||
4544 | /* | ||
4545 | ** Add a list of cells to a page. The page should be initially empty. | ||
4546 | ** The cells are guaranteed to fit on the page. | ||
4547 | */ | ||
4548 | static void assemblePage( | ||
4549 | MemPage *pPage, /* The page to be assemblied */ | ||
4550 | int nCell, /* The number of cells to add to this page */ | ||
4551 | u8 **apCell, /* Pointers to cell bodies */ | ||
4552 | int *aSize /* Sizes of the cells */ | ||
4553 | ){ | ||
4554 | int i; /* Loop counter */ | ||
4555 | int totalSize; /* Total size of all cells */ | ||
4556 | int hdr; /* Index of page header */ | ||
4557 | int cellptr; /* Address of next cell pointer */ | ||
4558 | int cellbody; /* Address of next cell body */ | ||
4559 | u8 *data; /* Data for the page */ | ||
4560 | |||
4561 | assert( pPage->nOverflow==0 ); | ||
4562 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4563 | totalSize = 0; | ||
4564 | for(i=0; i<nCell; i++){ | ||
4565 | totalSize += aSize[i]; | ||
4566 | } | ||
4567 | assert( totalSize+2*nCell<=pPage->nFree ); | ||
4568 | assert( pPage->nCell==0 ); | ||
4569 | cellptr = pPage->cellOffset; | ||
4570 | data = pPage->aData; | ||
4571 | hdr = pPage->hdrOffset; | ||
4572 | put2byte(&data[hdr+3], nCell); | ||
4573 | if( nCell ){ | ||
4574 | cellbody = allocateSpace(pPage, totalSize); | ||
4575 | assert( cellbody>0 ); | ||
4576 | assert( pPage->nFree >= 2*nCell ); | ||
4577 | pPage->nFree -= 2*nCell; | ||
4578 | for(i=0; i<nCell; i++){ | ||
4579 | put2byte(&data[cellptr], cellbody); | ||
4580 | memcpy(&data[cellbody], apCell[i], aSize[i]); | ||
4581 | cellptr += 2; | ||
4582 | cellbody += aSize[i]; | ||
4583 | } | ||
4584 | assert( cellbody==pPage->pBt->usableSize ); | ||
4585 | } | ||
4586 | pPage->nCell = nCell; | ||
4587 | } | ||
4588 | |||
4589 | /* | ||
4590 | ** The following parameters determine how many adjacent pages get involved | ||
4591 | ** in a balancing operation. NN is the number of neighbors on either side | ||
4592 | ** of the page that participate in the balancing operation. NB is the | ||
4593 | ** total number of pages that participate, including the target page and | ||
4594 | ** NN neighbors on either side. | ||
4595 | ** | ||
4596 | ** The minimum value of NN is 1 (of course). Increasing NN above 1 | ||
4597 | ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance | ||
4598 | ** in exchange for a larger degradation in INSERT and UPDATE performance. | ||
4599 | ** The value of NN appears to give the best results overall. | ||
4600 | */ | ||
4601 | #define NN 1 /* Number of neighbors on either side of pPage */ | ||
4602 | #define NB (NN*2+1) /* Total pages involved in the balance */ | ||
4603 | |||
4604 | /* Forward reference */ | ||
4605 | static int balance(MemPage*, int); | ||
4606 | |||
4607 | #ifndef SQLITE_OMIT_QUICKBALANCE | ||
4608 | /* | ||
4609 | ** This version of balance() handles the common special case where | ||
4610 | ** a new entry is being inserted on the extreme right-end of the | ||
4611 | ** tree, in other words, when the new entry will become the largest | ||
4612 | ** entry in the tree. | ||
4613 | ** | ||
4614 | ** Instead of trying balance the 3 right-most leaf pages, just add | ||
4615 | ** a new page to the right-hand side and put the one new entry in | ||
4616 | ** that page. This leaves the right side of the tree somewhat | ||
4617 | ** unbalanced. But odds are that we will be inserting new entries | ||
4618 | ** at the end soon afterwards so the nearly empty page will quickly | ||
4619 | ** fill up. On average. | ||
4620 | ** | ||
4621 | ** pPage is the leaf page which is the right-most page in the tree. | ||
4622 | ** pParent is its parent. pPage must have a single overflow entry | ||
4623 | ** which is also the right-most entry on the page. | ||
4624 | */ | ||
4625 | static int balance_quick(MemPage *pPage, MemPage *pParent){ | ||
4626 | int rc; | ||
4627 | MemPage *pNew; | ||
4628 | Pgno pgnoNew; | ||
4629 | u8 *pCell; | ||
4630 | int szCell; | ||
4631 | CellInfo info; | ||
4632 | BtShared *pBt = pPage->pBt; | ||
4633 | int parentIdx = pParent->nCell; /* pParent new divider cell index */ | ||
4634 | int parentSize; /* Size of new divider cell */ | ||
4635 | u8 parentCell[64]; /* Space for the new divider cell */ | ||
4636 | |||
4637 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4638 | |||
4639 | /* Allocate a new page. Insert the overflow cell from pPage | ||
4640 | ** into it. Then remove the overflow cell from pPage. | ||
4641 | */ | ||
4642 | rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); | ||
4643 | if( rc!=SQLITE_OK ){ | ||
4644 | return rc; | ||
4645 | } | ||
4646 | pCell = pPage->aOvfl[0].pCell; | ||
4647 | szCell = cellSizePtr(pPage, pCell); | ||
4648 | zeroPage(pNew, pPage->aData[0]); | ||
4649 | assemblePage(pNew, 1, &pCell, &szCell); | ||
4650 | pPage->nOverflow = 0; | ||
4651 | |||
4652 | /* Set the parent of the newly allocated page to pParent. */ | ||
4653 | pNew->pParent = pParent; | ||
4654 | sqlite3PagerRef(pParent->pDbPage); | ||
4655 | |||
4656 | /* pPage is currently the right-child of pParent. Change this | ||
4657 | ** so that the right-child is the new page allocated above and | ||
4658 | ** pPage is the next-to-right child. | ||
4659 | */ | ||
4660 | assert( pPage->nCell>0 ); | ||
4661 | pCell = findCell(pPage, pPage->nCell-1); | ||
4662 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
4663 | rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize); | ||
4664 | if( rc!=SQLITE_OK ){ | ||
4665 | return rc; | ||
4666 | } | ||
4667 | assert( parentSize<64 ); | ||
4668 | rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4); | ||
4669 | if( rc!=SQLITE_OK ){ | ||
4670 | return rc; | ||
4671 | } | ||
4672 | put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno); | ||
4673 | put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); | ||
4674 | |||
4675 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4676 | /* If this is an auto-vacuum database, update the pointer map | ||
4677 | ** with entries for the new page, and any pointer from the | ||
4678 | ** cell on the page to an overflow page. | ||
4679 | */ | ||
4680 | if( pBt->autoVacuum ){ | ||
4681 | rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno); | ||
4682 | if( rc==SQLITE_OK ){ | ||
4683 | rc = ptrmapPutOvfl(pNew, 0); | ||
4684 | } | ||
4685 | if( rc!=SQLITE_OK ){ | ||
4686 | releasePage(pNew); | ||
4687 | return rc; | ||
4688 | } | ||
4689 | } | ||
4690 | #endif | ||
4691 | |||
4692 | /* Release the reference to the new page and balance the parent page, | ||
4693 | ** in case the divider cell inserted caused it to become overfull. | ||
4694 | */ | ||
4695 | releasePage(pNew); | ||
4696 | return balance(pParent, 0); | ||
4697 | } | ||
4698 | #endif /* SQLITE_OMIT_QUICKBALANCE */ | ||
4699 | |||
4700 | /* | ||
4701 | ** This routine redistributes Cells on pPage and up to NN*2 siblings | ||
4702 | ** of pPage so that all pages have about the same amount of free space. | ||
4703 | ** Usually NN siblings on either side of pPage is used in the balancing, | ||
4704 | ** though more siblings might come from one side if pPage is the first | ||
4705 | ** or last child of its parent. If pPage has fewer than 2*NN siblings | ||
4706 | ** (something which can only happen if pPage is the root page or a | ||
4707 | ** child of root) then all available siblings participate in the balancing. | ||
4708 | ** | ||
4709 | ** The number of siblings of pPage might be increased or decreased by one or | ||
4710 | ** two in an effort to keep pages nearly full but not over full. The root page | ||
4711 | ** is special and is allowed to be nearly empty. If pPage is | ||
4712 | ** the root page, then the depth of the tree might be increased | ||
4713 | ** or decreased by one, as necessary, to keep the root page from being | ||
4714 | ** overfull or completely empty. | ||
4715 | ** | ||
4716 | ** Note that when this routine is called, some of the Cells on pPage | ||
4717 | ** might not actually be stored in pPage->aData[]. This can happen | ||
4718 | ** if the page is overfull. Part of the job of this routine is to | ||
4719 | ** make sure all Cells for pPage once again fit in pPage->aData[]. | ||
4720 | ** | ||
4721 | ** In the course of balancing the siblings of pPage, the parent of pPage | ||
4722 | ** might become overfull or underfull. If that happens, then this routine | ||
4723 | ** is called recursively on the parent. | ||
4724 | ** | ||
4725 | ** If this routine fails for any reason, it might leave the database | ||
4726 | ** in a corrupted state. So if this routine fails, the database should | ||
4727 | ** be rolled back. | ||
4728 | */ | ||
4729 | static int balance_nonroot(MemPage *pPage){ | ||
4730 | MemPage *pParent; /* The parent of pPage */ | ||
4731 | BtShared *pBt; /* The whole database */ | ||
4732 | int nCell = 0; /* Number of cells in apCell[] */ | ||
4733 | int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ | ||
4734 | int nOld; /* Number of pages in apOld[] */ | ||
4735 | int nNew; /* Number of pages in apNew[] */ | ||
4736 | int nDiv; /* Number of cells in apDiv[] */ | ||
4737 | int i, j, k; /* Loop counters */ | ||
4738 | int idx; /* Index of pPage in pParent->aCell[] */ | ||
4739 | int nxDiv; /* Next divider slot in pParent->aCell[] */ | ||
4740 | int rc; /* The return code */ | ||
4741 | int leafCorrection; /* 4 if pPage is a leaf. 0 if not */ | ||
4742 | int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ | ||
4743 | int usableSpace; /* Bytes in pPage beyond the header */ | ||
4744 | int pageFlags; /* Value of pPage->aData[0] */ | ||
4745 | int subtotal; /* Subtotal of bytes in cells on one page */ | ||
4746 | int iSpace = 0; /* First unused byte of aSpace[] */ | ||
4747 | MemPage *apOld[NB]; /* pPage and up to two siblings */ | ||
4748 | Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */ | ||
4749 | MemPage *apCopy[NB]; /* Private copies of apOld[] pages */ | ||
4750 | MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ | ||
4751 | Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */ | ||
4752 | u8 *apDiv[NB]; /* Divider cells in pParent */ | ||
4753 | int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */ | ||
4754 | int szNew[NB+2]; /* Combined size of cells place on i-th page */ | ||
4755 | u8 **apCell = 0; /* All cells begin balanced */ | ||
4756 | int *szCell; /* Local size of all cells in apCell[] */ | ||
4757 | u8 *aCopy[NB]; /* Space for holding data of apCopy[] */ | ||
4758 | u8 *aSpace; /* Space to hold copies of dividers cells */ | ||
4759 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4760 | u8 *aFrom = 0; | ||
4761 | #endif | ||
4762 | |||
4763 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
4764 | |||
4765 | /* | ||
4766 | ** Find the parent page. | ||
4767 | */ | ||
4768 | assert( pPage->isInit ); | ||
4769 | assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 ); | ||
4770 | pBt = pPage->pBt; | ||
4771 | pParent = pPage->pParent; | ||
4772 | assert( pParent ); | ||
4773 | if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){ | ||
4774 | return rc; | ||
4775 | } | ||
4776 | TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno)); | ||
4777 | |||
4778 | #ifndef SQLITE_OMIT_QUICKBALANCE | ||
4779 | /* | ||
4780 | ** A special case: If a new entry has just been inserted into a | ||
4781 | ** table (that is, a btree with integer keys and all data at the leaves) | ||
4782 | ** and the new entry is the right-most entry in the tree (it has the | ||
4783 | ** largest key) then use the special balance_quick() routine for | ||
4784 | ** balancing. balance_quick() is much faster and results in a tighter | ||
4785 | ** packing of data in the common case. | ||
4786 | */ | ||
4787 | if( pPage->leaf && | ||
4788 | pPage->intKey && | ||
4789 | pPage->leafData && | ||
4790 | pPage->nOverflow==1 && | ||
4791 | pPage->aOvfl[0].idx==pPage->nCell && | ||
4792 | pPage->pParent->pgno!=1 && | ||
4793 | get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno | ||
4794 | ){ | ||
4795 | /* | ||
4796 | ** TODO: Check the siblings to the left of pPage. It may be that | ||
4797 | ** they are not full and no new page is required. | ||
4798 | */ | ||
4799 | return balance_quick(pPage, pParent); | ||
4800 | } | ||
4801 | #endif | ||
4802 | |||
4803 | if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){ | ||
4804 | return rc; | ||
4805 | } | ||
4806 | |||
4807 | /* | ||
4808 | ** Find the cell in the parent page whose left child points back | ||
4809 | ** to pPage. The "idx" variable is the index of that cell. If pPage | ||
4810 | ** is the rightmost child of pParent then set idx to pParent->nCell | ||
4811 | */ | ||
4812 | if( pParent->idxShift ){ | ||
4813 | Pgno pgno; | ||
4814 | pgno = pPage->pgno; | ||
4815 | assert( pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); | ||
4816 | for(idx=0; idx<pParent->nCell; idx++){ | ||
4817 | if( get4byte(findCell(pParent, idx))==pgno ){ | ||
4818 | break; | ||
4819 | } | ||
4820 | } | ||
4821 | assert( idx<pParent->nCell | ||
4822 | || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno ); | ||
4823 | }else{ | ||
4824 | idx = pPage->idxParent; | ||
4825 | } | ||
4826 | |||
4827 | /* | ||
4828 | ** Initialize variables so that it will be safe to jump | ||
4829 | ** directly to balance_cleanup at any moment. | ||
4830 | */ | ||
4831 | nOld = nNew = 0; | ||
4832 | sqlite3PagerRef(pParent->pDbPage); | ||
4833 | |||
4834 | /* | ||
4835 | ** Find sibling pages to pPage and the cells in pParent that divide | ||
4836 | ** the siblings. An attempt is made to find NN siblings on either | ||
4837 | ** side of pPage. More siblings are taken from one side, however, if | ||
4838 | ** pPage there are fewer than NN siblings on the other side. If pParent | ||
4839 | ** has NB or fewer children then all children of pParent are taken. | ||
4840 | */ | ||
4841 | nxDiv = idx - NN; | ||
4842 | if( nxDiv + NB > pParent->nCell ){ | ||
4843 | nxDiv = pParent->nCell - NB + 1; | ||
4844 | } | ||
4845 | if( nxDiv<0 ){ | ||
4846 | nxDiv = 0; | ||
4847 | } | ||
4848 | nDiv = 0; | ||
4849 | for(i=0, k=nxDiv; i<NB; i++, k++){ | ||
4850 | if( k<pParent->nCell ){ | ||
4851 | apDiv[i] = findCell(pParent, k); | ||
4852 | nDiv++; | ||
4853 | assert( !pParent->leaf ); | ||
4854 | pgnoOld[i] = get4byte(apDiv[i]); | ||
4855 | }else if( k==pParent->nCell ){ | ||
4856 | pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]); | ||
4857 | }else{ | ||
4858 | break; | ||
4859 | } | ||
4860 | rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent); | ||
4861 | if( rc ) goto balance_cleanup; | ||
4862 | apOld[i]->idxParent = k; | ||
4863 | apCopy[i] = 0; | ||
4864 | assert( i==nOld ); | ||
4865 | nOld++; | ||
4866 | nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; | ||
4867 | } | ||
4868 | |||
4869 | /* Make nMaxCells a multiple of 2 in order to preserve 8-byte | ||
4870 | ** alignment */ | ||
4871 | nMaxCells = (nMaxCells + 1)&~1; | ||
4872 | |||
4873 | /* | ||
4874 | ** Allocate space for memory structures | ||
4875 | */ | ||
4876 | apCell = sqlite3_malloc( | ||
4877 | nMaxCells*sizeof(u8*) /* apCell */ | ||
4878 | + nMaxCells*sizeof(int) /* szCell */ | ||
4879 | + ROUND8(sizeof(MemPage))*NB /* aCopy */ | ||
4880 | + pBt->pageSize*(5+NB) /* aSpace */ | ||
4881 | + (ISAUTOVACUUM ? nMaxCells : 0) /* aFrom */ | ||
4882 | ); | ||
4883 | if( apCell==0 ){ | ||
4884 | rc = SQLITE_NOMEM; | ||
4885 | goto balance_cleanup; | ||
4886 | } | ||
4887 | szCell = (int*)&apCell[nMaxCells]; | ||
4888 | aCopy[0] = (u8*)&szCell[nMaxCells]; | ||
4889 | assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */ | ||
4890 | for(i=1; i<NB; i++){ | ||
4891 | aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))]; | ||
4892 | assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */ | ||
4893 | } | ||
4894 | aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))]; | ||
4895 | assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */ | ||
4896 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4897 | if( pBt->autoVacuum ){ | ||
4898 | aFrom = &aSpace[5*pBt->pageSize]; | ||
4899 | } | ||
4900 | #endif | ||
4901 | |||
4902 | /* | ||
4903 | ** Make copies of the content of pPage and its siblings into aOld[]. | ||
4904 | ** The rest of this function will use data from the copies rather | ||
4905 | ** that the original pages since the original pages will be in the | ||
4906 | ** process of being overwritten. | ||
4907 | */ | ||
4908 | for(i=0; i<nOld; i++){ | ||
4909 | MemPage *p = apCopy[i] = (MemPage*)aCopy[i]; | ||
4910 | memcpy(p, apOld[i], sizeof(MemPage)); | ||
4911 | p->aData = (void*)&p[1]; | ||
4912 | memcpy(p->aData, apOld[i]->aData, pBt->pageSize); | ||
4913 | } | ||
4914 | |||
4915 | /* | ||
4916 | ** Load pointers to all cells on sibling pages and the divider cells | ||
4917 | ** into the local apCell[] array. Make copies of the divider cells | ||
4918 | ** into space obtained form aSpace[] and remove the the divider Cells | ||
4919 | ** from pParent. | ||
4920 | ** | ||
4921 | ** If the siblings are on leaf pages, then the child pointers of the | ||
4922 | ** divider cells are stripped from the cells before they are copied | ||
4923 | ** into aSpace[]. In this way, all cells in apCell[] are without | ||
4924 | ** child pointers. If siblings are not leaves, then all cell in | ||
4925 | ** apCell[] include child pointers. Either way, all cells in apCell[] | ||
4926 | ** are alike. | ||
4927 | ** | ||
4928 | ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. | ||
4929 | ** leafData: 1 if pPage holds key+data and pParent holds only keys. | ||
4930 | */ | ||
4931 | nCell = 0; | ||
4932 | leafCorrection = pPage->leaf*4; | ||
4933 | leafData = pPage->leafData && pPage->leaf; | ||
4934 | for(i=0; i<nOld; i++){ | ||
4935 | MemPage *pOld = apCopy[i]; | ||
4936 | int limit = pOld->nCell+pOld->nOverflow; | ||
4937 | for(j=0; j<limit; j++){ | ||
4938 | assert( nCell<nMaxCells ); | ||
4939 | apCell[nCell] = findOverflowCell(pOld, j); | ||
4940 | szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); | ||
4941 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4942 | if( pBt->autoVacuum ){ | ||
4943 | int a; | ||
4944 | aFrom[nCell] = i; | ||
4945 | for(a=0; a<pOld->nOverflow; a++){ | ||
4946 | if( pOld->aOvfl[a].pCell==apCell[nCell] ){ | ||
4947 | aFrom[nCell] = 0xFF; | ||
4948 | break; | ||
4949 | } | ||
4950 | } | ||
4951 | } | ||
4952 | #endif | ||
4953 | nCell++; | ||
4954 | } | ||
4955 | if( i<nOld-1 ){ | ||
4956 | int sz = cellSizePtr(pParent, apDiv[i]); | ||
4957 | if( leafData ){ | ||
4958 | /* With the LEAFDATA flag, pParent cells hold only INTKEYs that | ||
4959 | ** are duplicates of keys on the child pages. We need to remove | ||
4960 | ** the divider cells from pParent, but the dividers cells are not | ||
4961 | ** added to apCell[] because they are duplicates of child cells. | ||
4962 | */ | ||
4963 | dropCell(pParent, nxDiv, sz); | ||
4964 | }else{ | ||
4965 | u8 *pTemp; | ||
4966 | assert( nCell<nMaxCells ); | ||
4967 | szCell[nCell] = sz; | ||
4968 | pTemp = &aSpace[iSpace]; | ||
4969 | iSpace += sz; | ||
4970 | assert( iSpace<=pBt->pageSize*5 ); | ||
4971 | memcpy(pTemp, apDiv[i], sz); | ||
4972 | apCell[nCell] = pTemp+leafCorrection; | ||
4973 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4974 | if( pBt->autoVacuum ){ | ||
4975 | aFrom[nCell] = 0xFF; | ||
4976 | } | ||
4977 | #endif | ||
4978 | dropCell(pParent, nxDiv, sz); | ||
4979 | szCell[nCell] -= leafCorrection; | ||
4980 | assert( get4byte(pTemp)==pgnoOld[i] ); | ||
4981 | if( !pOld->leaf ){ | ||
4982 | assert( leafCorrection==0 ); | ||
4983 | /* The right pointer of the child page pOld becomes the left | ||
4984 | ** pointer of the divider cell */ | ||
4985 | memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4); | ||
4986 | }else{ | ||
4987 | assert( leafCorrection==4 ); | ||
4988 | if( szCell[nCell]<4 ){ | ||
4989 | /* Do not allow any cells smaller than 4 bytes. */ | ||
4990 | szCell[nCell] = 4; | ||
4991 | } | ||
4992 | } | ||
4993 | nCell++; | ||
4994 | } | ||
4995 | } | ||
4996 | } | ||
4997 | |||
4998 | /* | ||
4999 | ** Figure out the number of pages needed to hold all nCell cells. | ||
5000 | ** Store this number in "k". Also compute szNew[] which is the total | ||
5001 | ** size of all cells on the i-th page and cntNew[] which is the index | ||
5002 | ** in apCell[] of the cell that divides page i from page i+1. | ||
5003 | ** cntNew[k] should equal nCell. | ||
5004 | ** | ||
5005 | ** Values computed by this block: | ||
5006 | ** | ||
5007 | ** k: The total number of sibling pages | ||
5008 | ** szNew[i]: Spaced used on the i-th sibling page. | ||
5009 | ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to | ||
5010 | ** the right of the i-th sibling page. | ||
5011 | ** usableSpace: Number of bytes of space available on each sibling. | ||
5012 | ** | ||
5013 | */ | ||
5014 | usableSpace = pBt->usableSize - 12 + leafCorrection; | ||
5015 | for(subtotal=k=i=0; i<nCell; i++){ | ||
5016 | assert( i<nMaxCells ); | ||
5017 | subtotal += szCell[i] + 2; | ||
5018 | if( subtotal > usableSpace ){ | ||
5019 | szNew[k] = subtotal - szCell[i]; | ||
5020 | cntNew[k] = i; | ||
5021 | if( leafData ){ i--; } | ||
5022 | subtotal = 0; | ||
5023 | k++; | ||
5024 | } | ||
5025 | } | ||
5026 | szNew[k] = subtotal; | ||
5027 | cntNew[k] = nCell; | ||
5028 | k++; | ||
5029 | |||
5030 | /* | ||
5031 | ** The packing computed by the previous block is biased toward the siblings | ||
5032 | ** on the left side. The left siblings are always nearly full, while the | ||
5033 | ** right-most sibling might be nearly empty. This block of code attempts | ||
5034 | ** to adjust the packing of siblings to get a better balance. | ||
5035 | ** | ||
5036 | ** This adjustment is more than an optimization. The packing above might | ||
5037 | ** be so out of balance as to be illegal. For example, the right-most | ||
5038 | ** sibling might be completely empty. This adjustment is not optional. | ||
5039 | */ | ||
5040 | for(i=k-1; i>0; i--){ | ||
5041 | int szRight = szNew[i]; /* Size of sibling on the right */ | ||
5042 | int szLeft = szNew[i-1]; /* Size of sibling on the left */ | ||
5043 | int r; /* Index of right-most cell in left sibling */ | ||
5044 | int d; /* Index of first cell to the left of right sibling */ | ||
5045 | |||
5046 | r = cntNew[i-1] - 1; | ||
5047 | d = r + 1 - leafData; | ||
5048 | assert( d<nMaxCells ); | ||
5049 | assert( r<nMaxCells ); | ||
5050 | while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){ | ||
5051 | szRight += szCell[d] + 2; | ||
5052 | szLeft -= szCell[r] + 2; | ||
5053 | cntNew[i-1]--; | ||
5054 | r = cntNew[i-1] - 1; | ||
5055 | d = r + 1 - leafData; | ||
5056 | } | ||
5057 | szNew[i] = szRight; | ||
5058 | szNew[i-1] = szLeft; | ||
5059 | } | ||
5060 | |||
5061 | /* Either we found one or more cells (cntnew[0])>0) or we are the | ||
5062 | ** a virtual root page. A virtual root page is when the real root | ||
5063 | ** page is page 1 and we are the only child of that page. | ||
5064 | */ | ||
5065 | assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) ); | ||
5066 | |||
5067 | /* | ||
5068 | ** Allocate k new pages. Reuse old pages where possible. | ||
5069 | */ | ||
5070 | assert( pPage->pgno>1 ); | ||
5071 | pageFlags = pPage->aData[0]; | ||
5072 | for(i=0; i<k; i++){ | ||
5073 | MemPage *pNew; | ||
5074 | if( i<nOld ){ | ||
5075 | pNew = apNew[i] = apOld[i]; | ||
5076 | pgnoNew[i] = pgnoOld[i]; | ||
5077 | apOld[i] = 0; | ||
5078 | rc = sqlite3PagerWrite(pNew->pDbPage); | ||
5079 | nNew++; | ||
5080 | if( rc ) goto balance_cleanup; | ||
5081 | }else{ | ||
5082 | assert( i>0 ); | ||
5083 | rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0); | ||
5084 | if( rc ) goto balance_cleanup; | ||
5085 | apNew[i] = pNew; | ||
5086 | nNew++; | ||
5087 | } | ||
5088 | zeroPage(pNew, pageFlags); | ||
5089 | } | ||
5090 | |||
5091 | /* Free any old pages that were not reused as new pages. | ||
5092 | */ | ||
5093 | while( i<nOld ){ | ||
5094 | rc = freePage(apOld[i]); | ||
5095 | if( rc ) goto balance_cleanup; | ||
5096 | releasePage(apOld[i]); | ||
5097 | apOld[i] = 0; | ||
5098 | i++; | ||
5099 | } | ||
5100 | |||
5101 | /* | ||
5102 | ** Put the new pages in accending order. This helps to | ||
5103 | ** keep entries in the disk file in order so that a scan | ||
5104 | ** of the table is a linear scan through the file. That | ||
5105 | ** in turn helps the operating system to deliver pages | ||
5106 | ** from the disk more rapidly. | ||
5107 | ** | ||
5108 | ** An O(n^2) insertion sort algorithm is used, but since | ||
5109 | ** n is never more than NB (a small constant), that should | ||
5110 | ** not be a problem. | ||
5111 | ** | ||
5112 | ** When NB==3, this one optimization makes the database | ||
5113 | ** about 25% faster for large insertions and deletions. | ||
5114 | */ | ||
5115 | for(i=0; i<k-1; i++){ | ||
5116 | int minV = pgnoNew[i]; | ||
5117 | int minI = i; | ||
5118 | for(j=i+1; j<k; j++){ | ||
5119 | if( pgnoNew[j]<(unsigned)minV ){ | ||
5120 | minI = j; | ||
5121 | minV = pgnoNew[j]; | ||
5122 | } | ||
5123 | } | ||
5124 | if( minI>i ){ | ||
5125 | int t; | ||
5126 | MemPage *pT; | ||
5127 | t = pgnoNew[i]; | ||
5128 | pT = apNew[i]; | ||
5129 | pgnoNew[i] = pgnoNew[minI]; | ||
5130 | apNew[i] = apNew[minI]; | ||
5131 | pgnoNew[minI] = t; | ||
5132 | apNew[minI] = pT; | ||
5133 | } | ||
5134 | } | ||
5135 | TRACE(("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n", | ||
5136 | pgnoOld[0], | ||
5137 | nOld>=2 ? pgnoOld[1] : 0, | ||
5138 | nOld>=3 ? pgnoOld[2] : 0, | ||
5139 | pgnoNew[0], szNew[0], | ||
5140 | nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0, | ||
5141 | nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0, | ||
5142 | nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0, | ||
5143 | nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0)); | ||
5144 | |||
5145 | /* | ||
5146 | ** Evenly distribute the data in apCell[] across the new pages. | ||
5147 | ** Insert divider cells into pParent as necessary. | ||
5148 | */ | ||
5149 | j = 0; | ||
5150 | for(i=0; i<nNew; i++){ | ||
5151 | /* Assemble the new sibling page. */ | ||
5152 | MemPage *pNew = apNew[i]; | ||
5153 | assert( j<nMaxCells ); | ||
5154 | assert( pNew->pgno==pgnoNew[i] ); | ||
5155 | assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]); | ||
5156 | assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) ); | ||
5157 | assert( pNew->nOverflow==0 ); | ||
5158 | |||
5159 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
5160 | /* If this is an auto-vacuum database, update the pointer map entries | ||
5161 | ** that point to the siblings that were rearranged. These can be: left | ||
5162 | ** children of cells, the right-child of the page, or overflow pages | ||
5163 | ** pointed to by cells. | ||
5164 | */ | ||
5165 | if( pBt->autoVacuum ){ | ||
5166 | for(k=j; k<cntNew[i]; k++){ | ||
5167 | assert( k<nMaxCells ); | ||
5168 | if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){ | ||
5169 | rc = ptrmapPutOvfl(pNew, k-j); | ||
5170 | if( rc!=SQLITE_OK ){ | ||
5171 | goto balance_cleanup; | ||
5172 | } | ||
5173 | } | ||
5174 | } | ||
5175 | } | ||
5176 | #endif | ||
5177 | |||
5178 | j = cntNew[i]; | ||
5179 | |||
5180 | /* If the sibling page assembled above was not the right-most sibling, | ||
5181 | ** insert a divider cell into the parent page. | ||
5182 | */ | ||
5183 | if( i<nNew-1 && j<nCell ){ | ||
5184 | u8 *pCell; | ||
5185 | u8 *pTemp; | ||
5186 | int sz; | ||
5187 | |||
5188 | assert( j<nMaxCells ); | ||
5189 | pCell = apCell[j]; | ||
5190 | sz = szCell[j] + leafCorrection; | ||
5191 | if( !pNew->leaf ){ | ||
5192 | memcpy(&pNew->aData[8], pCell, 4); | ||
5193 | pTemp = 0; | ||
5194 | }else if( leafData ){ | ||
5195 | /* If the tree is a leaf-data tree, and the siblings are leaves, | ||
5196 | ** then there is no divider cell in apCell[]. Instead, the divider | ||
5197 | ** cell consists of the integer key for the right-most cell of | ||
5198 | ** the sibling-page assembled above only. | ||
5199 | */ | ||
5200 | CellInfo info; | ||
5201 | j--; | ||
5202 | sqlite3BtreeParseCellPtr(pNew, apCell[j], &info); | ||
5203 | pCell = &aSpace[iSpace]; | ||
5204 | fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz); | ||
5205 | iSpace += sz; | ||
5206 | assert( iSpace<=pBt->pageSize*5 ); | ||
5207 | pTemp = 0; | ||
5208 | }else{ | ||
5209 | pCell -= 4; | ||
5210 | pTemp = &aSpace[iSpace]; | ||
5211 | iSpace += sz; | ||
5212 | assert( iSpace<=pBt->pageSize*5 ); | ||
5213 | /* Obscure case for non-leaf-data trees: If the cell at pCell was | ||
5214 | ** previously stored on a leaf node, and it's reported size was 4 | ||
5215 | ** bytes, then it may actually be smaller than this | ||
5216 | ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of | ||
5217 | ** any cell). But it's important to pass the correct size to | ||
5218 | ** insertCell(), so reparse the cell now. | ||
5219 | ** | ||
5220 | ** Note that this can never happen in an SQLite data file, as all | ||
5221 | ** cells are at least 4 bytes. It only happens in b-trees used | ||
5222 | ** to evaluate "IN (SELECT ...)" and similar clauses. | ||
5223 | */ | ||
5224 | if( szCell[j]==4 ){ | ||
5225 | assert(leafCorrection==4); | ||
5226 | sz = cellSizePtr(pParent, pCell); | ||
5227 | } | ||
5228 | } | ||
5229 | rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4); | ||
5230 | if( rc!=SQLITE_OK ) goto balance_cleanup; | ||
5231 | put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno); | ||
5232 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
5233 | /* If this is an auto-vacuum database, and not a leaf-data tree, | ||
5234 | ** then update the pointer map with an entry for the overflow page | ||
5235 | ** that the cell just inserted points to (if any). | ||
5236 | */ | ||
5237 | if( pBt->autoVacuum && !leafData ){ | ||
5238 | rc = ptrmapPutOvfl(pParent, nxDiv); | ||
5239 | if( rc!=SQLITE_OK ){ | ||
5240 | goto balance_cleanup; | ||
5241 | } | ||
5242 | } | ||
5243 | #endif | ||
5244 | j++; | ||
5245 | nxDiv++; | ||
5246 | } | ||
5247 | } | ||
5248 | assert( j==nCell ); | ||
5249 | assert( nOld>0 ); | ||
5250 | assert( nNew>0 ); | ||
5251 | if( (pageFlags & PTF_LEAF)==0 ){ | ||
5252 | memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4); | ||
5253 | } | ||
5254 | if( nxDiv==pParent->nCell+pParent->nOverflow ){ | ||
5255 | /* Right-most sibling is the right-most child of pParent */ | ||
5256 | put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]); | ||
5257 | }else{ | ||
5258 | /* Right-most sibling is the left child of the first entry in pParent | ||
5259 | ** past the right-most divider entry */ | ||
5260 | put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]); | ||
5261 | } | ||
5262 | |||
5263 | /* | ||
5264 | ** Reparent children of all cells. | ||
5265 | */ | ||
5266 | for(i=0; i<nNew; i++){ | ||
5267 | rc = reparentChildPages(apNew[i]); | ||
5268 | if( rc!=SQLITE_OK ) goto balance_cleanup; | ||
5269 | } | ||
5270 | rc = reparentChildPages(pParent); | ||
5271 | if( rc!=SQLITE_OK ) goto balance_cleanup; | ||
5272 | |||
5273 | /* | ||
5274 | ** Balance the parent page. Note that the current page (pPage) might | ||
5275 | ** have been added to the freelist so it might no longer be initialized. | ||
5276 | ** But the parent page will always be initialized. | ||
5277 | */ | ||
5278 | assert( pParent->isInit ); | ||
5279 | rc = balance(pParent, 0); | ||
5280 | |||
5281 | /* | ||
5282 | ** Cleanup before returning. | ||
5283 | */ | ||
5284 | balance_cleanup: | ||
5285 | sqlite3_free(apCell); | ||
5286 | for(i=0; i<nOld; i++){ | ||
5287 | releasePage(apOld[i]); | ||
5288 | } | ||
5289 | for(i=0; i<nNew; i++){ | ||
5290 | releasePage(apNew[i]); | ||
5291 | } | ||
5292 | releasePage(pParent); | ||
5293 | TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n", | ||
5294 | pPage->pgno, nOld, nNew, nCell)); | ||
5295 | return rc; | ||
5296 | } | ||
5297 | |||
5298 | /* | ||
5299 | ** This routine is called for the root page of a btree when the root | ||
5300 | ** page contains no cells. This is an opportunity to make the tree | ||
5301 | ** shallower by one level. | ||
5302 | */ | ||
5303 | static int balance_shallower(MemPage *pPage){ | ||
5304 | MemPage *pChild; /* The only child page of pPage */ | ||
5305 | Pgno pgnoChild; /* Page number for pChild */ | ||
5306 | int rc = SQLITE_OK; /* Return code from subprocedures */ | ||
5307 | BtShared *pBt; /* The main BTree structure */ | ||
5308 | int mxCellPerPage; /* Maximum number of cells per page */ | ||
5309 | u8 **apCell; /* All cells from pages being balanced */ | ||
5310 | int *szCell; /* Local size of all cells */ | ||
5311 | |||
5312 | assert( pPage->pParent==0 ); | ||
5313 | assert( pPage->nCell==0 ); | ||
5314 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
5315 | pBt = pPage->pBt; | ||
5316 | mxCellPerPage = MX_CELL(pBt); | ||
5317 | apCell = sqlite3_malloc( mxCellPerPage*(sizeof(u8*)+sizeof(int)) ); | ||
5318 | if( apCell==0 ) return SQLITE_NOMEM; | ||
5319 | szCell = (int*)&apCell[mxCellPerPage]; | ||
5320 | if( pPage->leaf ){ | ||
5321 | /* The table is completely empty */ | ||
5322 | TRACE(("BALANCE: empty table %d\n", pPage->pgno)); | ||
5323 | }else{ | ||
5324 | /* The root page is empty but has one child. Transfer the | ||
5325 | ** information from that one child into the root page if it | ||
5326 | ** will fit. This reduces the depth of the tree by one. | ||
5327 | ** | ||
5328 | ** If the root page is page 1, it has less space available than | ||
5329 | ** its child (due to the 100 byte header that occurs at the beginning | ||
5330 | ** of the database fle), so it might not be able to hold all of the | ||
5331 | ** information currently contained in the child. If this is the | ||
5332 | ** case, then do not do the transfer. Leave page 1 empty except | ||
5333 | ** for the right-pointer to the child page. The child page becomes | ||
5334 | ** the virtual root of the tree. | ||
5335 | */ | ||
5336 | pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]); | ||
5337 | assert( pgnoChild>0 ); | ||
5338 | assert( pgnoChild<=sqlite3PagerPagecount(pPage->pBt->pPager) ); | ||
5339 | rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0); | ||
5340 | if( rc ) goto end_shallow_balance; | ||
5341 | if( pPage->pgno==1 ){ | ||
5342 | rc = sqlite3BtreeInitPage(pChild, pPage); | ||
5343 | if( rc ) goto end_shallow_balance; | ||
5344 | assert( pChild->nOverflow==0 ); | ||
5345 | if( pChild->nFree>=100 ){ | ||
5346 | /* The child information will fit on the root page, so do the | ||
5347 | ** copy */ | ||
5348 | int i; | ||
5349 | zeroPage(pPage, pChild->aData[0]); | ||
5350 | for(i=0; i<pChild->nCell; i++){ | ||
5351 | apCell[i] = findCell(pChild,i); | ||
5352 | szCell[i] = cellSizePtr(pChild, apCell[i]); | ||
5353 | } | ||
5354 | assemblePage(pPage, pChild->nCell, apCell, szCell); | ||
5355 | /* Copy the right-pointer of the child to the parent. */ | ||
5356 | put4byte(&pPage->aData[pPage->hdrOffset+8], | ||
5357 | get4byte(&pChild->aData[pChild->hdrOffset+8])); | ||
5358 | freePage(pChild); | ||
5359 | TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno)); | ||
5360 | }else{ | ||
5361 | /* The child has more information that will fit on the root. | ||
5362 | ** The tree is already balanced. Do nothing. */ | ||
5363 | TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno)); | ||
5364 | } | ||
5365 | }else{ | ||
5366 | memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize); | ||
5367 | pPage->isInit = 0; | ||
5368 | pPage->pParent = 0; | ||
5369 | rc = sqlite3BtreeInitPage(pPage, 0); | ||
5370 | assert( rc==SQLITE_OK ); | ||
5371 | freePage(pChild); | ||
5372 | TRACE(("BALANCE: transfer child %d into root %d\n", | ||
5373 | pChild->pgno, pPage->pgno)); | ||
5374 | } | ||
5375 | rc = reparentChildPages(pPage); | ||
5376 | assert( pPage->nOverflow==0 ); | ||
5377 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
5378 | if( pBt->autoVacuum ){ | ||
5379 | int i; | ||
5380 | for(i=0; i<pPage->nCell; i++){ | ||
5381 | rc = ptrmapPutOvfl(pPage, i); | ||
5382 | if( rc!=SQLITE_OK ){ | ||
5383 | goto end_shallow_balance; | ||
5384 | } | ||
5385 | } | ||
5386 | } | ||
5387 | #endif | ||
5388 | releasePage(pChild); | ||
5389 | } | ||
5390 | end_shallow_balance: | ||
5391 | sqlite3_free(apCell); | ||
5392 | return rc; | ||
5393 | } | ||
5394 | |||
5395 | |||
5396 | /* | ||
5397 | ** The root page is overfull | ||
5398 | ** | ||
5399 | ** When this happens, Create a new child page and copy the | ||
5400 | ** contents of the root into the child. Then make the root | ||
5401 | ** page an empty page with rightChild pointing to the new | ||
5402 | ** child. Finally, call balance_internal() on the new child | ||
5403 | ** to cause it to split. | ||
5404 | */ | ||
5405 | static int balance_deeper(MemPage *pPage){ | ||
5406 | int rc; /* Return value from subprocedures */ | ||
5407 | MemPage *pChild; /* Pointer to a new child page */ | ||
5408 | Pgno pgnoChild; /* Page number of the new child page */ | ||
5409 | BtShared *pBt; /* The BTree */ | ||
5410 | int usableSize; /* Total usable size of a page */ | ||
5411 | u8 *data; /* Content of the parent page */ | ||
5412 | u8 *cdata; /* Content of the child page */ | ||
5413 | int hdr; /* Offset to page header in parent */ | ||
5414 | int brk; /* Offset to content of first cell in parent */ | ||
5415 | |||
5416 | assert( pPage->pParent==0 ); | ||
5417 | assert( pPage->nOverflow>0 ); | ||
5418 | pBt = pPage->pBt; | ||
5419 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
5420 | rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0); | ||
5421 | if( rc ) return rc; | ||
5422 | assert( sqlite3PagerIswriteable(pChild->pDbPage) ); | ||
5423 | usableSize = pBt->usableSize; | ||
5424 | data = pPage->aData; | ||
5425 | hdr = pPage->hdrOffset; | ||
5426 | brk = get2byte(&data[hdr+5]); | ||
5427 | cdata = pChild->aData; | ||
5428 | memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr); | ||
5429 | memcpy(&cdata[brk], &data[brk], usableSize-brk); | ||
5430 | assert( pChild->isInit==0 ); | ||
5431 | rc = sqlite3BtreeInitPage(pChild, pPage); | ||
5432 | if( rc ) goto balancedeeper_out; | ||
5433 | memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0])); | ||
5434 | pChild->nOverflow = pPage->nOverflow; | ||
5435 | if( pChild->nOverflow ){ | ||
5436 | pChild->nFree = 0; | ||
5437 | } | ||
5438 | assert( pChild->nCell==pPage->nCell ); | ||
5439 | zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF); | ||
5440 | put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild); | ||
5441 | TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno)); | ||
5442 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
5443 | if( pBt->autoVacuum ){ | ||
5444 | int i; | ||
5445 | rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno); | ||
5446 | if( rc ) goto balancedeeper_out; | ||
5447 | for(i=0; i<pChild->nCell; i++){ | ||
5448 | rc = ptrmapPutOvfl(pChild, i); | ||
5449 | if( rc!=SQLITE_OK ){ | ||
5450 | return rc; | ||
5451 | } | ||
5452 | } | ||
5453 | } | ||
5454 | #endif | ||
5455 | rc = balance_nonroot(pChild); | ||
5456 | |||
5457 | balancedeeper_out: | ||
5458 | releasePage(pChild); | ||
5459 | return rc; | ||
5460 | } | ||
5461 | |||
5462 | /* | ||
5463 | ** Decide if the page pPage needs to be balanced. If balancing is | ||
5464 | ** required, call the appropriate balancing routine. | ||
5465 | */ | ||
5466 | static int balance(MemPage *pPage, int insert){ | ||
5467 | int rc = SQLITE_OK; | ||
5468 | assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | ||
5469 | if( pPage->pParent==0 ){ | ||
5470 | rc = sqlite3PagerWrite(pPage->pDbPage); | ||
5471 | if( rc==SQLITE_OK && pPage->nOverflow>0 ){ | ||
5472 | rc = balance_deeper(pPage); | ||
5473 | } | ||
5474 | if( rc==SQLITE_OK && pPage->nCell==0 ){ | ||
5475 | rc = balance_shallower(pPage); | ||
5476 | } | ||
5477 | }else{ | ||
5478 | if( pPage->nOverflow>0 || | ||
5479 | (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){ | ||
5480 | rc = balance_nonroot(pPage); | ||
5481 | } | ||
5482 | } | ||
5483 | return rc; | ||
5484 | } | ||
5485 | |||
5486 | /* | ||
5487 | ** This routine checks all cursors that point to table pgnoRoot. | ||
5488 | ** If any of those cursors were opened with wrFlag==0 in a different | ||
5489 | ** database connection (a database connection that shares the pager | ||
5490 | ** cache with the current connection) and that other connection | ||
5491 | ** is not in the ReadUncommmitted state, then this routine returns | ||
5492 | ** SQLITE_LOCKED. | ||
5493 | ** | ||
5494 | ** In addition to checking for read-locks (where a read-lock | ||
5495 | ** means a cursor opened with wrFlag==0) this routine also moves | ||
5496 | ** all write cursors so that they are pointing to the | ||
5497 | ** first Cell on the root page. This is necessary because an insert | ||
5498 | ** or delete might change the number of cells on a page or delete | ||
5499 | ** a page entirely and we do not want to leave any cursors | ||
5500 | ** pointing to non-existant pages or cells. | ||
5501 | */ | ||
5502 | static int checkReadLocks(Btree *pBtree, Pgno pgnoRoot, BtCursor *pExclude){ | ||
5503 | BtCursor *p; | ||
5504 | BtShared *pBt = pBtree->pBt; | ||
5505 | sqlite3 *db = pBtree->pSqlite; | ||
5506 | assert( sqlite3BtreeHoldsMutex(pBtree) ); | ||
5507 | for(p=pBt->pCursor; p; p=p->pNext){ | ||
5508 | if( p==pExclude ) continue; | ||
5509 | if( p->eState!=CURSOR_VALID ) continue; | ||
5510 | if( p->pgnoRoot!=pgnoRoot ) continue; | ||
5511 | if( p->wrFlag==0 ){ | ||
5512 | sqlite3 *dbOther = p->pBtree->pSqlite; | ||
5513 | if( dbOther==0 || | ||
5514 | (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){ | ||
5515 | return SQLITE_LOCKED; | ||
5516 | } | ||
5517 | }else if( p->pPage->pgno!=p->pgnoRoot ){ | ||
5518 | moveToRoot(p); | ||
5519 | } | ||
5520 | } | ||
5521 | return SQLITE_OK; | ||
5522 | } | ||
5523 | |||
5524 | /* | ||
5525 | ** Insert a new record into the BTree. The key is given by (pKey,nKey) | ||
5526 | ** and the data is given by (pData,nData). The cursor is used only to | ||
5527 | ** define what table the record should be inserted into. The cursor | ||
5528 | ** is left pointing at a random location. | ||
5529 | ** | ||
5530 | ** For an INTKEY table, only the nKey value of the key is used. pKey is | ||
5531 | ** ignored. For a ZERODATA table, the pData and nData are both ignored. | ||
5532 | */ | ||
5533 | int sqlite3BtreeInsert( | ||
5534 | BtCursor *pCur, /* Insert data into the table of this cursor */ | ||
5535 | const void *pKey, i64 nKey, /* The key of the new record */ | ||
5536 | const void *pData, int nData, /* The data of the new record */ | ||
5537 | int nZero, /* Number of extra 0 bytes to append to data */ | ||
5538 | int appendBias /* True if this is likely an append */ | ||
5539 | ){ | ||
5540 | int rc; | ||
5541 | int loc; | ||
5542 | int szNew; | ||
5543 | MemPage *pPage; | ||
5544 | Btree *p = pCur->pBtree; | ||
5545 | BtShared *pBt = p->pBt; | ||
5546 | unsigned char *oldCell; | ||
5547 | unsigned char *newCell = 0; | ||
5548 | |||
5549 | assert( cursorHoldsMutex(pCur) ); | ||
5550 | if( pBt->inTransaction!=TRANS_WRITE ){ | ||
5551 | /* Must start a transaction before doing an insert */ | ||
5552 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
5553 | return rc; | ||
5554 | } | ||
5555 | assert( !pBt->readOnly ); | ||
5556 | if( !pCur->wrFlag ){ | ||
5557 | return SQLITE_PERM; /* Cursor not open for writing */ | ||
5558 | } | ||
5559 | if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){ | ||
5560 | return SQLITE_LOCKED; /* The table pCur points to has a read lock */ | ||
5561 | } | ||
5562 | if( pCur->eState==CURSOR_FAULT ){ | ||
5563 | return pCur->skip; | ||
5564 | } | ||
5565 | |||
5566 | /* Save the positions of any other cursors open on this table */ | ||
5567 | clearCursorPosition(pCur); | ||
5568 | if( | ||
5569 | SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) || | ||
5570 | SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc)) | ||
5571 | ){ | ||
5572 | return rc; | ||
5573 | } | ||
5574 | |||
5575 | pPage = pCur->pPage; | ||
5576 | assert( pPage->intKey || nKey>=0 ); | ||
5577 | assert( pPage->leaf || !pPage->leafData ); | ||
5578 | TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", | ||
5579 | pCur->pgnoRoot, nKey, nData, pPage->pgno, | ||
5580 | loc==0 ? "overwrite" : "new entry")); | ||
5581 | assert( pPage->isInit ); | ||
5582 | newCell = sqlite3_malloc( MX_CELL_SIZE(pBt) ); | ||
5583 | if( newCell==0 ) return SQLITE_NOMEM; | ||
5584 | rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); | ||
5585 | if( rc ) goto end_insert; | ||
5586 | assert( szNew==cellSizePtr(pPage, newCell) ); | ||
5587 | assert( szNew<=MX_CELL_SIZE(pBt) ); | ||
5588 | if( loc==0 && CURSOR_VALID==pCur->eState ){ | ||
5589 | int szOld; | ||
5590 | assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); | ||
5591 | rc = sqlite3PagerWrite(pPage->pDbPage); | ||
5592 | if( rc ){ | ||
5593 | goto end_insert; | ||
5594 | } | ||
5595 | oldCell = findCell(pPage, pCur->idx); | ||
5596 | if( !pPage->leaf ){ | ||
5597 | memcpy(newCell, oldCell, 4); | ||
5598 | } | ||
5599 | szOld = cellSizePtr(pPage, oldCell); | ||
5600 | rc = clearCell(pPage, oldCell); | ||
5601 | if( rc ) goto end_insert; | ||
5602 | dropCell(pPage, pCur->idx, szOld); | ||
5603 | }else if( loc<0 && pPage->nCell>0 ){ | ||
5604 | assert( pPage->leaf ); | ||
5605 | pCur->idx++; | ||
5606 | pCur->info.nSize = 0; | ||
5607 | }else{ | ||
5608 | assert( pPage->leaf ); | ||
5609 | } | ||
5610 | rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0); | ||
5611 | if( rc!=SQLITE_OK ) goto end_insert; | ||
5612 | rc = balance(pPage, 1); | ||
5613 | /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */ | ||
5614 | /* fflush(stdout); */ | ||
5615 | if( rc==SQLITE_OK ){ | ||
5616 | moveToRoot(pCur); | ||
5617 | } | ||
5618 | end_insert: | ||
5619 | sqlite3_free(newCell); | ||
5620 | return rc; | ||
5621 | } | ||
5622 | |||
5623 | /* | ||
5624 | ** Delete the entry that the cursor is pointing to. The cursor | ||
5625 | ** is left pointing at a random location. | ||
5626 | */ | ||
5627 | int sqlite3BtreeDelete(BtCursor *pCur){ | ||
5628 | MemPage *pPage = pCur->pPage; | ||
5629 | unsigned char *pCell; | ||
5630 | int rc; | ||
5631 | Pgno pgnoChild = 0; | ||
5632 | Btree *p = pCur->pBtree; | ||
5633 | BtShared *pBt = p->pBt; | ||
5634 | |||
5635 | assert( cursorHoldsMutex(pCur) ); | ||
5636 | assert( pPage->isInit ); | ||
5637 | if( pBt->inTransaction!=TRANS_WRITE ){ | ||
5638 | /* Must start a transaction before doing a delete */ | ||
5639 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
5640 | return rc; | ||
5641 | } | ||
5642 | assert( !pBt->readOnly ); | ||
5643 | if( pCur->eState==CURSOR_FAULT ){ | ||
5644 | return pCur->skip; | ||
5645 | } | ||
5646 | if( pCur->idx >= pPage->nCell ){ | ||
5647 | return SQLITE_ERROR; /* The cursor is not pointing to anything */ | ||
5648 | } | ||
5649 | if( !pCur->wrFlag ){ | ||
5650 | return SQLITE_PERM; /* Did not open this cursor for writing */ | ||
5651 | } | ||
5652 | if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){ | ||
5653 | return SQLITE_LOCKED; /* The table pCur points to has a read lock */ | ||
5654 | } | ||
5655 | |||
5656 | /* Restore the current cursor position (a no-op if the cursor is not in | ||
5657 | ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors | ||
5658 | ** open on the same table. Then call sqlite3PagerWrite() on the page | ||
5659 | ** that the entry will be deleted from. | ||
5660 | */ | ||
5661 | if( | ||
5662 | (rc = restoreOrClearCursorPosition(pCur))!=0 || | ||
5663 | (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 || | ||
5664 | (rc = sqlite3PagerWrite(pPage->pDbPage))!=0 | ||
5665 | ){ | ||
5666 | return rc; | ||
5667 | } | ||
5668 | |||
5669 | /* Locate the cell within it's page and leave pCell pointing to the | ||
5670 | ** data. The clearCell() call frees any overflow pages associated with the | ||
5671 | ** cell. The cell itself is still intact. | ||
5672 | */ | ||
5673 | pCell = findCell(pPage, pCur->idx); | ||
5674 | if( !pPage->leaf ){ | ||
5675 | pgnoChild = get4byte(pCell); | ||
5676 | } | ||
5677 | rc = clearCell(pPage, pCell); | ||
5678 | if( rc ){ | ||
5679 | return rc; | ||
5680 | } | ||
5681 | |||
5682 | if( !pPage->leaf ){ | ||
5683 | /* | ||
5684 | ** The entry we are about to delete is not a leaf so if we do not | ||
5685 | ** do something we will leave a hole on an internal page. | ||
5686 | ** We have to fill the hole by moving in a cell from a leaf. The | ||
5687 | ** next Cell after the one to be deleted is guaranteed to exist and | ||
5688 | ** to be a leaf so we can use it. | ||
5689 | */ | ||
5690 | BtCursor leafCur; | ||
5691 | unsigned char *pNext; | ||
5692 | int szNext; /* The compiler warning is wrong: szNext is always | ||
5693 | ** initialized before use. Adding an extra initialization | ||
5694 | ** to silence the compiler slows down the code. */ | ||
5695 | int notUsed; | ||
5696 | unsigned char *tempCell = 0; | ||
5697 | assert( !pPage->leafData ); | ||
5698 | sqlite3BtreeGetTempCursor(pCur, &leafCur); | ||
5699 | rc = sqlite3BtreeNext(&leafCur, ¬Used); | ||
5700 | if( rc==SQLITE_OK ){ | ||
5701 | rc = sqlite3PagerWrite(leafCur.pPage->pDbPage); | ||
5702 | } | ||
5703 | if( rc==SQLITE_OK ){ | ||
5704 | TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n", | ||
5705 | pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno)); | ||
5706 | dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell)); | ||
5707 | pNext = findCell(leafCur.pPage, leafCur.idx); | ||
5708 | szNext = cellSizePtr(leafCur.pPage, pNext); | ||
5709 | assert( MX_CELL_SIZE(pBt)>=szNext+4 ); | ||
5710 | tempCell = sqlite3_malloc( MX_CELL_SIZE(pBt) ); | ||
5711 | if( tempCell==0 ){ | ||
5712 | rc = SQLITE_NOMEM; | ||
5713 | } | ||
5714 | } | ||
5715 | if( rc==SQLITE_OK ){ | ||
5716 | rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0); | ||
5717 | } | ||
5718 | if( rc==SQLITE_OK ){ | ||
5719 | put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild); | ||
5720 | rc = balance(pPage, 0); | ||
5721 | } | ||
5722 | if( rc==SQLITE_OK ){ | ||
5723 | dropCell(leafCur.pPage, leafCur.idx, szNext); | ||
5724 | rc = balance(leafCur.pPage, 0); | ||
5725 | } | ||
5726 | sqlite3_free(tempCell); | ||
5727 | sqlite3BtreeReleaseTempCursor(&leafCur); | ||
5728 | }else{ | ||
5729 | TRACE(("DELETE: table=%d delete from leaf %d\n", | ||
5730 | pCur->pgnoRoot, pPage->pgno)); | ||
5731 | dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell)); | ||
5732 | rc = balance(pPage, 0); | ||
5733 | } | ||
5734 | if( rc==SQLITE_OK ){ | ||
5735 | moveToRoot(pCur); | ||
5736 | } | ||
5737 | return rc; | ||
5738 | } | ||
5739 | |||
5740 | /* | ||
5741 | ** Create a new BTree table. Write into *piTable the page | ||
5742 | ** number for the root page of the new table. | ||
5743 | ** | ||
5744 | ** The type of type is determined by the flags parameter. Only the | ||
5745 | ** following values of flags are currently in use. Other values for | ||
5746 | ** flags might not work: | ||
5747 | ** | ||
5748 | ** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys | ||
5749 | ** BTREE_ZERODATA Used for SQL indices | ||
5750 | */ | ||
5751 | static int btreeCreateTable(Btree *p, int *piTable, int flags){ | ||
5752 | BtShared *pBt = p->pBt; | ||
5753 | MemPage *pRoot; | ||
5754 | Pgno pgnoRoot; | ||
5755 | int rc; | ||
5756 | |||
5757 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
5758 | if( pBt->inTransaction!=TRANS_WRITE ){ | ||
5759 | /* Must start a transaction first */ | ||
5760 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
5761 | return rc; | ||
5762 | } | ||
5763 | assert( !pBt->readOnly ); | ||
5764 | |||
5765 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
5766 | rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); | ||
5767 | if( rc ){ | ||
5768 | return rc; | ||
5769 | } | ||
5770 | #else | ||
5771 | if( pBt->autoVacuum ){ | ||
5772 | Pgno pgnoMove; /* Move a page here to make room for the root-page */ | ||
5773 | MemPage *pPageMove; /* The page to move to. */ | ||
5774 | |||
5775 | /* Creating a new table may probably require moving an existing database | ||
5776 | ** to make room for the new tables root page. In case this page turns | ||
5777 | ** out to be an overflow page, delete all overflow page-map caches | ||
5778 | ** held by open cursors. | ||
5779 | */ | ||
5780 | invalidateAllOverflowCache(pBt); | ||
5781 | |||
5782 | /* Read the value of meta[3] from the database to determine where the | ||
5783 | ** root page of the new table should go. meta[3] is the largest root-page | ||
5784 | ** created so far, so the new root-page is (meta[3]+1). | ||
5785 | */ | ||
5786 | rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot); | ||
5787 | if( rc!=SQLITE_OK ){ | ||
5788 | return rc; | ||
5789 | } | ||
5790 | pgnoRoot++; | ||
5791 | |||
5792 | /* The new root-page may not be allocated on a pointer-map page, or the | ||
5793 | ** PENDING_BYTE page. | ||
5794 | */ | ||
5795 | if( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || | ||
5796 | pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ | ||
5797 | pgnoRoot++; | ||
5798 | } | ||
5799 | assert( pgnoRoot>=3 ); | ||
5800 | |||
5801 | /* Allocate a page. The page that currently resides at pgnoRoot will | ||
5802 | ** be moved to the allocated page (unless the allocated page happens | ||
5803 | ** to reside at pgnoRoot). | ||
5804 | */ | ||
5805 | rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1); | ||
5806 | if( rc!=SQLITE_OK ){ | ||
5807 | return rc; | ||
5808 | } | ||
5809 | |||
5810 | if( pgnoMove!=pgnoRoot ){ | ||
5811 | /* pgnoRoot is the page that will be used for the root-page of | ||
5812 | ** the new table (assuming an error did not occur). But we were | ||
5813 | ** allocated pgnoMove. If required (i.e. if it was not allocated | ||
5814 | ** by extending the file), the current page at position pgnoMove | ||
5815 | ** is already journaled. | ||
5816 | */ | ||
5817 | u8 eType; | ||
5818 | Pgno iPtrPage; | ||
5819 | |||
5820 | releasePage(pPageMove); | ||
5821 | |||
5822 | /* Move the page currently at pgnoRoot to pgnoMove. */ | ||
5823 | rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0); | ||
5824 | if( rc!=SQLITE_OK ){ | ||
5825 | return rc; | ||
5826 | } | ||
5827 | rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); | ||
5828 | if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ | ||
5829 | releasePage(pRoot); | ||
5830 | return rc; | ||
5831 | } | ||
5832 | assert( eType!=PTRMAP_ROOTPAGE ); | ||
5833 | assert( eType!=PTRMAP_FREEPAGE ); | ||
5834 | rc = sqlite3PagerWrite(pRoot->pDbPage); | ||
5835 | if( rc!=SQLITE_OK ){ | ||
5836 | releasePage(pRoot); | ||
5837 | return rc; | ||
5838 | } | ||
5839 | rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove); | ||
5840 | releasePage(pRoot); | ||
5841 | |||
5842 | /* Obtain the page at pgnoRoot */ | ||
5843 | if( rc!=SQLITE_OK ){ | ||
5844 | return rc; | ||
5845 | } | ||
5846 | rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0); | ||
5847 | if( rc!=SQLITE_OK ){ | ||
5848 | return rc; | ||
5849 | } | ||
5850 | rc = sqlite3PagerWrite(pRoot->pDbPage); | ||
5851 | if( rc!=SQLITE_OK ){ | ||
5852 | releasePage(pRoot); | ||
5853 | return rc; | ||
5854 | } | ||
5855 | }else{ | ||
5856 | pRoot = pPageMove; | ||
5857 | } | ||
5858 | |||
5859 | /* Update the pointer-map and meta-data with the new root-page number. */ | ||
5860 | rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0); | ||
5861 | if( rc ){ | ||
5862 | releasePage(pRoot); | ||
5863 | return rc; | ||
5864 | } | ||
5865 | rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); | ||
5866 | if( rc ){ | ||
5867 | releasePage(pRoot); | ||
5868 | return rc; | ||
5869 | } | ||
5870 | |||
5871 | }else{ | ||
5872 | rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); | ||
5873 | if( rc ) return rc; | ||
5874 | } | ||
5875 | #endif | ||
5876 | assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); | ||
5877 | zeroPage(pRoot, flags | PTF_LEAF); | ||
5878 | sqlite3PagerUnref(pRoot->pDbPage); | ||
5879 | *piTable = (int)pgnoRoot; | ||
5880 | return SQLITE_OK; | ||
5881 | } | ||
5882 | int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){ | ||
5883 | int rc; | ||
5884 | sqlite3BtreeEnter(p); | ||
5885 | rc = btreeCreateTable(p, piTable, flags); | ||
5886 | sqlite3BtreeLeave(p); | ||
5887 | return rc; | ||
5888 | } | ||
5889 | |||
5890 | /* | ||
5891 | ** Erase the given database page and all its children. Return | ||
5892 | ** the page to the freelist. | ||
5893 | */ | ||
5894 | static int clearDatabasePage( | ||
5895 | BtShared *pBt, /* The BTree that contains the table */ | ||
5896 | Pgno pgno, /* Page number to clear */ | ||
5897 | MemPage *pParent, /* Parent page. NULL for the root */ | ||
5898 | int freePageFlag /* Deallocate page if true */ | ||
5899 | ){ | ||
5900 | MemPage *pPage = 0; | ||
5901 | int rc; | ||
5902 | unsigned char *pCell; | ||
5903 | int i; | ||
5904 | |||
5905 | assert( sqlite3_mutex_held(pBt->mutex) ); | ||
5906 | if( pgno>sqlite3PagerPagecount(pBt->pPager) ){ | ||
5907 | return SQLITE_CORRUPT_BKPT; | ||
5908 | } | ||
5909 | |||
5910 | rc = getAndInitPage(pBt, pgno, &pPage, pParent); | ||
5911 | if( rc ) goto cleardatabasepage_out; | ||
5912 | for(i=0; i<pPage->nCell; i++){ | ||
5913 | pCell = findCell(pPage, i); | ||
5914 | if( !pPage->leaf ){ | ||
5915 | rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1); | ||
5916 | if( rc ) goto cleardatabasepage_out; | ||
5917 | } | ||
5918 | rc = clearCell(pPage, pCell); | ||
5919 | if( rc ) goto cleardatabasepage_out; | ||
5920 | } | ||
5921 | if( !pPage->leaf ){ | ||
5922 | rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1); | ||
5923 | if( rc ) goto cleardatabasepage_out; | ||
5924 | } | ||
5925 | if( freePageFlag ){ | ||
5926 | rc = freePage(pPage); | ||
5927 | }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ | ||
5928 | zeroPage(pPage, pPage->aData[0] | PTF_LEAF); | ||
5929 | } | ||
5930 | |||
5931 | cleardatabasepage_out: | ||
5932 | releasePage(pPage); | ||
5933 | return rc; | ||
5934 | } | ||
5935 | |||
5936 | /* | ||
5937 | ** Delete all information from a single table in the database. iTable is | ||
5938 | ** the page number of the root of the table. After this routine returns, | ||
5939 | ** the root page is empty, but still exists. | ||
5940 | ** | ||
5941 | ** This routine will fail with SQLITE_LOCKED if there are any open | ||
5942 | ** read cursors on the table. Open write cursors are moved to the | ||
5943 | ** root of the table. | ||
5944 | */ | ||
5945 | int sqlite3BtreeClearTable(Btree *p, int iTable){ | ||
5946 | int rc; | ||
5947 | BtShared *pBt = p->pBt; | ||
5948 | sqlite3BtreeEnter(p); | ||
5949 | if( p->inTrans!=TRANS_WRITE ){ | ||
5950 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
5951 | }else if( (rc = checkReadLocks(p, iTable, 0))!=SQLITE_OK ){ | ||
5952 | /* nothing to do */ | ||
5953 | }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){ | ||
5954 | /* nothing to do */ | ||
5955 | }else{ | ||
5956 | rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0); | ||
5957 | } | ||
5958 | sqlite3BtreeLeave(p); | ||
5959 | return rc; | ||
5960 | } | ||
5961 | |||
5962 | /* | ||
5963 | ** Erase all information in a table and add the root of the table to | ||
5964 | ** the freelist. Except, the root of the principle table (the one on | ||
5965 | ** page 1) is never added to the freelist. | ||
5966 | ** | ||
5967 | ** This routine will fail with SQLITE_LOCKED if there are any open | ||
5968 | ** cursors on the table. | ||
5969 | ** | ||
5970 | ** If AUTOVACUUM is enabled and the page at iTable is not the last | ||
5971 | ** root page in the database file, then the last root page | ||
5972 | ** in the database file is moved into the slot formerly occupied by | ||
5973 | ** iTable and that last slot formerly occupied by the last root page | ||
5974 | ** is added to the freelist instead of iTable. In this say, all | ||
5975 | ** root pages are kept at the beginning of the database file, which | ||
5976 | ** is necessary for AUTOVACUUM to work right. *piMoved is set to the | ||
5977 | ** page number that used to be the last root page in the file before | ||
5978 | ** the move. If no page gets moved, *piMoved is set to 0. | ||
5979 | ** The last root page is recorded in meta[3] and the value of | ||
5980 | ** meta[3] is updated by this procedure. | ||
5981 | */ | ||
5982 | static int btreeDropTable(Btree *p, int iTable, int *piMoved){ | ||
5983 | int rc; | ||
5984 | MemPage *pPage = 0; | ||
5985 | BtShared *pBt = p->pBt; | ||
5986 | |||
5987 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
5988 | if( p->inTrans!=TRANS_WRITE ){ | ||
5989 | return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
5990 | } | ||
5991 | |||
5992 | /* It is illegal to drop a table if any cursors are open on the | ||
5993 | ** database. This is because in auto-vacuum mode the backend may | ||
5994 | ** need to move another root-page to fill a gap left by the deleted | ||
5995 | ** root page. If an open cursor was using this page a problem would | ||
5996 | ** occur. | ||
5997 | */ | ||
5998 | if( pBt->pCursor ){ | ||
5999 | return SQLITE_LOCKED; | ||
6000 | } | ||
6001 | |||
6002 | rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0); | ||
6003 | if( rc ) return rc; | ||
6004 | rc = sqlite3BtreeClearTable(p, iTable); | ||
6005 | if( rc ){ | ||
6006 | releasePage(pPage); | ||
6007 | return rc; | ||
6008 | } | ||
6009 | |||
6010 | *piMoved = 0; | ||
6011 | |||
6012 | if( iTable>1 ){ | ||
6013 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
6014 | rc = freePage(pPage); | ||
6015 | releasePage(pPage); | ||
6016 | #else | ||
6017 | if( pBt->autoVacuum ){ | ||
6018 | Pgno maxRootPgno; | ||
6019 | rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno); | ||
6020 | if( rc!=SQLITE_OK ){ | ||
6021 | releasePage(pPage); | ||
6022 | return rc; | ||
6023 | } | ||
6024 | |||
6025 | if( iTable==maxRootPgno ){ | ||
6026 | /* If the table being dropped is the table with the largest root-page | ||
6027 | ** number in the database, put the root page on the free list. | ||
6028 | */ | ||
6029 | rc = freePage(pPage); | ||
6030 | releasePage(pPage); | ||
6031 | if( rc!=SQLITE_OK ){ | ||
6032 | return rc; | ||
6033 | } | ||
6034 | }else{ | ||
6035 | /* The table being dropped does not have the largest root-page | ||
6036 | ** number in the database. So move the page that does into the | ||
6037 | ** gap left by the deleted root-page. | ||
6038 | */ | ||
6039 | MemPage *pMove; | ||
6040 | releasePage(pPage); | ||
6041 | rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0); | ||
6042 | if( rc!=SQLITE_OK ){ | ||
6043 | return rc; | ||
6044 | } | ||
6045 | rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable); | ||
6046 | releasePage(pMove); | ||
6047 | if( rc!=SQLITE_OK ){ | ||
6048 | return rc; | ||
6049 | } | ||
6050 | rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0); | ||
6051 | if( rc!=SQLITE_OK ){ | ||
6052 | return rc; | ||
6053 | } | ||
6054 | rc = freePage(pMove); | ||
6055 | releasePage(pMove); | ||
6056 | if( rc!=SQLITE_OK ){ | ||
6057 | return rc; | ||
6058 | } | ||
6059 | *piMoved = maxRootPgno; | ||
6060 | } | ||
6061 | |||
6062 | /* Set the new 'max-root-page' value in the database header. This | ||
6063 | ** is the old value less one, less one more if that happens to | ||
6064 | ** be a root-page number, less one again if that is the | ||
6065 | ** PENDING_BYTE_PAGE. | ||
6066 | */ | ||
6067 | maxRootPgno--; | ||
6068 | if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){ | ||
6069 | maxRootPgno--; | ||
6070 | } | ||
6071 | if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){ | ||
6072 | maxRootPgno--; | ||
6073 | } | ||
6074 | assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); | ||
6075 | |||
6076 | rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); | ||
6077 | }else{ | ||
6078 | rc = freePage(pPage); | ||
6079 | releasePage(pPage); | ||
6080 | } | ||
6081 | #endif | ||
6082 | }else{ | ||
6083 | /* If sqlite3BtreeDropTable was called on page 1. */ | ||
6084 | zeroPage(pPage, PTF_INTKEY|PTF_LEAF ); | ||
6085 | releasePage(pPage); | ||
6086 | } | ||
6087 | return rc; | ||
6088 | } | ||
6089 | int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ | ||
6090 | int rc; | ||
6091 | sqlite3BtreeEnter(p); | ||
6092 | rc = btreeDropTable(p, iTable, piMoved); | ||
6093 | sqlite3BtreeLeave(p); | ||
6094 | return rc; | ||
6095 | } | ||
6096 | |||
6097 | |||
6098 | /* | ||
6099 | ** Read the meta-information out of a database file. Meta[0] | ||
6100 | ** is the number of free pages currently in the database. Meta[1] | ||
6101 | ** through meta[15] are available for use by higher layers. Meta[0] | ||
6102 | ** is read-only, the others are read/write. | ||
6103 | ** | ||
6104 | ** The schema layer numbers meta values differently. At the schema | ||
6105 | ** layer (and the SetCookie and ReadCookie opcodes) the number of | ||
6106 | ** free pages is not visible. So Cookie[0] is the same as Meta[1]. | ||
6107 | */ | ||
6108 | int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ | ||
6109 | DbPage *pDbPage; | ||
6110 | int rc; | ||
6111 | unsigned char *pP1; | ||
6112 | BtShared *pBt = p->pBt; | ||
6113 | |||
6114 | sqlite3BtreeEnter(p); | ||
6115 | |||
6116 | /* Reading a meta-data value requires a read-lock on page 1 (and hence | ||
6117 | ** the sqlite_master table. We grab this lock regardless of whether or | ||
6118 | ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page | ||
6119 | ** 1 is treated as a special case by queryTableLock() and lockTable()). | ||
6120 | */ | ||
6121 | rc = queryTableLock(p, 1, READ_LOCK); | ||
6122 | if( rc!=SQLITE_OK ){ | ||
6123 | sqlite3BtreeLeave(p); | ||
6124 | return rc; | ||
6125 | } | ||
6126 | |||
6127 | assert( idx>=0 && idx<=15 ); | ||
6128 | rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage); | ||
6129 | if( rc ){ | ||
6130 | sqlite3BtreeLeave(p); | ||
6131 | return rc; | ||
6132 | } | ||
6133 | pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage); | ||
6134 | *pMeta = get4byte(&pP1[36 + idx*4]); | ||
6135 | sqlite3PagerUnref(pDbPage); | ||
6136 | |||
6137 | /* If autovacuumed is disabled in this build but we are trying to | ||
6138 | ** access an autovacuumed database, then make the database readonly. | ||
6139 | */ | ||
6140 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
6141 | if( idx==4 && *pMeta>0 ) pBt->readOnly = 1; | ||
6142 | #endif | ||
6143 | |||
6144 | /* Grab the read-lock on page 1. */ | ||
6145 | rc = lockTable(p, 1, READ_LOCK); | ||
6146 | sqlite3BtreeLeave(p); | ||
6147 | return rc; | ||
6148 | } | ||
6149 | |||
6150 | /* | ||
6151 | ** Write meta-information back into the database. Meta[0] is | ||
6152 | ** read-only and may not be written. | ||
6153 | */ | ||
6154 | int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ | ||
6155 | BtShared *pBt = p->pBt; | ||
6156 | unsigned char *pP1; | ||
6157 | int rc; | ||
6158 | assert( idx>=1 && idx<=15 ); | ||
6159 | sqlite3BtreeEnter(p); | ||
6160 | if( p->inTrans!=TRANS_WRITE ){ | ||
6161 | rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; | ||
6162 | }else{ | ||
6163 | assert( pBt->pPage1!=0 ); | ||
6164 | pP1 = pBt->pPage1->aData; | ||
6165 | rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | ||
6166 | if( rc==SQLITE_OK ){ | ||
6167 | put4byte(&pP1[36 + idx*4], iMeta); | ||
6168 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6169 | if( idx==7 ){ | ||
6170 | assert( pBt->autoVacuum || iMeta==0 ); | ||
6171 | assert( iMeta==0 || iMeta==1 ); | ||
6172 | pBt->incrVacuum = iMeta; | ||
6173 | } | ||
6174 | #endif | ||
6175 | } | ||
6176 | } | ||
6177 | sqlite3BtreeLeave(p); | ||
6178 | return rc; | ||
6179 | } | ||
6180 | |||
6181 | /* | ||
6182 | ** Return the flag byte at the beginning of the page that the cursor | ||
6183 | ** is currently pointing to. | ||
6184 | */ | ||
6185 | int sqlite3BtreeFlags(BtCursor *pCur){ | ||
6186 | /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call | ||
6187 | ** restoreOrClearCursorPosition() here. | ||
6188 | */ | ||
6189 | MemPage *pPage = pCur->pPage; | ||
6190 | assert( cursorHoldsMutex(pCur) ); | ||
6191 | assert( pPage->pBt==pCur->pBt ); | ||
6192 | return pPage ? pPage->aData[pPage->hdrOffset] : 0; | ||
6193 | } | ||
6194 | |||
6195 | |||
6196 | /* | ||
6197 | ** Return the pager associated with a BTree. This routine is used for | ||
6198 | ** testing and debugging only. | ||
6199 | */ | ||
6200 | Pager *sqlite3BtreePager(Btree *p){ | ||
6201 | return p->pBt->pPager; | ||
6202 | } | ||
6203 | |||
6204 | #ifndef SQLITE_OMIT_INTEGRITY_CHECK | ||
6205 | /* | ||
6206 | ** Append a message to the error message string. | ||
6207 | */ | ||
6208 | static void checkAppendMsg( | ||
6209 | IntegrityCk *pCheck, | ||
6210 | char *zMsg1, | ||
6211 | const char *zFormat, | ||
6212 | ... | ||
6213 | ){ | ||
6214 | va_list ap; | ||
6215 | char *zMsg2; | ||
6216 | if( !pCheck->mxErr ) return; | ||
6217 | pCheck->mxErr--; | ||
6218 | pCheck->nErr++; | ||
6219 | va_start(ap, zFormat); | ||
6220 | zMsg2 = sqlite3VMPrintf(0, zFormat, ap); | ||
6221 | va_end(ap); | ||
6222 | if( zMsg1==0 ) zMsg1 = ""; | ||
6223 | if( pCheck->zErrMsg ){ | ||
6224 | char *zOld = pCheck->zErrMsg; | ||
6225 | pCheck->zErrMsg = 0; | ||
6226 | sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0); | ||
6227 | sqlite3_free(zOld); | ||
6228 | }else{ | ||
6229 | sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0); | ||
6230 | } | ||
6231 | sqlite3_free(zMsg2); | ||
6232 | } | ||
6233 | #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | ||
6234 | |||
6235 | #ifndef SQLITE_OMIT_INTEGRITY_CHECK | ||
6236 | /* | ||
6237 | ** Add 1 to the reference count for page iPage. If this is the second | ||
6238 | ** reference to the page, add an error message to pCheck->zErrMsg. | ||
6239 | ** Return 1 if there are 2 ore more references to the page and 0 if | ||
6240 | ** if this is the first reference to the page. | ||
6241 | ** | ||
6242 | ** Also check that the page number is in bounds. | ||
6243 | */ | ||
6244 | static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){ | ||
6245 | if( iPage==0 ) return 1; | ||
6246 | if( iPage>pCheck->nPage || iPage<0 ){ | ||
6247 | checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage); | ||
6248 | return 1; | ||
6249 | } | ||
6250 | if( pCheck->anRef[iPage]==1 ){ | ||
6251 | checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage); | ||
6252 | return 1; | ||
6253 | } | ||
6254 | return (pCheck->anRef[iPage]++)>1; | ||
6255 | } | ||
6256 | |||
6257 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6258 | /* | ||
6259 | ** Check that the entry in the pointer-map for page iChild maps to | ||
6260 | ** page iParent, pointer type ptrType. If not, append an error message | ||
6261 | ** to pCheck. | ||
6262 | */ | ||
6263 | static void checkPtrmap( | ||
6264 | IntegrityCk *pCheck, /* Integrity check context */ | ||
6265 | Pgno iChild, /* Child page number */ | ||
6266 | u8 eType, /* Expected pointer map type */ | ||
6267 | Pgno iParent, /* Expected pointer map parent page number */ | ||
6268 | char *zContext /* Context description (used for error msg) */ | ||
6269 | ){ | ||
6270 | int rc; | ||
6271 | u8 ePtrmapType; | ||
6272 | Pgno iPtrmapParent; | ||
6273 | |||
6274 | rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); | ||
6275 | if( rc!=SQLITE_OK ){ | ||
6276 | checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild); | ||
6277 | return; | ||
6278 | } | ||
6279 | |||
6280 | if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ | ||
6281 | checkAppendMsg(pCheck, zContext, | ||
6282 | "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", | ||
6283 | iChild, eType, iParent, ePtrmapType, iPtrmapParent); | ||
6284 | } | ||
6285 | } | ||
6286 | #endif | ||
6287 | |||
6288 | /* | ||
6289 | ** Check the integrity of the freelist or of an overflow page list. | ||
6290 | ** Verify that the number of pages on the list is N. | ||
6291 | */ | ||
6292 | static void checkList( | ||
6293 | IntegrityCk *pCheck, /* Integrity checking context */ | ||
6294 | int isFreeList, /* True for a freelist. False for overflow page list */ | ||
6295 | int iPage, /* Page number for first page in the list */ | ||
6296 | int N, /* Expected number of pages in the list */ | ||
6297 | char *zContext /* Context for error messages */ | ||
6298 | ){ | ||
6299 | int i; | ||
6300 | int expected = N; | ||
6301 | int iFirst = iPage; | ||
6302 | while( N-- > 0 && pCheck->mxErr ){ | ||
6303 | DbPage *pOvflPage; | ||
6304 | unsigned char *pOvflData; | ||
6305 | if( iPage<1 ){ | ||
6306 | checkAppendMsg(pCheck, zContext, | ||
6307 | "%d of %d pages missing from overflow list starting at %d", | ||
6308 | N+1, expected, iFirst); | ||
6309 | break; | ||
6310 | } | ||
6311 | if( checkRef(pCheck, iPage, zContext) ) break; | ||
6312 | if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ | ||
6313 | checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage); | ||
6314 | break; | ||
6315 | } | ||
6316 | pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); | ||
6317 | if( isFreeList ){ | ||
6318 | int n = get4byte(&pOvflData[4]); | ||
6319 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6320 | if( pCheck->pBt->autoVacuum ){ | ||
6321 | checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext); | ||
6322 | } | ||
6323 | #endif | ||
6324 | if( n>pCheck->pBt->usableSize/4-8 ){ | ||
6325 | checkAppendMsg(pCheck, zContext, | ||
6326 | "freelist leaf count too big on page %d", iPage); | ||
6327 | N--; | ||
6328 | }else{ | ||
6329 | for(i=0; i<n; i++){ | ||
6330 | Pgno iFreePage = get4byte(&pOvflData[8+i*4]); | ||
6331 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6332 | if( pCheck->pBt->autoVacuum ){ | ||
6333 | checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext); | ||
6334 | } | ||
6335 | #endif | ||
6336 | checkRef(pCheck, iFreePage, zContext); | ||
6337 | } | ||
6338 | N -= n; | ||
6339 | } | ||
6340 | } | ||
6341 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6342 | else{ | ||
6343 | /* If this database supports auto-vacuum and iPage is not the last | ||
6344 | ** page in this overflow list, check that the pointer-map entry for | ||
6345 | ** the following page matches iPage. | ||
6346 | */ | ||
6347 | if( pCheck->pBt->autoVacuum && N>0 ){ | ||
6348 | i = get4byte(pOvflData); | ||
6349 | checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext); | ||
6350 | } | ||
6351 | } | ||
6352 | #endif | ||
6353 | iPage = get4byte(pOvflData); | ||
6354 | sqlite3PagerUnref(pOvflPage); | ||
6355 | } | ||
6356 | } | ||
6357 | #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | ||
6358 | |||
6359 | #ifndef SQLITE_OMIT_INTEGRITY_CHECK | ||
6360 | /* | ||
6361 | ** Do various sanity checks on a single page of a tree. Return | ||
6362 | ** the tree depth. Root pages return 0. Parents of root pages | ||
6363 | ** return 1, and so forth. | ||
6364 | ** | ||
6365 | ** These checks are done: | ||
6366 | ** | ||
6367 | ** 1. Make sure that cells and freeblocks do not overlap | ||
6368 | ** but combine to completely cover the page. | ||
6369 | ** NO 2. Make sure cell keys are in order. | ||
6370 | ** NO 3. Make sure no key is less than or equal to zLowerBound. | ||
6371 | ** NO 4. Make sure no key is greater than or equal to zUpperBound. | ||
6372 | ** 5. Check the integrity of overflow pages. | ||
6373 | ** 6. Recursively call checkTreePage on all children. | ||
6374 | ** 7. Verify that the depth of all children is the same. | ||
6375 | ** 8. Make sure this page is at least 33% full or else it is | ||
6376 | ** the root of the tree. | ||
6377 | */ | ||
6378 | static int checkTreePage( | ||
6379 | IntegrityCk *pCheck, /* Context for the sanity check */ | ||
6380 | int iPage, /* Page number of the page to check */ | ||
6381 | MemPage *pParent, /* Parent page */ | ||
6382 | char *zParentContext /* Parent context */ | ||
6383 | ){ | ||
6384 | MemPage *pPage; | ||
6385 | int i, rc, depth, d2, pgno, cnt; | ||
6386 | int hdr, cellStart; | ||
6387 | int nCell; | ||
6388 | u8 *data; | ||
6389 | BtShared *pBt; | ||
6390 | int usableSize; | ||
6391 | char zContext[100]; | ||
6392 | char *hit; | ||
6393 | |||
6394 | sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage); | ||
6395 | |||
6396 | /* Check that the page exists | ||
6397 | */ | ||
6398 | pBt = pCheck->pBt; | ||
6399 | usableSize = pBt->usableSize; | ||
6400 | if( iPage==0 ) return 0; | ||
6401 | if( checkRef(pCheck, iPage, zParentContext) ) return 0; | ||
6402 | if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ | ||
6403 | checkAppendMsg(pCheck, zContext, | ||
6404 | "unable to get the page. error code=%d", rc); | ||
6405 | return 0; | ||
6406 | } | ||
6407 | if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){ | ||
6408 | checkAppendMsg(pCheck, zContext, | ||
6409 | "sqlite3BtreeInitPage() returns error code %d", rc); | ||
6410 | releasePage(pPage); | ||
6411 | return 0; | ||
6412 | } | ||
6413 | |||
6414 | /* Check out all the cells. | ||
6415 | */ | ||
6416 | depth = 0; | ||
6417 | for(i=0; i<pPage->nCell && pCheck->mxErr; i++){ | ||
6418 | u8 *pCell; | ||
6419 | int sz; | ||
6420 | CellInfo info; | ||
6421 | |||
6422 | /* Check payload overflow pages | ||
6423 | */ | ||
6424 | sqlite3_snprintf(sizeof(zContext), zContext, | ||
6425 | "On tree page %d cell %d: ", iPage, i); | ||
6426 | pCell = findCell(pPage,i); | ||
6427 | sqlite3BtreeParseCellPtr(pPage, pCell, &info); | ||
6428 | sz = info.nData; | ||
6429 | if( !pPage->intKey ) sz += info.nKey; | ||
6430 | assert( sz==info.nPayload ); | ||
6431 | if( sz>info.nLocal ){ | ||
6432 | int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4); | ||
6433 | Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); | ||
6434 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6435 | if( pBt->autoVacuum ){ | ||
6436 | checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext); | ||
6437 | } | ||
6438 | #endif | ||
6439 | checkList(pCheck, 0, pgnoOvfl, nPage, zContext); | ||
6440 | } | ||
6441 | |||
6442 | /* Check sanity of left child page. | ||
6443 | */ | ||
6444 | if( !pPage->leaf ){ | ||
6445 | pgno = get4byte(pCell); | ||
6446 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6447 | if( pBt->autoVacuum ){ | ||
6448 | checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); | ||
6449 | } | ||
6450 | #endif | ||
6451 | d2 = checkTreePage(pCheck,pgno,pPage,zContext); | ||
6452 | if( i>0 && d2!=depth ){ | ||
6453 | checkAppendMsg(pCheck, zContext, "Child page depth differs"); | ||
6454 | } | ||
6455 | depth = d2; | ||
6456 | } | ||
6457 | } | ||
6458 | if( !pPage->leaf ){ | ||
6459 | pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); | ||
6460 | sqlite3_snprintf(sizeof(zContext), zContext, | ||
6461 | "On page %d at right child: ", iPage); | ||
6462 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6463 | if( pBt->autoVacuum ){ | ||
6464 | checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0); | ||
6465 | } | ||
6466 | #endif | ||
6467 | checkTreePage(pCheck, pgno, pPage, zContext); | ||
6468 | } | ||
6469 | |||
6470 | /* Check for complete coverage of the page | ||
6471 | */ | ||
6472 | data = pPage->aData; | ||
6473 | hdr = pPage->hdrOffset; | ||
6474 | hit = sqlite3MallocZero( usableSize ); | ||
6475 | if( hit ){ | ||
6476 | memset(hit, 1, get2byte(&data[hdr+5])); | ||
6477 | nCell = get2byte(&data[hdr+3]); | ||
6478 | cellStart = hdr + 12 - 4*pPage->leaf; | ||
6479 | for(i=0; i<nCell; i++){ | ||
6480 | int pc = get2byte(&data[cellStart+i*2]); | ||
6481 | int size = cellSizePtr(pPage, &data[pc]); | ||
6482 | int j; | ||
6483 | if( (pc+size-1)>=usableSize || pc<0 ){ | ||
6484 | checkAppendMsg(pCheck, 0, | ||
6485 | "Corruption detected in cell %d on page %d",i,iPage,0); | ||
6486 | }else{ | ||
6487 | for(j=pc+size-1; j>=pc; j--) hit[j]++; | ||
6488 | } | ||
6489 | } | ||
6490 | for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; | ||
6491 | cnt++){ | ||
6492 | int size = get2byte(&data[i+2]); | ||
6493 | int j; | ||
6494 | if( (i+size-1)>=usableSize || i<0 ){ | ||
6495 | checkAppendMsg(pCheck, 0, | ||
6496 | "Corruption detected in cell %d on page %d",i,iPage,0); | ||
6497 | }else{ | ||
6498 | for(j=i+size-1; j>=i; j--) hit[j]++; | ||
6499 | } | ||
6500 | i = get2byte(&data[i]); | ||
6501 | } | ||
6502 | for(i=cnt=0; i<usableSize; i++){ | ||
6503 | if( hit[i]==0 ){ | ||
6504 | cnt++; | ||
6505 | }else if( hit[i]>1 ){ | ||
6506 | checkAppendMsg(pCheck, 0, | ||
6507 | "Multiple uses for byte %d of page %d", i, iPage); | ||
6508 | break; | ||
6509 | } | ||
6510 | } | ||
6511 | if( cnt!=data[hdr+7] ){ | ||
6512 | checkAppendMsg(pCheck, 0, | ||
6513 | "Fragmented space is %d byte reported as %d on page %d", | ||
6514 | cnt, data[hdr+7], iPage); | ||
6515 | } | ||
6516 | } | ||
6517 | sqlite3_free(hit); | ||
6518 | |||
6519 | releasePage(pPage); | ||
6520 | return depth+1; | ||
6521 | } | ||
6522 | #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | ||
6523 | |||
6524 | #ifndef SQLITE_OMIT_INTEGRITY_CHECK | ||
6525 | /* | ||
6526 | ** This routine does a complete check of the given BTree file. aRoot[] is | ||
6527 | ** an array of pages numbers were each page number is the root page of | ||
6528 | ** a table. nRoot is the number of entries in aRoot. | ||
6529 | ** | ||
6530 | ** If everything checks out, this routine returns NULL. If something is | ||
6531 | ** amiss, an error message is written into memory obtained from malloc() | ||
6532 | ** and a pointer to that error message is returned. The calling function | ||
6533 | ** is responsible for freeing the error message when it is done. | ||
6534 | */ | ||
6535 | char *sqlite3BtreeIntegrityCheck( | ||
6536 | Btree *p, /* The btree to be checked */ | ||
6537 | int *aRoot, /* An array of root pages numbers for individual trees */ | ||
6538 | int nRoot, /* Number of entries in aRoot[] */ | ||
6539 | int mxErr, /* Stop reporting errors after this many */ | ||
6540 | int *pnErr /* Write number of errors seen to this variable */ | ||
6541 | ){ | ||
6542 | int i; | ||
6543 | int nRef; | ||
6544 | IntegrityCk sCheck; | ||
6545 | BtShared *pBt = p->pBt; | ||
6546 | |||
6547 | sqlite3BtreeEnter(p); | ||
6548 | nRef = sqlite3PagerRefcount(pBt->pPager); | ||
6549 | if( lockBtreeWithRetry(p)!=SQLITE_OK ){ | ||
6550 | sqlite3BtreeLeave(p); | ||
6551 | return sqlite3StrDup("Unable to acquire a read lock on the database"); | ||
6552 | } | ||
6553 | sCheck.pBt = pBt; | ||
6554 | sCheck.pPager = pBt->pPager; | ||
6555 | sCheck.nPage = sqlite3PagerPagecount(sCheck.pPager); | ||
6556 | sCheck.mxErr = mxErr; | ||
6557 | sCheck.nErr = 0; | ||
6558 | *pnErr = 0; | ||
6559 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6560 | if( pBt->nTrunc!=0 ){ | ||
6561 | sCheck.nPage = pBt->nTrunc; | ||
6562 | } | ||
6563 | #endif | ||
6564 | if( sCheck.nPage==0 ){ | ||
6565 | unlockBtreeIfUnused(pBt); | ||
6566 | sqlite3BtreeLeave(p); | ||
6567 | return 0; | ||
6568 | } | ||
6569 | sCheck.anRef = sqlite3_malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) ); | ||
6570 | if( !sCheck.anRef ){ | ||
6571 | unlockBtreeIfUnused(pBt); | ||
6572 | *pnErr = 1; | ||
6573 | sqlite3BtreeLeave(p); | ||
6574 | return sqlite3MPrintf(p->pSqlite, "Unable to malloc %d bytes", | ||
6575 | (sCheck.nPage+1)*sizeof(sCheck.anRef[0])); | ||
6576 | } | ||
6577 | for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; } | ||
6578 | i = PENDING_BYTE_PAGE(pBt); | ||
6579 | if( i<=sCheck.nPage ){ | ||
6580 | sCheck.anRef[i] = 1; | ||
6581 | } | ||
6582 | sCheck.zErrMsg = 0; | ||
6583 | |||
6584 | /* Check the integrity of the freelist | ||
6585 | */ | ||
6586 | checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), | ||
6587 | get4byte(&pBt->pPage1->aData[36]), "Main freelist: "); | ||
6588 | |||
6589 | /* Check all the tables. | ||
6590 | */ | ||
6591 | for(i=0; i<nRoot && sCheck.mxErr; i++){ | ||
6592 | if( aRoot[i]==0 ) continue; | ||
6593 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
6594 | if( pBt->autoVacuum && aRoot[i]>1 ){ | ||
6595 | checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0); | ||
6596 | } | ||
6597 | #endif | ||
6598 | checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: "); | ||
6599 | } | ||
6600 | |||
6601 | /* Make sure every page in the file is referenced | ||
6602 | */ | ||
6603 | for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ | ||
6604 | #ifdef SQLITE_OMIT_AUTOVACUUM | ||
6605 | if( sCheck.anRef[i]==0 ){ | ||
6606 | checkAppendMsg(&sCheck, 0, "Page %d is never used", i); | ||
6607 | } | ||
6608 | #else | ||
6609 | /* If the database supports auto-vacuum, make sure no tables contain | ||
6610 | ** references to pointer-map pages. | ||
6611 | */ | ||
6612 | if( sCheck.anRef[i]==0 && | ||
6613 | (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ | ||
6614 | checkAppendMsg(&sCheck, 0, "Page %d is never used", i); | ||
6615 | } | ||
6616 | if( sCheck.anRef[i]!=0 && | ||
6617 | (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ | ||
6618 | checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i); | ||
6619 | } | ||
6620 | #endif | ||
6621 | } | ||
6622 | |||
6623 | /* Make sure this analysis did not leave any unref() pages | ||
6624 | */ | ||
6625 | unlockBtreeIfUnused(pBt); | ||
6626 | if( nRef != sqlite3PagerRefcount(pBt->pPager) ){ | ||
6627 | checkAppendMsg(&sCheck, 0, | ||
6628 | "Outstanding page count goes from %d to %d during this analysis", | ||
6629 | nRef, sqlite3PagerRefcount(pBt->pPager) | ||
6630 | ); | ||
6631 | } | ||
6632 | |||
6633 | /* Clean up and report errors. | ||
6634 | */ | ||
6635 | sqlite3BtreeLeave(p); | ||
6636 | sqlite3_free(sCheck.anRef); | ||
6637 | *pnErr = sCheck.nErr; | ||
6638 | return sCheck.zErrMsg; | ||
6639 | } | ||
6640 | #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | ||
6641 | |||
6642 | /* | ||
6643 | ** Return the full pathname of the underlying database file. | ||
6644 | ** | ||
6645 | ** The pager filename is invariant as long as the pager is | ||
6646 | ** open so it is safe to access without the BtShared mutex. | ||
6647 | */ | ||
6648 | const char *sqlite3BtreeGetFilename(Btree *p){ | ||
6649 | assert( p->pBt->pPager!=0 ); | ||
6650 | return sqlite3PagerFilename(p->pBt->pPager); | ||
6651 | } | ||
6652 | |||
6653 | /* | ||
6654 | ** Return the pathname of the directory that contains the database file. | ||
6655 | ** | ||
6656 | ** The pager directory name is invariant as long as the pager is | ||
6657 | ** open so it is safe to access without the BtShared mutex. | ||
6658 | */ | ||
6659 | const char *sqlite3BtreeGetDirname(Btree *p){ | ||
6660 | assert( p->pBt->pPager!=0 ); | ||
6661 | return sqlite3PagerDirname(p->pBt->pPager); | ||
6662 | } | ||
6663 | |||
6664 | /* | ||
6665 | ** Return the pathname of the journal file for this database. The return | ||
6666 | ** value of this routine is the same regardless of whether the journal file | ||
6667 | ** has been created or not. | ||
6668 | ** | ||
6669 | ** The pager journal filename is invariant as long as the pager is | ||
6670 | ** open so it is safe to access without the BtShared mutex. | ||
6671 | */ | ||
6672 | const char *sqlite3BtreeGetJournalname(Btree *p){ | ||
6673 | assert( p->pBt->pPager!=0 ); | ||
6674 | return sqlite3PagerJournalname(p->pBt->pPager); | ||
6675 | } | ||
6676 | |||
6677 | #ifndef SQLITE_OMIT_VACUUM | ||
6678 | /* | ||
6679 | ** Copy the complete content of pBtFrom into pBtTo. A transaction | ||
6680 | ** must be active for both files. | ||
6681 | ** | ||
6682 | ** The size of file pBtFrom may be reduced by this operation. | ||
6683 | ** If anything goes wrong, the transaction on pBtFrom is rolled back. | ||
6684 | */ | ||
6685 | static int btreeCopyFile(Btree *pTo, Btree *pFrom){ | ||
6686 | int rc = SQLITE_OK; | ||
6687 | Pgno i, nPage, nToPage, iSkip; | ||
6688 | |||
6689 | BtShared *pBtTo = pTo->pBt; | ||
6690 | BtShared *pBtFrom = pFrom->pBt; | ||
6691 | |||
6692 | if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){ | ||
6693 | return SQLITE_ERROR; | ||
6694 | } | ||
6695 | if( pBtTo->pCursor ) return SQLITE_BUSY; | ||
6696 | nToPage = sqlite3PagerPagecount(pBtTo->pPager); | ||
6697 | nPage = sqlite3PagerPagecount(pBtFrom->pPager); | ||
6698 | iSkip = PENDING_BYTE_PAGE(pBtTo); | ||
6699 | for(i=1; rc==SQLITE_OK && i<=nPage; i++){ | ||
6700 | DbPage *pDbPage; | ||
6701 | if( i==iSkip ) continue; | ||
6702 | rc = sqlite3PagerGet(pBtFrom->pPager, i, &pDbPage); | ||
6703 | if( rc ) break; | ||
6704 | rc = sqlite3PagerOverwrite(pBtTo->pPager, i, sqlite3PagerGetData(pDbPage)); | ||
6705 | sqlite3PagerUnref(pDbPage); | ||
6706 | } | ||
6707 | |||
6708 | /* If the file is shrinking, journal the pages that are being truncated | ||
6709 | ** so that they can be rolled back if the commit fails. | ||
6710 | */ | ||
6711 | for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){ | ||
6712 | DbPage *pDbPage; | ||
6713 | if( i==iSkip ) continue; | ||
6714 | rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage); | ||
6715 | if( rc ) break; | ||
6716 | rc = sqlite3PagerWrite(pDbPage); | ||
6717 | sqlite3PagerDontWrite(pDbPage); | ||
6718 | /* Yeah. It seems wierd to call DontWrite() right after Write(). But | ||
6719 | ** that is because the names of those procedures do not exactly | ||
6720 | ** represent what they do. Write() really means "put this page in the | ||
6721 | ** rollback journal and mark it as dirty so that it will be written | ||
6722 | ** to the database file later." DontWrite() undoes the second part of | ||
6723 | ** that and prevents the page from being written to the database. The | ||
6724 | ** page is still on the rollback journal, though. And that is the whole | ||
6725 | ** point of this loop: to put pages on the rollback journal. */ | ||
6726 | sqlite3PagerUnref(pDbPage); | ||
6727 | } | ||
6728 | if( !rc && nPage<nToPage ){ | ||
6729 | rc = sqlite3PagerTruncate(pBtTo->pPager, nPage); | ||
6730 | } | ||
6731 | |||
6732 | if( rc ){ | ||
6733 | sqlite3BtreeRollback(pTo); | ||
6734 | } | ||
6735 | return rc; | ||
6736 | } | ||
6737 | int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){ | ||
6738 | int rc; | ||
6739 | sqlite3BtreeEnter(pTo); | ||
6740 | sqlite3BtreeEnter(pFrom); | ||
6741 | rc = btreeCopyFile(pTo, pFrom); | ||
6742 | sqlite3BtreeLeave(pFrom); | ||
6743 | sqlite3BtreeLeave(pTo); | ||
6744 | return rc; | ||
6745 | } | ||
6746 | |||
6747 | #endif /* SQLITE_OMIT_VACUUM */ | ||
6748 | |||
6749 | /* | ||
6750 | ** Return non-zero if a transaction is active. | ||
6751 | */ | ||
6752 | int sqlite3BtreeIsInTrans(Btree *p){ | ||
6753 | assert( p==0 || sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
6754 | return (p && (p->inTrans==TRANS_WRITE)); | ||
6755 | } | ||
6756 | |||
6757 | /* | ||
6758 | ** Return non-zero if a statement transaction is active. | ||
6759 | */ | ||
6760 | int sqlite3BtreeIsInStmt(Btree *p){ | ||
6761 | assert( sqlite3BtreeHoldsMutex(p) ); | ||
6762 | return (p->pBt && p->pBt->inStmt); | ||
6763 | } | ||
6764 | |||
6765 | /* | ||
6766 | ** Return non-zero if a read (or write) transaction is active. | ||
6767 | */ | ||
6768 | int sqlite3BtreeIsInReadTrans(Btree *p){ | ||
6769 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
6770 | return (p && (p->inTrans!=TRANS_NONE)); | ||
6771 | } | ||
6772 | |||
6773 | /* | ||
6774 | ** This function returns a pointer to a blob of memory associated with | ||
6775 | ** a single shared-btree. The memory is used by client code for it's own | ||
6776 | ** purposes (for example, to store a high-level schema associated with | ||
6777 | ** the shared-btree). The btree layer manages reference counting issues. | ||
6778 | ** | ||
6779 | ** The first time this is called on a shared-btree, nBytes bytes of memory | ||
6780 | ** are allocated, zeroed, and returned to the caller. For each subsequent | ||
6781 | ** call the nBytes parameter is ignored and a pointer to the same blob | ||
6782 | ** of memory returned. | ||
6783 | ** | ||
6784 | ** Just before the shared-btree is closed, the function passed as the | ||
6785 | ** xFree argument when the memory allocation was made is invoked on the | ||
6786 | ** blob of allocated memory. This function should not call sqlite3_free() | ||
6787 | ** on the memory, the btree layer does that. | ||
6788 | */ | ||
6789 | void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ | ||
6790 | BtShared *pBt = p->pBt; | ||
6791 | sqlite3BtreeEnter(p); | ||
6792 | if( !pBt->pSchema ){ | ||
6793 | pBt->pSchema = sqlite3MallocZero(nBytes); | ||
6794 | pBt->xFreeSchema = xFree; | ||
6795 | } | ||
6796 | sqlite3BtreeLeave(p); | ||
6797 | return pBt->pSchema; | ||
6798 | } | ||
6799 | |||
6800 | /* | ||
6801 | ** Return true if another user of the same shared btree as the argument | ||
6802 | ** handle holds an exclusive lock on the sqlite_master table. | ||
6803 | */ | ||
6804 | int sqlite3BtreeSchemaLocked(Btree *p){ | ||
6805 | int rc; | ||
6806 | assert( sqlite3_mutex_held(p->pSqlite->mutex) ); | ||
6807 | sqlite3BtreeEnter(p); | ||
6808 | rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK); | ||
6809 | sqlite3BtreeLeave(p); | ||
6810 | return rc; | ||
6811 | } | ||
6812 | |||
6813 | |||
6814 | #ifndef SQLITE_OMIT_SHARED_CACHE | ||
6815 | /* | ||
6816 | ** Obtain a lock on the table whose root page is iTab. The | ||
6817 | ** lock is a write lock if isWritelock is true or a read lock | ||
6818 | ** if it is false. | ||
6819 | */ | ||
6820 | int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ | ||
6821 | int rc = SQLITE_OK; | ||
6822 | u8 lockType = (isWriteLock?WRITE_LOCK:READ_LOCK); | ||
6823 | sqlite3BtreeEnter(p); | ||
6824 | rc = queryTableLock(p, iTab, lockType); | ||
6825 | if( rc==SQLITE_OK ){ | ||
6826 | rc = lockTable(p, iTab, lockType); | ||
6827 | } | ||
6828 | sqlite3BtreeLeave(p); | ||
6829 | return rc; | ||
6830 | } | ||
6831 | #endif | ||
6832 | |||
6833 | #ifndef SQLITE_OMIT_INCRBLOB | ||
6834 | /* | ||
6835 | ** Argument pCsr must be a cursor opened for writing on an | ||
6836 | ** INTKEY table currently pointing at a valid table entry. | ||
6837 | ** This function modifies the data stored as part of that entry. | ||
6838 | ** Only the data content may only be modified, it is not possible | ||
6839 | ** to change the length of the data stored. | ||
6840 | */ | ||
6841 | int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ | ||
6842 | assert( cursorHoldsMutex(pCsr) ); | ||
6843 | assert( sqlite3_mutex_held(pCsr->pBtree->pSqlite->mutex) ); | ||
6844 | assert(pCsr->isIncrblobHandle); | ||
6845 | if( pCsr->eState>=CURSOR_REQUIRESEEK ){ | ||
6846 | if( pCsr->eState==CURSOR_FAULT ){ | ||
6847 | return pCsr->skip; | ||
6848 | }else{ | ||
6849 | return SQLITE_ABORT; | ||
6850 | } | ||
6851 | } | ||
6852 | |||
6853 | /* Check some preconditions: | ||
6854 | ** (a) the cursor is open for writing, | ||
6855 | ** (b) there is no read-lock on the table being modified and | ||
6856 | ** (c) the cursor points at a valid row of an intKey table. | ||
6857 | */ | ||
6858 | if( !pCsr->wrFlag ){ | ||
6859 | return SQLITE_READONLY; | ||
6860 | } | ||
6861 | assert( !pCsr->pBt->readOnly | ||
6862 | && pCsr->pBt->inTransaction==TRANS_WRITE ); | ||
6863 | if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr) ){ | ||
6864 | return SQLITE_LOCKED; /* The table pCur points to has a read lock */ | ||
6865 | } | ||
6866 | if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){ | ||
6867 | return SQLITE_ERROR; | ||
6868 | } | ||
6869 | |||
6870 | return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1); | ||
6871 | } | ||
6872 | |||
6873 | /* | ||
6874 | ** Set a flag on this cursor to cache the locations of pages from the | ||
6875 | ** overflow list for the current row. This is used by cursors opened | ||
6876 | ** for incremental blob IO only. | ||
6877 | ** | ||
6878 | ** This function sets a flag only. The actual page location cache | ||
6879 | ** (stored in BtCursor.aOverflow[]) is allocated and used by function | ||
6880 | ** accessPayload() (the worker function for sqlite3BtreeData() and | ||
6881 | ** sqlite3BtreePutData()). | ||
6882 | */ | ||
6883 | void sqlite3BtreeCacheOverflow(BtCursor *pCur){ | ||
6884 | assert( cursorHoldsMutex(pCur) ); | ||
6885 | assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) ); | ||
6886 | assert(!pCur->isIncrblobHandle); | ||
6887 | assert(!pCur->aOverflow); | ||
6888 | pCur->isIncrblobHandle = 1; | ||
6889 | } | ||
6890 | #endif | ||