diff options
Diffstat (limited to 'libraries/sqlite/win32/pager.c')
-rwxr-xr-x | libraries/sqlite/win32/pager.c | 5104 |
1 files changed, 5104 insertions, 0 deletions
diff --git a/libraries/sqlite/win32/pager.c b/libraries/sqlite/win32/pager.c new file mode 100755 index 0000000..b0ad715 --- /dev/null +++ b/libraries/sqlite/win32/pager.c | |||
@@ -0,0 +1,5104 @@ | |||
1 | /* | ||
2 | ** 2001 September 15 | ||
3 | ** | ||
4 | ** The author disclaims copyright to this source code. In place of | ||
5 | ** a legal notice, here is a blessing: | ||
6 | ** | ||
7 | ** May you do good and not evil. | ||
8 | ** May you find forgiveness for yourself and forgive others. | ||
9 | ** May you share freely, never taking more than you give. | ||
10 | ** | ||
11 | ************************************************************************* | ||
12 | ** This is the implementation of the page cache subsystem or "pager". | ||
13 | ** | ||
14 | ** The pager is used to access a database disk file. It implements | ||
15 | ** atomic commit and rollback through the use of a journal file that | ||
16 | ** is separate from the database file. The pager also implements file | ||
17 | ** locking to prevent two processes from writing the same database | ||
18 | ** file simultaneously, or one process from reading the database while | ||
19 | ** another is writing. | ||
20 | ** | ||
21 | ** @(#) $Id: pager.c,v 1.392 2007/10/03 15:22:26 danielk1977 Exp $ | ||
22 | */ | ||
23 | #ifndef SQLITE_OMIT_DISKIO | ||
24 | #include "sqliteInt.h" | ||
25 | #include <assert.h> | ||
26 | #include <string.h> | ||
27 | |||
/*
** Tracing hooks for troubleshooting the pager.  They are normally
** turned off.
**
** Changing the "#if 0" below to "#if 1" maps PAGERTRACEn() onto a
** printf-style call that receives all n of its arguments.  In the
** default configuration every PAGERTRACEn() expands to nothing, so the
** arguments are discarded by the preprocessor and never evaluated.
*/
#if 0
# define sqlite3DebugPrintf      printf
# define PAGERTRACE1(X)          sqlite3DebugPrintf(X)
# define PAGERTRACE2(X,Y)        sqlite3DebugPrintf(X,Y)
# define PAGERTRACE3(X,Y,Z)      sqlite3DebugPrintf(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)    sqlite3DebugPrintf(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)  sqlite3DebugPrintf(X,Y,Z,W,V)
#else
# define PAGERTRACE1(X)
# define PAGERTRACE2(X,Y)
# define PAGERTRACE3(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)
#endif
45 | |||
/*
** The following two macros are used within the PAGERTRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as its argument and yields
** that pager's "fd" member converted to int.  FILEHANDLEID() takes a
** pointer to an sqlite3_file struct as its argument and yields that
** pointer converted to int.
**
** Each macro parameter is wrapped in parentheses so that an argument
** which is itself an expression (for example "pPager+1") is converted
** as a whole instead of being split apart by operator precedence.
**
** NOTE(review): the conversion to int can drop high-order bits where
** pointers are wider than int; the result is meant only as a label in
** trace output.
*/
#define PAGERID(p) ((int)((p)->fd))
#define FILEHANDLEID(fd) ((int)(fd))
56 | |||
57 | /* | ||
58 | ** The page cache as a whole is always in one of the following | ||
59 | ** states: | ||
60 | ** | ||
61 | ** PAGER_UNLOCK The page cache is not currently reading or | ||
62 | ** writing the database file. There is no | ||
63 | ** data held in memory. This is the initial | ||
64 | ** state. | ||
65 | ** | ||
66 | ** PAGER_SHARED The page cache is reading the database. | ||
67 | ** Writing is not permitted. There can be | ||
68 | ** multiple readers accessing the same database | ||
69 | ** file at the same time. | ||
70 | ** | ||
71 | ** PAGER_RESERVED This process has reserved the database for writing | ||
72 | ** but has not yet made any changes. Only one process | ||
73 | ** at a time can reserve the database. The original | ||
74 | ** database file has not been modified so other | ||
75 | ** processes may still be reading the on-disk | ||
76 | ** database file. | ||
77 | ** | ||
78 | ** PAGER_EXCLUSIVE The page cache is writing the database. | ||
79 | ** Access is exclusive. No other processes or | ||
80 | ** threads can be reading or writing while one | ||
81 | ** process is writing. | ||
82 | ** | ||
83 | ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE | ||
84 | ** after all dirty pages have been written to the | ||
85 | ** database file and the file has been synced to | ||
86 | ** disk. All that remains to do is to remove or | ||
87 | ** truncate the journal file and the transaction | ||
88 | ** will be committed. | ||
89 | ** | ||
90 | ** The page cache comes up in PAGER_UNLOCK. The first time a | ||
91 | ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED. | ||
92 | ** After all pages have been released using sqlite_page_unref(), | ||
93 | ** the state transitions back to PAGER_UNLOCK. The first time | ||
94 | ** that sqlite3PagerWrite() is called, the state transitions to | ||
95 | ** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be | ||
96 | ** called on an outstanding page which means that the pager must | ||
97 | ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.) | ||
98 | ** PAGER_RESERVED means that there is an open rollback journal. | ||
99 | ** The transition to PAGER_EXCLUSIVE occurs before any changes | ||
100 | ** are made to the database file, though writes to the rollback | ||
101 | ** journal occurs with just PAGER_RESERVED. After an sqlite3PagerRollback() | ||
102 | ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED, | ||
103 | ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode. | ||
104 | */ | ||
/* Numeric codes for the pager states described above. */
#define PAGER_UNLOCK      0   /* Initial state */
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5   /* Entered from PAGER_EXCLUSIVE */
110 | |||
111 | /* | ||
112 | ** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time, | ||
113 | ** then failed attempts to get a reserved lock will invoke the busy callback. | ||
114 | ** This is off by default. To see why, consider the following scenario: | ||
115 | ** | ||
116 | ** Suppose thread A already has a shared lock and wants a reserved lock. | ||
117 | ** Thread B already has a reserved lock and wants an exclusive lock. If | ||
118 | ** both threads are using their busy callbacks, it might be a long time | ||
119 | ** before one of the threads gives up and allows the other to proceed. | ||
120 | ** But if the thread trying to get the reserved lock gives up quickly | ||
121 | ** (if it never invokes its busy callback) then the contention will be | ||
122 | ** resolved quickly. | ||
123 | */ | ||
/* Supply the default (off) unless the build has already chosen a value. */
#if !defined(SQLITE_BUSY_RESERVED_LOCK)
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
127 | |||
/*
** Round the integer value X up to the nearest multiple of 8.  A value
** that is already a multiple of 8 is returned unchanged.  When X is
** used as an address or size, the result is therefore aligned to an
** 8-byte boundary.
*/
#define FORCE_ALIGNMENT(X)   ((7+(X)) & ~7)
133 | |||
134 | typedef struct PgHdr PgHdr; | ||
135 | |||
136 | /* | ||
137 | ** Each pager stores all currently unreferenced pages in a list sorted | ||
138 | ** in least-recently-used (LRU) order (i.e. the first item on the list has | ||
139 | ** not been referenced in a long time, the last item has been recently | ||
140 | ** used). An instance of this structure is included as part of each | ||
141 | ** pager structure for this purpose (variable Pager.lru). | ||
142 | ** | ||
143 | ** Additionally, if memory-management is enabled, all unreferenced pages | ||
144 | ** are stored in a global LRU list (global variable sqlite3LruPageList). | ||
145 | ** | ||
146 | ** In both cases, the PagerLruList.pFirstSynced variable points to | ||
147 | ** the first page in the corresponding list that does not require an | ||
148 | ** fsync() operation before its memory can be reclaimed. If no such | ||
149 | ** page exists, PagerLruList.pFirstSynced is set to NULL. | ||
150 | */ | ||
151 | typedef struct PagerLruList PagerLruList; | ||
152 | struct PagerLruList { | ||
153 | PgHdr *pFirst; /* First page in LRU list */ | ||
154 | PgHdr *pLast; /* Last page in LRU list (the most recently used) */ | ||
155 | PgHdr *pFirstSynced; /* First page in list with PgHdr.needSync==0 */ | ||
156 | }; | ||
157 | |||
158 | /* | ||
159 | ** The following structure contains the next and previous pointers used | ||
160 | ** to link a PgHdr structure into a PagerLruList linked list. | ||
161 | */ | ||
162 | typedef struct PagerLruLink PagerLruLink; | ||
163 | struct PagerLruLink { | ||
164 | PgHdr *pNext; | ||
165 | PgHdr *pPrev; | ||
166 | }; | ||
167 | |||
168 | /* | ||
169 | ** Each in-memory image of a page begins with the following header. | ||
170 | ** This header is only visible to this pager module. The client | ||
171 | ** code that calls pager sees only the data that follows the header. | ||
172 | ** | ||
173 | ** Client code should call sqlite3PagerWrite() on a page prior to making | ||
174 | ** any modifications to that page. The first time sqlite3PagerWrite() | ||
175 | ** is called, the original page contents are written into the rollback | ||
176 | ** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once | ||
177 | ** the journal page has made it onto the disk surface, PgHdr.needSync | ||
178 | ** is cleared. The modified page cannot be written back into the original | ||
179 | ** database file until the journal pages has been synced to disk and the | ||
180 | ** PgHdr.needSync has been cleared. | ||
181 | ** | ||
182 | ** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and | ||
183 | ** is cleared again when the page content is written back to the original | ||
184 | ** database file. | ||
185 | ** | ||
186 | ** Details of important structure elements: | ||
187 | ** | ||
188 | ** needSync | ||
189 | ** | ||
190 | ** If this is true, this means that it is not safe to write the page | ||
191 | ** content to the database because the original content needed | ||
192 | ** for rollback has not been synced to the main rollback journal. | ||
193 | ** The original content may have been written to the rollback journal | ||
194 | ** but it has not yet been synced. So we cannot write to the database | ||
195 | ** file because power failure might cause the page in the journal file | ||
196 | ** to never reach the disk. It is as if the write to the journal file | ||
197 | ** does not occur until the journal file is synced. | ||
198 | ** | ||
199 | ** This flag is false if the page content exactly matches what | ||
200 | ** currently exists in the database file. The needSync flag is also | ||
201 | ** false if the original content has been written to the main rollback | ||
202 | ** journal and synced. If the page represents a new page that has | ||
203 | ** been added onto the end of the database during the current | ||
204 | ** transaction, the needSync flag is true until the original database | ||
205 | ** size in the journal header has been synced to disk. | ||
206 | ** | ||
207 | ** inJournal | ||
208 | ** | ||
209 | ** This is true if the original page has been written into the main | ||
210 | ** rollback journal. This is always false for new pages added to | ||
211 | ** the end of the database file during the current transaction. | ||
212 | ** And this flag says nothing about whether or not the journal | ||
213 | ** has been synced to disk. For pages that are in the original | ||
214 | ** database file, the following expression should always be true: | ||
215 | ** | ||
216 | ** inJournal = (pPager->aInJournal[(pgno-1)/8] & (1<<((pgno-1)%8)))!=0 | ||
217 | ** | ||
218 | ** The pPager->aInJournal[] array is only valid for the original | ||
219 | ** pages of the database, not new pages that are added to the end | ||
220 | ** of the database, so obviously the above expression cannot be | ||
221 | ** valid for new pages. For new pages inJournal is always 0. | ||
222 | ** | ||
223 | ** dirty | ||
224 | ** | ||
225 | ** When true, this means that the content of the page has been | ||
226 | ** modified and needs to be written back to the database file. | ||
227 | ** If false, it means that either the content of the page is | ||
228 | ** unchanged or else the content is unimportant and we do not | ||
229 | ** care whether or not it is preserved. | ||
230 | ** | ||
231 | ** alwaysRollback | ||
232 | ** | ||
233 | ** This means that the sqlite3PagerDontRollback() API should be | ||
234 | ** ignored for this page. The DontRollback() API attempts to say | ||
235 | ** that the content of the page on disk is unimportant (it is an | ||
236 | ** unused page on the freelist) so that it is unnecessary to | ||
237 | ** rollback changes to this page because the content of the page | ||
238 | ** can change without changing the meaning of the database. This | ||
239 | ** flag overrides any DontRollback() attempt. This flag is set | ||
240 | ** when a page that originally contained valid data is added to | ||
241 | ** the freelist. Later in the same transaction, this page might | ||
242 | ** be pulled from the freelist and reused for something different | ||
243 | ** and at that point the DontRollback() API will be called because | ||
244 | ** pages taken from the freelist do not need to be protected by | ||
245 | ** the rollback journal. But this flag says that the page was | ||
246 | ** not originally part of the freelist so that it still needs to | ||
247 | ** be rolled back in spite of any subsequent DontRollback() calls. | ||
248 | ** | ||
249 | ** needRead | ||
250 | ** | ||
251 | ** This flag means (when true) that the content of the page has | ||
252 | ** not yet been loaded from disk. The in-memory content is just | ||
253 | ** garbage. (Actually, we zero the content, but you should not | ||
254 | ** make any assumptions about the content nevertheless.) If the | ||
255 | ** content is needed in the future, it should be read from the | ||
256 | ** original database file. | ||
257 | */ | ||
258 | struct PgHdr { | ||
259 | Pager *pPager; /* The pager to which this page belongs */ | ||
260 | Pgno pgno; /* The page number for this page */ | ||
261 | PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */ | ||
262 | PagerLruLink free; /* Next and previous free pages */ | ||
263 | PgHdr *pNextAll; /* A list of all pages */ | ||
264 | u8 inJournal; /* TRUE if has been written to journal */ | ||
265 | u8 dirty; /* TRUE if we need to write back changes */ | ||
266 | u8 needSync; /* Sync journal before writing this page */ | ||
267 | u8 alwaysRollback; /* Disable DontRollback() for this page */ | ||
268 | u8 needRead; /* Read content if PagerWrite() is called */ | ||
269 | short int nRef; /* Number of users of this page */ | ||
270 | PgHdr *pDirty, *pPrevDirty; /* Dirty pages */ | ||
271 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
272 | PagerLruLink gfree; /* Global list of nRef==0 pages */ | ||
273 | #endif | ||
274 | #ifdef SQLITE_CHECK_PAGES | ||
275 | u32 pageHash; | ||
276 | #endif | ||
277 | void *pData; /* Page data */ | ||
278 | /* Pager.nExtra bytes of local data appended to this header */ | ||
279 | }; | ||
280 | |||
281 | /* | ||
282 | ** For an in-memory only database, some extra information is recorded about | ||
283 | ** each page so that changes can be rolled back. (Journal files are not | ||
284 | ** used for in-memory databases.) The following information is added to | ||
285 | ** the end of every EXTRA block for in-memory databases. | ||
286 | ** | ||
287 | ** This information could have been added directly to the PgHdr structure. | ||
288 | ** But then it would take up an extra 8 bytes of storage on every PgHdr | ||
289 | ** even for disk-based databases. Splitting it out saves 8 bytes. This | ||
290 | ** is only a savings of 0.8% but those percentages add up. | ||
291 | */ | ||
292 | typedef struct PgHistory PgHistory; | ||
293 | struct PgHistory { | ||
294 | u8 *pOrig; /* Original page text. Restore to this on a full rollback */ | ||
295 | u8 *pStmt; /* Text as it was at the beginning of the current statement */ | ||
296 | PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */ | ||
297 | u8 inStmt; /* TRUE if in the statement subjournal */ | ||
298 | }; | ||
299 | |||
/*
** Macros used for invoking the codec if there is one.
**
** P is a pointer to the Pager; D, N and X are handed through to
** P->xCodec() after P->pCodecArg.
**
**   CODEC1()  Call the codec, if one is installed, and discard its
**             result.  It expands to a bare "if" statement, so use it
**             only as a complete statement of its own.
**
**   CODEC2()  An expression of type char*.  It is the codec's return
**             value when a codec is installed and D otherwise.
**
** When SQLITE_HAS_CODEC is not defined, CODEC1() expands to nothing and
** CODEC2() is simply D converted to char*.
**
** Every macro parameter is parenthesized so that an argument such as
** "aBuf+1" is converted or passed as one unit rather than being regrouped
** by operator precedence.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
# define CODEC2(P,D,N,X) ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)(D))
#endif
310 | |||
/*
** Locate the regions associated with a PgHdr:
**
**   PGHDR_TO_DATA(P)      The page content, i.e. P->pData.
**   PGHDR_TO_EXTRA(G,P)   The first byte following the PgHdr structure G.
**                         The second argument is not used.
**   PGHDR_TO_HIST(P,PGR)  The PgHistory found PGR->nExtra bytes beyond
**                         the end of the PgHdr structure P.
*/
#define PGHDR_TO_DATA(P)      ((P)->pData)
#define PGHDR_TO_EXTRA(G,P)   ((void*)((G)+1))
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)(((char*)((P)+1)) + (PGR)->nExtra))
319 | |||
320 | /* | ||
321 | ** An open page cache is an instance of the following structure. | ||
322 | ** | ||
323 | ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or | ||
324 | ** SQLITE_FULL. Once one of the first two errors occurs, it persists | ||
325 | ** and is returned as the result of every major pager API call. The | ||
326 | ** SQLITE_FULL return code is slightly different. It persists only until the | ||
327 | ** next successful rollback is performed on the pager cache. Also, | ||
328 | ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup() | ||
329 | ** APIs, they may still be used successfully. | ||
330 | */ | ||
331 | struct Pager { | ||
332 | sqlite3_vfs *pVfs; /* OS functions to use for IO */ | ||
333 | u8 journalOpen; /* True if journal file descriptors is valid */ | ||
334 | u8 journalStarted; /* True if header of journal is synced */ | ||
335 | u8 useJournal; /* Use a rollback journal on this file */ | ||
336 | u8 noReadlock; /* Do not bother to obtain readlocks */ | ||
337 | u8 stmtOpen; /* True if the statement subjournal is open */ | ||
338 | u8 stmtInUse; /* True we are in a statement subtransaction */ | ||
339 | u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/ | ||
340 | u8 noSync; /* Do not sync the journal if true */ | ||
341 | u8 fullSync; /* Do extra syncs of the journal for robustness */ | ||
342 | u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */ | ||
343 | u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */ | ||
344 | u8 tempFile; /* zFilename is a temporary file */ | ||
345 | u8 readOnly; /* True for a read-only database */ | ||
346 | u8 needSync; /* True if an fsync() is needed on the journal */ | ||
347 | u8 dirtyCache; /* True if cached pages have changed */ | ||
348 | u8 alwaysRollback; /* Disable DontRollback() for all pages */ | ||
349 | u8 memDb; /* True to inhibit all file I/O */ | ||
350 | u8 setMaster; /* True if a m-j name has been written to jrnl */ | ||
351 | u8 doNotSync; /* Boolean. While true, do not spill the cache */ | ||
352 | u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ | ||
353 | u8 changeCountDone; /* Set after incrementing the change-counter */ | ||
354 | u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ | ||
355 | int errCode; /* One of several kinds of errors */ | ||
356 | int dbSize; /* Number of pages in the file */ | ||
357 | int origDbSize; /* dbSize before the current change */ | ||
358 | int stmtSize; /* Size of database (in pages) at stmt_begin() */ | ||
359 | int nRec; /* Number of pages written to the journal */ | ||
360 | u32 cksumInit; /* Quasi-random value added to every checksum */ | ||
361 | int stmtNRec; /* Number of records in stmt subjournal */ | ||
362 | int nExtra; /* Add this many bytes to each in-memory page */ | ||
363 | int pageSize; /* Number of bytes in a page */ | ||
364 | int nPage; /* Total number of in-memory pages */ | ||
365 | int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */ | ||
366 | int mxPage; /* Maximum number of pages to hold in cache */ | ||
367 | Pgno mxPgno; /* Maximum allowed size of the database */ | ||
368 | u8 *aInJournal; /* One bit for each page in the database file */ | ||
369 | u8 *aInStmt; /* One bit for each page in the database */ | ||
370 | char *zFilename; /* Name of the database file */ | ||
371 | char *zJournal; /* Name of the journal file */ | ||
372 | char *zDirectory; /* Directory hold database and journal files */ | ||
373 | char *zStmtJrnl; /* Name of the statement journal file */ | ||
374 | sqlite3_file *fd, *jfd; /* File descriptors for database and journal */ | ||
375 | sqlite3_file *stfd; /* File descriptor for the statement subjournal*/ | ||
376 | BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */ | ||
377 | PagerLruList lru; /* LRU list of free pages */ | ||
378 | PgHdr *pAll; /* List of all pages */ | ||
379 | PgHdr *pStmt; /* List of pages in the statement subjournal */ | ||
380 | PgHdr *pDirty; /* List of all dirty pages */ | ||
381 | i64 journalOff; /* Current byte offset in the journal file */ | ||
382 | i64 journalHdr; /* Byte offset to previous journal header */ | ||
383 | i64 stmtHdrOff; /* First journal header written this statement */ | ||
384 | i64 stmtCksum; /* cksumInit when statement was started */ | ||
385 | i64 stmtJSize; /* Size of journal at stmt_begin() */ | ||
386 | int sectorSize; /* Assumed sector size during rollback */ | ||
387 | #ifdef SQLITE_TEST | ||
388 | int nHit, nMiss; /* Cache hits and missing */ | ||
389 | int nRead, nWrite; /* Database pages read/written */ | ||
390 | #endif | ||
391 | void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */ | ||
392 | void (*xReiniter)(DbPage*,int); /* Call this routine when reloading pages */ | ||
393 | #ifdef SQLITE_HAS_CODEC | ||
394 | void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ | ||
395 | void *pCodecArg; /* First argument to xCodec() */ | ||
396 | #endif | ||
397 | int nHash; /* Size of the pager hash table */ | ||
398 | PgHdr **aHash; /* Hash table to map page number to PgHdr */ | ||
399 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
400 | Pager *pNext; /* Doubly linked list of pagers on which */ | ||
401 | Pager *pPrev; /* sqlite3_release_memory() will work */ | ||
402 | int iInUseMM; /* Non-zero if unavailable to MM */ | ||
403 | int iInUseDB; /* Non-zero if in sqlite3_release_memory() */ | ||
404 | #endif | ||
405 | char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ | ||
406 | char dbFileVers[16]; /* Changes whenever database file changes */ | ||
407 | }; | ||
408 | |||
409 | /* | ||
410 | ** The following global variables hold counters used for | ||
411 | ** testing purposes only. These variables do not exist in | ||
412 | ** a non-testing build. These variables are not thread-safe. | ||
413 | */ | ||
/*
** PAGER_INCR(v) bumps the counter v in testing builds and expands to
** nothing otherwise.  The four counters below exist only when
** SQLITE_TEST is defined.
*/
#ifndef SQLITE_TEST
# define PAGER_INCR(v)
#else
int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
int sqlite3_pager_pgfree_count = 0;    /* Number of cache pages freed */
# define PAGER_INCR(v) v++
#endif
423 | |||
424 | /* | ||
425 | ** The following variable points to the head of a double-linked list | ||
426 | ** of all pagers that are eligible for page stealing by the | ||
427 | ** sqlite3_release_memory() interface. Access to this list is | ||
428 | ** protected by the SQLITE_MUTEX_STATIC_MEM2 mutex. | ||
429 | */ | ||
#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT)
/* Global LRU list; all three pointers start out as 0. */
static PagerLruList sqlite3LruPageList = {0, 0, 0};
/* Head of the list of pagers; starts out empty. */
static Pager *sqlite3PagerList = 0;
#endif
434 | |||
435 | |||
436 | /* | ||
437 | ** Journal files begin with the following magic string. The data | ||
438 | ** was obtained from /dev/random. It is used only as a sanity check. | ||
439 | ** | ||
440 | ** Since version 2.8.0, the journal format contains additional sanity | ||
441 | ** checking information. If the power fails while the journal is being | ||
442 | ** written, semi-random garbage data might appear in the journal | ||
443 | ** file after power is restored. If an attempt is then made | ||
444 | ** to roll the journal back, the database could be corrupted. The additional | ||
445 | ** sanity checking data is an attempt to discover the garbage in the | ||
446 | ** journal and ignore it. | ||
447 | ** | ||
448 | ** The sanity checking information for the new journal format consists | ||
449 | ** of a 32-bit checksum on each page of data. The checksum covers both | ||
450 | ** the page number and the pPager->pageSize bytes of data for the page. | ||
451 | ** This cksum is initialized to a 32-bit random value that appears in the | ||
452 | ** journal file right after the header. The random initializer is important, | ||
453 | ** because garbage data that appears at the end of a journal is likely | ||
454 | ** data that was once in other files that have now been deleted. If the | ||
455 | ** garbage data came from an obsolete journal file, the checksums might | ||
456 | ** be correct. But by initializing the checksum to random value which | ||
457 | ** is different for every journal, we minimize that risk. | ||
458 | */ | ||
/* The eight magic bytes found at the start of a journal file. */
static const unsigned char aJournalMagic[8] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,
};
462 | |||
/*
** The size of the header and of each page in the journal is determined
** by the following macros.  Both take a pointer to a Pager.
**
** JOURNAL_PG_SZ() is the pager's page size plus 8 bytes.
** NOTE(review): the extra 8 bytes are presumably the page number and
** the per-page checksum stored with each journalled page - confirm.
**
** The macro parameter is parenthesized so that the argument may be any
** pointer-valued expression (e.g. "aPager+1").
*/
#define JOURNAL_PG_SZ(pPager) (((pPager)->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
475 | |||
476 | /* | ||
477 | ** The macro MEMDB is true if we are dealing with an in-memory database. | ||
478 | ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set, | ||
479 | ** the value of MEMDB will be a constant and the compiler will optimize | ||
480 | ** out code that would never execute. | ||
481 | */ | ||
/* MEMDB reads the memDb flag of the variable named "pPager" that is in
** scope where the macro is used, unless in-memory databases are omitted
** from the build, in which case it is the constant 0. */
#ifndef SQLITE_OMIT_MEMORYDB
# define MEMDB pPager->memDb
#else
# define MEMDB 0
#endif
487 | |||
488 | /* | ||
489 | ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is | ||
490 | ** reserved for working around a windows/posix incompatibility). It is | ||
491 | ** used in the journal to signify that the remainder of the journal file | ||
492 | ** is devoted to storing a master journal name - there are no more pages to | ||
493 | ** roll back. See comments for function writeMasterJournal() for details. | ||
494 | */ | ||
/* Earlier form of the macro, kept for reference (it lacks the "+1"):
**
**     #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize))
**
** x is a pointer to a Pager; the result is one more than PENDING_BYTE
** divided by that pager's page size.
*/
#define PAGER_MJ_PGNO(x) (1+(PENDING_BYTE/((x)->pageSize)))
497 | |||
/*
** The largest legal page number: 2^31 - 1 (2147483647).
*/
#define PAGER_MAX_PGNO 0x7fffffff
502 | |||
503 | /* | ||
504 | ** The pagerEnter() and pagerLeave() routines acquire and release | ||
505 | ** a mutex on each pager. The mutex is recursive. | ||
506 | ** | ||
507 | ** This is a special-purpose mutex. It only provides mutual exclusion | ||
508 | ** between the Btree and the Memory Management sqlite3_release_memory() | ||
509 | ** function. It does not prevent, for example, two Btrees from accessing | ||
510 | ** the same pager at the same time. Other general-purpose mutexes in | ||
511 | ** the btree layer handle that chore. | ||
512 | */ | ||
#ifndef SQLITE_ENABLE_MEMORY_MANAGEMENT
  /* Without memory management there is nothing to coordinate with. */
# define pagerEnter(X)
# define pagerLeave(X)
#else
/*
** Mark pager p as in use by the caller.  Calls nest: each one raises
** p->iInUseDB by one.
**
** If this is the outermost call and p->iInUseMM is set, back the count
** off to zero, wait for the SQLITE_MUTEX_STATIC_MEM2 mutex, restore the
** count to one while the mutex is held and then drop the mutex again.
** On return p->iInUseMM is asserted to be zero.
*/
static void pagerEnter(Pager *p){
  ++p->iInUseDB;
  if( p->iInUseMM && p->iInUseDB==1 ){
    sqlite3_mutex *pMem2 = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
    p->iInUseDB = 0;
    sqlite3_mutex_enter(pMem2);
    p->iInUseDB = 1;
    sqlite3_mutex_leave(pMem2);
  }
  assert( p->iInUseMM==0 );
}

/*
** Undo one pagerEnter() call on pager p by lowering p->iInUseDB by one.
** The count must not go negative.
*/
static void pagerLeave(Pager *p){
  --p->iInUseDB;
  assert( p->iInUseDB>=0 );
}
#endif
534 | |||
/*
** Reference count tracking (for debugging).  It is compiled in only
** when SQLITE_DEBUG is defined and stays silent until the global
** variable pager3_refinfo_enable is set to a non-zero value.
**
** REFINFO(X) prints one line giving the page number, data address and
** reference count of page X plus the nRef total of its pager.  In
** non-debug builds REFINFO(X) expands to nothing.
*/
#ifndef SQLITE_DEBUG
# define REFINFO(X)
#else
int pager3_refinfo_enable = 0;
static void pager_refinfo(PgHdr *p){
  static int cnt = 0;
  if( pager3_refinfo_enable ){
    sqlite3DebugPrintf(
      "REFCNT: %4d addr=%p nRef=%-3d total=%d\n",
      p->pgno, PGHDR_TO_DATA(p), p->nRef, p->pPager->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
}
# define REFINFO(X)  pager_refinfo(X)
#endif
553 | |||
554 | /* | ||
555 | ** Add page pPg to the end of the linked list managed by structure | ||
556 | ** pList (pPg becomes the last entry in the list - the most recently | ||
557 | ** used). Argument pLink should point to either pPg->free or pPg->gfree, | ||
558 | ** depending on whether pPg is being added to the pager-specific or | ||
559 | ** global LRU list. | ||
560 | */ | ||
561 | static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){ | ||
562 | pLink->pNext = 0; | ||
563 | pLink->pPrev = pList->pLast; | ||
564 | |||
565 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
566 | assert(pLink==&pPg->free || pLink==&pPg->gfree); | ||
567 | assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList); | ||
568 | #endif | ||
569 | |||
570 | if( pList->pLast ){ | ||
571 | int iOff = (char *)pLink - (char *)pPg; | ||
572 | PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]); | ||
573 | pLastLink->pNext = pPg; | ||
574 | }else{ | ||
575 | assert(!pList->pFirst); | ||
576 | pList->pFirst = pPg; | ||
577 | } | ||
578 | |||
579 | pList->pLast = pPg; | ||
580 | if( !pList->pFirstSynced && pPg->needSync==0 ){ | ||
581 | pList->pFirstSynced = pPg; | ||
582 | } | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | ** Remove pPg from the list managed by the structure pointed to by pList. | ||
587 | ** | ||
588 | ** Argument pLink should point to either pPg->free or pPg->gfree, depending | ||
589 | ** on whether pPg is being added to the pager-specific or global LRU list. | ||
590 | */ | ||
591 | static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){ | ||
592 | int iOff = (char *)pLink - (char *)pPg; | ||
593 | |||
594 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
595 | assert(pLink==&pPg->free || pLink==&pPg->gfree); | ||
596 | assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList); | ||
597 | #endif | ||
598 | |||
599 | if( pPg==pList->pFirst ){ | ||
600 | pList->pFirst = pLink->pNext; | ||
601 | } | ||
602 | if( pPg==pList->pLast ){ | ||
603 | pList->pLast = pLink->pPrev; | ||
604 | } | ||
605 | if( pLink->pPrev ){ | ||
606 | PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]); | ||
607 | pPrevLink->pNext = pLink->pNext; | ||
608 | } | ||
609 | if( pLink->pNext ){ | ||
610 | PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]); | ||
611 | pNextLink->pPrev = pLink->pPrev; | ||
612 | } | ||
613 | if( pPg==pList->pFirstSynced ){ | ||
614 | PgHdr *p = pLink->pNext; | ||
615 | while( p && p->needSync ){ | ||
616 | PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]); | ||
617 | p = pL->pNext; | ||
618 | } | ||
619 | pList->pFirstSynced = p; | ||
620 | } | ||
621 | |||
622 | pLink->pNext = pLink->pPrev = 0; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | ** Add page pPg to the list of free pages for the pager. If | ||
627 | ** memory-management is enabled, also add the page to the global | ||
628 | ** list of free pages. | ||
629 | */ | ||
630 | static void lruListAdd(PgHdr *pPg){ | ||
631 | listAdd(&pPg->pPager->lru, &pPg->free, pPg); | ||
632 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
633 | if( !pPg->pPager->memDb ){ | ||
634 | sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
635 | listAdd(&sqlite3LruPageList, &pPg->gfree, pPg); | ||
636 | sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
637 | } | ||
638 | #endif | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | ** Remove page pPg from the list of free pages for the associated pager. | ||
643 | ** If memory-management is enabled, also remove pPg from the global list | ||
644 | ** of free pages. | ||
645 | */ | ||
646 | static void lruListRemove(PgHdr *pPg){ | ||
647 | listRemove(&pPg->pPager->lru, &pPg->free, pPg); | ||
648 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
649 | if( !pPg->pPager->memDb ){ | ||
650 | sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
651 | listRemove(&sqlite3LruPageList, &pPg->gfree, pPg); | ||
652 | sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
653 | } | ||
654 | #endif | ||
655 | } | ||
656 | |||
657 | /* | ||
658 | ** This function is called just after the needSync flag has been cleared | ||
659 | ** from all pages managed by pPager (usually because the journal file | ||
660 | ** has just been synced). It updates the pPager->lru.pFirstSynced variable | ||
661 | ** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced | ||
662 | ** variable also. | ||
663 | */ | ||
664 | static void lruListSetFirstSynced(Pager *pPager){ | ||
665 | pPager->lru.pFirstSynced = pPager->lru.pFirst; | ||
666 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
667 | if( !pPager->memDb ){ | ||
668 | PgHdr *p; | ||
669 | sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
670 | for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext); | ||
671 | assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced); | ||
672 | sqlite3LruPageList.pFirstSynced = p; | ||
673 | sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
674 | } | ||
675 | #endif | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | ** Return true if page *pPg has already been written to the statement | ||
680 | ** journal (or statement snapshot has been created, if *pPg is part | ||
681 | ** of an in-memory database). | ||
682 | */ | ||
683 | static int pageInStatement(PgHdr *pPg){ | ||
684 | Pager *pPager = pPg->pPager; | ||
685 | if( MEMDB ){ | ||
686 | return PGHDR_TO_HIST(pPg, pPager)->inStmt; | ||
687 | }else{ | ||
688 | Pgno pgno = pPg->pgno; | ||
689 | u8 *a = pPager->aInStmt; | ||
690 | return (a && (int)pgno<=pPager->stmtSize && (a[pgno/8] & (1<<(pgno&7)))); | ||
691 | } | ||
692 | } | ||
693 | |||
694 | /* | ||
695 | ** Change the size of the pager hash table to N. N must be a power | ||
696 | ** of two. | ||
697 | */ | ||
698 | static void pager_resize_hash_table(Pager *pPager, int N){ | ||
699 | PgHdr **aHash, *pPg; | ||
700 | assert( N>0 && (N&(N-1))==0 ); | ||
701 | pagerLeave(pPager); | ||
702 | sqlite3MallocBenignFailure((int)pPager->aHash); | ||
703 | aHash = sqlite3MallocZero( sizeof(aHash[0])*N ); | ||
704 | pagerEnter(pPager); | ||
705 | if( aHash==0 ){ | ||
706 | /* Failure to rehash is not an error. It is only a performance hit. */ | ||
707 | return; | ||
708 | } | ||
709 | sqlite3_free(pPager->aHash); | ||
710 | pPager->nHash = N; | ||
711 | pPager->aHash = aHash; | ||
712 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
713 | int h; | ||
714 | if( pPg->pgno==0 ){ | ||
715 | assert( pPg->pNextHash==0 && pPg->pPrevHash==0 ); | ||
716 | continue; | ||
717 | } | ||
718 | h = pPg->pgno & (N-1); | ||
719 | pPg->pNextHash = aHash[h]; | ||
720 | if( aHash[h] ){ | ||
721 | aHash[h]->pPrevHash = pPg; | ||
722 | } | ||
723 | aHash[h] = pPg; | ||
724 | pPg->pPrevHash = 0; | ||
725 | } | ||
726 | } | ||
727 | |||
728 | /* | ||
729 | ** Read a 32-bit integer from the given file descriptor. Store the integer | ||
730 | ** that is read in *pRes. Return SQLITE_OK if everything worked, or an | ||
731 | ** error code is something goes wrong. | ||
732 | ** | ||
733 | ** All values are stored on disk as big-endian. | ||
734 | */ | ||
735 | static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ | ||
736 | unsigned char ac[4]; | ||
737 | int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); | ||
738 | if( rc==SQLITE_OK ){ | ||
739 | *pRes = sqlite3Get4byte(ac); | ||
740 | } | ||
741 | return rc; | ||
742 | } | ||
743 | |||
/*
** Write the 32-bit integer B into the buffer at A in big-endian byte
** order. Both macro arguments are parenthesized so that arbitrary
** expressions (for example "p+4") expand with the intended precedence.
*/
#define put32bits(A,B)  sqlite3Put4byte((u8*)(A),(B))
748 | |||
749 | /* | ||
750 | ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK | ||
751 | ** on success or an error code is something goes wrong. | ||
752 | */ | ||
753 | static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ | ||
754 | char ac[4]; | ||
755 | put32bits(ac, val); | ||
756 | return sqlite3OsWrite(fd, ac, 4, offset); | ||
757 | } | ||
758 | |||
759 | /* | ||
760 | ** If file pFd is open, call sqlite3OsUnlock() on it. | ||
761 | */ | ||
762 | static int osUnlock(sqlite3_file *pFd, int eLock){ | ||
763 | if( !pFd->pMethods ){ | ||
764 | return SQLITE_OK; | ||
765 | } | ||
766 | return sqlite3OsUnlock(pFd, eLock); | ||
767 | } | ||
768 | |||
/*
** Determine whether or not the atomic-write optimization can be used
** with this pager. When the database file is open, the optimization can
** be used if:
**
**  (a) the value returned by OsDeviceCharacteristics() has the
**      SQLITE_IOCAP_ATOMIC bit or the bit corresponding to the page size
**      (pageSize>>8) set, and
**  (b) the value returned by OsSectorSize() is less than or equal
**      to the page size.
**
** If the database file is not open (no pMethods), the optimization is
** treated as usable.
**
** If the optimization cannot be used, 0 is returned. If it can be used,
** then the value returned is the size of the journal file when it
** contains rollback data for exactly one page.
*/
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
static int jrnlBufferSize(Pager *pPager){
  sqlite3_file *fd = pPager->fd;
  int canUse = 1;            /* True if the optimization may be used */

  /* The page-size test below relies on this encoding of the IOCAP bits */
  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

  if( fd->pMethods ){
    int dc = sqlite3OsDeviceCharacteristics(fd);   /* Device characteristics */
    int nSector = sqlite3OsSectorSize(fd);         /* Sector size */
    int nPage = pPager->pageSize;                  /* Page size */
    canUse = (dc&(SQLITE_IOCAP_ATOMIC|(nPage>>8))) && nSector<=nPage;
  }

  if( !canUse ){
    return 0;
  }
  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
}
#endif
804 | |||
805 | /* | ||
806 | ** This function should be called when an error occurs within the pager | ||
807 | ** code. The first argument is a pointer to the pager structure, the | ||
808 | ** second the error-code about to be returned by a pager API function. | ||
809 | ** The value returned is a copy of the second argument to this function. | ||
810 | ** | ||
811 | ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL | ||
812 | ** the error becomes persistent. Until the persisten error is cleared, | ||
813 | ** subsequent API calls on this Pager will immediately return the same | ||
814 | ** error code. | ||
815 | ** | ||
816 | ** A persistent error indicates that the contents of the pager-cache | ||
817 | ** cannot be trusted. This state can be cleared by completely discarding | ||
818 | ** the contents of the pager-cache. If a transaction was active when | ||
819 | ** the persistent error occured, then the rollback journal may need | ||
820 | ** to be replayed. | ||
821 | */ | ||
822 | static void pager_unlock(Pager *pPager); | ||
823 | static int pager_error(Pager *pPager, int rc){ | ||
824 | int rc2 = rc & 0xff; | ||
825 | assert( | ||
826 | pPager->errCode==SQLITE_FULL || | ||
827 | pPager->errCode==SQLITE_OK || | ||
828 | (pPager->errCode & 0xff)==SQLITE_IOERR | ||
829 | ); | ||
830 | if( | ||
831 | rc2==SQLITE_FULL || | ||
832 | rc2==SQLITE_IOERR || | ||
833 | rc2==SQLITE_CORRUPT | ||
834 | ){ | ||
835 | pPager->errCode = rc; | ||
836 | if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){ | ||
837 | /* If the pager is already unlocked, call pager_unlock() now to | ||
838 | ** clear the error state and ensure that the pager-cache is | ||
839 | ** completely empty. | ||
840 | */ | ||
841 | pager_unlock(pPager); | ||
842 | } | ||
843 | } | ||
844 | return rc; | ||
845 | } | ||
846 | |||
/*
** If SQLITE_CHECK_PAGES is defined then some sanity checking is done
** on the cache using a hash function. This is used for testing
** and debugging only. When it is not defined, the hash functions
** evaluate to 0 and CHECK_PAGE() expands to nothing.
*/
#ifdef SQLITE_CHECK_PAGES
/*
** Return a 32-bit hash of the nByte bytes at pData. Starting from zero,
** the hash is multiplied by 1039 and the next byte added, for each byte
** in order.
*/
static u32 pager_datahash(int nByte, unsigned char *pData){
  u32 h = 0;
  int k = 0;
  while( k<nByte ){
    h = (h*1039) + pData[k];
    k++;
  }
  return h;
}

/*
** Return the pager_datahash() of the page data belonging to pPage. The
** number of bytes hashed is the page size of the owning pager.
*/
static u32 pager_pagehash(PgHdr *pPage){
  unsigned char *pContent = (unsigned char *)PGHDR_TO_DATA(pPage);
  return pager_datahash(pPage->pPager->pageSize, pContent);
}

/*
** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
** is defined, and NDEBUG is not defined, an assert() statement checks
** that the page is either dirty or still matches the calculated page-hash.
** The check is waived when no hash has been recorded for the page, when
** the pager has an error code set, and for in-memory databases.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;
  assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty ||
      pPg->pageHash==pager_pagehash(pPg) );
}

#else
#define pager_datahash(X,Y)  0
#define pager_pagehash(X)  0
#define CHECK_PAGE(x)
#endif
886 | |||
887 | /* | ||
888 | ** When this is called the journal file for pager pPager must be open. | ||
889 | ** The master journal file name is read from the end of the file and | ||
890 | ** written into memory supplied by the caller. | ||
891 | ** | ||
892 | ** zMaster must point to a buffer of at least nMaster bytes allocated by | ||
893 | ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is | ||
894 | ** enough space to write the master journal name). If the master journal | ||
895 | ** name in the journal is longer than nMaster bytes (including a | ||
896 | ** nul-terminator), then this is handled as if no master journal name | ||
897 | ** were present in the journal. | ||
898 | ** | ||
899 | ** If no master journal file name is present zMaster[0] is set to 0 and | ||
900 | ** SQLITE_OK returned. | ||
901 | */ | ||
902 | static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){ | ||
903 | int rc; | ||
904 | u32 len; | ||
905 | i64 szJ; | ||
906 | u32 cksum; | ||
907 | int i; | ||
908 | unsigned char aMagic[8]; /* A buffer to hold the magic header */ | ||
909 | |||
910 | zMaster[0] = '\0'; | ||
911 | |||
912 | rc = sqlite3OsFileSize(pJrnl, &szJ); | ||
913 | if( rc!=SQLITE_OK || szJ<16 ) return rc; | ||
914 | |||
915 | rc = read32bits(pJrnl, szJ-16, &len); | ||
916 | if( rc!=SQLITE_OK ) return rc; | ||
917 | |||
918 | if( len>=nMaster ){ | ||
919 | return SQLITE_OK; | ||
920 | } | ||
921 | |||
922 | rc = read32bits(pJrnl, szJ-12, &cksum); | ||
923 | if( rc!=SQLITE_OK ) return rc; | ||
924 | |||
925 | rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8); | ||
926 | if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc; | ||
927 | |||
928 | rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len); | ||
929 | if( rc!=SQLITE_OK ){ | ||
930 | return rc; | ||
931 | } | ||
932 | zMaster[len] = '\0'; | ||
933 | |||
934 | /* See if the checksum matches the master journal name */ | ||
935 | for(i=0; i<len; i++){ | ||
936 | cksum -= zMaster[i]; | ||
937 | } | ||
938 | if( cksum ){ | ||
939 | /* If the checksum doesn't add up, then one or more of the disk sectors | ||
940 | ** containing the master journal filename is corrupted. This means | ||
941 | ** definitely roll back, so just return SQLITE_OK and report a (nul) | ||
942 | ** master-journal filename. | ||
943 | */ | ||
944 | zMaster[0] = '\0'; | ||
945 | } | ||
946 | |||
947 | return SQLITE_OK; | ||
948 | } | ||
949 | |||
950 | /* | ||
951 | ** Seek the journal file descriptor to the next sector boundary where a | ||
952 | ** journal header may be read or written. Pager.journalOff is updated with | ||
953 | ** the new seek offset. | ||
954 | ** | ||
955 | ** i.e for a sector size of 512: | ||
956 | ** | ||
957 | ** Input Offset Output Offset | ||
958 | ** --------------------------------------- | ||
959 | ** 0 0 | ||
960 | ** 512 512 | ||
961 | ** 100 512 | ||
962 | ** 2000 2048 | ||
963 | ** | ||
964 | */ | ||
965 | static void seekJournalHdr(Pager *pPager){ | ||
966 | i64 offset = 0; | ||
967 | i64 c = pPager->journalOff; | ||
968 | if( c ){ | ||
969 | offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); | ||
970 | } | ||
971 | assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); | ||
972 | assert( offset>=c ); | ||
973 | assert( (offset-c)<JOURNAL_HDR_SZ(pPager) ); | ||
974 | pPager->journalOff = offset; | ||
975 | } | ||
976 | |||
977 | /* | ||
978 | ** The journal file must be open when this routine is called. A journal | ||
979 | ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the | ||
980 | ** current location. | ||
981 | ** | ||
982 | ** The format for the journal header is as follows: | ||
983 | ** - 8 bytes: Magic identifying journal format. | ||
984 | ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. | ||
985 | ** - 4 bytes: Random number used for page hash. | ||
986 | ** - 4 bytes: Initial database page count. | ||
987 | ** - 4 bytes: Sector size used by the process that wrote this journal. | ||
988 | ** | ||
989 | ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space. | ||
990 | */ | ||
991 | static int writeJournalHdr(Pager *pPager){ | ||
992 | char zHeader[sizeof(aJournalMagic)+16]; | ||
993 | int rc; | ||
994 | |||
995 | if( pPager->stmtHdrOff==0 ){ | ||
996 | pPager->stmtHdrOff = pPager->journalOff; | ||
997 | } | ||
998 | |||
999 | seekJournalHdr(pPager); | ||
1000 | pPager->journalHdr = pPager->journalOff; | ||
1001 | |||
1002 | memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); | ||
1003 | |||
1004 | /* | ||
1005 | ** Write the nRec Field - the number of page records that follow this | ||
1006 | ** journal header. Normally, zero is written to this value at this time. | ||
1007 | ** After the records are added to the journal (and the journal synced, | ||
1008 | ** if in full-sync mode), the zero is overwritten with the true number | ||
1009 | ** of records (see syncJournal()). | ||
1010 | ** | ||
1011 | ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When | ||
1012 | ** reading the journal this value tells SQLite to assume that the | ||
1013 | ** rest of the journal file contains valid page records. This assumption | ||
1014 | ** is dangerous, as if a failure occured whilst writing to the journal | ||
1015 | ** file it may contain some garbage data. There are two scenarios | ||
1016 | ** where this risk can be ignored: | ||
1017 | ** | ||
1018 | ** * When the pager is in no-sync mode. Corruption can follow a | ||
1019 | ** power failure in this case anyway. | ||
1020 | ** | ||
1021 | ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees | ||
1022 | ** that garbage data is never appended to the journal file. | ||
1023 | */ | ||
1024 | assert(pPager->fd->pMethods||pPager->noSync); | ||
1025 | if( (pPager->noSync) | ||
1026 | || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) | ||
1027 | ){ | ||
1028 | put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); | ||
1029 | }else{ | ||
1030 | put32bits(&zHeader[sizeof(aJournalMagic)], 0); | ||
1031 | } | ||
1032 | |||
1033 | /* The random check-hash initialiser */ | ||
1034 | sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); | ||
1035 | put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); | ||
1036 | /* The initial database size */ | ||
1037 | put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize); | ||
1038 | /* The assumed sector size for this process */ | ||
1039 | put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); | ||
1040 | IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, sizeof(zHeader))) | ||
1041 | rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader),pPager->journalOff); | ||
1042 | pPager->journalOff += JOURNAL_HDR_SZ(pPager); | ||
1043 | |||
1044 | /* The journal header has been written successfully. Seek the journal | ||
1045 | ** file descriptor to the end of the journal header sector. | ||
1046 | */ | ||
1047 | if( rc==SQLITE_OK ){ | ||
1048 | IOTRACE(("JTAIL %p %lld\n", pPager, pPager->journalOff-1)) | ||
1049 | rc = sqlite3OsWrite(pPager->jfd, "\000", 1, pPager->journalOff-1); | ||
1050 | } | ||
1051 | return rc; | ||
1052 | } | ||
1053 | |||
1054 | /* | ||
1055 | ** The journal file must be open when this is called. A journal header file | ||
1056 | ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal | ||
1057 | ** file. See comments above function writeJournalHdr() for a description of | ||
1058 | ** the journal header format. | ||
1059 | ** | ||
1060 | ** If the header is read successfully, *nRec is set to the number of | ||
1061 | ** page records following this header and *dbSize is set to the size of the | ||
1062 | ** database before the transaction began, in pages. Also, pPager->cksumInit | ||
1063 | ** is set to the value read from the journal header. SQLITE_OK is returned | ||
1064 | ** in this case. | ||
1065 | ** | ||
1066 | ** If the journal header file appears to be corrupted, SQLITE_DONE is | ||
1067 | ** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes | ||
1068 | ** cannot be read from the journal file an error code is returned. | ||
1069 | */ | ||
1070 | static int readJournalHdr( | ||
1071 | Pager *pPager, | ||
1072 | i64 journalSize, | ||
1073 | u32 *pNRec, | ||
1074 | u32 *pDbSize | ||
1075 | ){ | ||
1076 | int rc; | ||
1077 | unsigned char aMagic[8]; /* A buffer to hold the magic header */ | ||
1078 | i64 jrnlOff; | ||
1079 | |||
1080 | seekJournalHdr(pPager); | ||
1081 | if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ | ||
1082 | return SQLITE_DONE; | ||
1083 | } | ||
1084 | jrnlOff = pPager->journalOff; | ||
1085 | |||
1086 | rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff); | ||
1087 | if( rc ) return rc; | ||
1088 | jrnlOff += sizeof(aMagic); | ||
1089 | |||
1090 | if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ | ||
1091 | return SQLITE_DONE; | ||
1092 | } | ||
1093 | |||
1094 | rc = read32bits(pPager->jfd, jrnlOff, pNRec); | ||
1095 | if( rc ) return rc; | ||
1096 | |||
1097 | rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit); | ||
1098 | if( rc ) return rc; | ||
1099 | |||
1100 | rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize); | ||
1101 | if( rc ) return rc; | ||
1102 | |||
1103 | /* Update the assumed sector-size to match the value used by | ||
1104 | ** the process that created this journal. If this journal was | ||
1105 | ** created by a process other than this one, then this routine | ||
1106 | ** is being called from within pager_playback(). The local value | ||
1107 | ** of Pager.sectorSize is restored at the end of that routine. | ||
1108 | */ | ||
1109 | rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize); | ||
1110 | if( rc ) return rc; | ||
1111 | |||
1112 | pPager->journalOff += JOURNAL_HDR_SZ(pPager); | ||
1113 | return SQLITE_OK; | ||
1114 | } | ||
1115 | |||
1116 | |||
1117 | /* | ||
1118 | ** Write the supplied master journal name into the journal file for pager | ||
1119 | ** pPager at the current location. The master journal name must be the last | ||
1120 | ** thing written to a journal file. If the pager is in full-sync mode, the | ||
1121 | ** journal file descriptor is advanced to the next sector boundary before | ||
1122 | ** anything is written. The format is: | ||
1123 | ** | ||
1124 | ** + 4 bytes: PAGER_MJ_PGNO. | ||
1125 | ** + N bytes: length of master journal name. | ||
1126 | ** + 4 bytes: N | ||
1127 | ** + 4 bytes: Master journal name checksum. | ||
1128 | ** + 8 bytes: aJournalMagic[]. | ||
1129 | ** | ||
1130 | ** The master journal page checksum is the sum of the bytes in the master | ||
1131 | ** journal name. | ||
1132 | ** | ||
1133 | ** If zMaster is a NULL pointer (occurs for a single database transaction), | ||
1134 | ** this call is a no-op. | ||
1135 | */ | ||
1136 | static int writeMasterJournal(Pager *pPager, const char *zMaster){ | ||
1137 | int rc; | ||
1138 | int len; | ||
1139 | int i; | ||
1140 | i64 jrnlOff; | ||
1141 | u32 cksum = 0; | ||
1142 | char zBuf[sizeof(aJournalMagic)+2*4]; | ||
1143 | |||
1144 | if( !zMaster || pPager->setMaster) return SQLITE_OK; | ||
1145 | pPager->setMaster = 1; | ||
1146 | |||
1147 | len = strlen(zMaster); | ||
1148 | for(i=0; i<len; i++){ | ||
1149 | cksum += zMaster[i]; | ||
1150 | } | ||
1151 | |||
1152 | /* If in full-sync mode, advance to the next disk sector before writing | ||
1153 | ** the master journal name. This is in case the previous page written to | ||
1154 | ** the journal has already been synced. | ||
1155 | */ | ||
1156 | if( pPager->fullSync ){ | ||
1157 | seekJournalHdr(pPager); | ||
1158 | } | ||
1159 | jrnlOff = pPager->journalOff; | ||
1160 | pPager->journalOff += (len+20); | ||
1161 | |||
1162 | rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager)); | ||
1163 | if( rc!=SQLITE_OK ) return rc; | ||
1164 | jrnlOff += 4; | ||
1165 | |||
1166 | rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff); | ||
1167 | if( rc!=SQLITE_OK ) return rc; | ||
1168 | jrnlOff += len; | ||
1169 | |||
1170 | put32bits(zBuf, len); | ||
1171 | put32bits(&zBuf[4], cksum); | ||
1172 | memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic)); | ||
1173 | rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff); | ||
1174 | pPager->needSync = !pPager->noSync; | ||
1175 | return rc; | ||
1176 | } | ||
1177 | |||
1178 | /* | ||
1179 | ** Add or remove a page from the list of all pages that are in the | ||
1180 | ** statement journal. | ||
1181 | ** | ||
1182 | ** The Pager keeps a separate list of pages that are currently in | ||
1183 | ** the statement journal. This helps the sqlite3PagerStmtCommit() | ||
1184 | ** routine run MUCH faster for the common case where there are many | ||
1185 | ** pages in memory but only a few are in the statement journal. | ||
1186 | */ | ||
1187 | static void page_add_to_stmt_list(PgHdr *pPg){ | ||
1188 | Pager *pPager = pPg->pPager; | ||
1189 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
1190 | assert( MEMDB ); | ||
1191 | if( !pHist->inStmt ){ | ||
1192 | assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 ); | ||
1193 | if( pPager->pStmt ){ | ||
1194 | PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg; | ||
1195 | } | ||
1196 | pHist->pNextStmt = pPager->pStmt; | ||
1197 | pPager->pStmt = pPg; | ||
1198 | pHist->inStmt = 1; | ||
1199 | } | ||
1200 | } | ||
1201 | |||
1202 | /* | ||
1203 | ** Find a page in the hash table given its page number. Return | ||
1204 | ** a pointer to the page or NULL if not found. | ||
1205 | */ | ||
1206 | static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ | ||
1207 | PgHdr *p; | ||
1208 | if( pPager->aHash==0 ) return 0; | ||
1209 | p = pPager->aHash[pgno & (pPager->nHash-1)]; | ||
1210 | while( p && p->pgno!=pgno ){ | ||
1211 | p = p->pNextHash; | ||
1212 | } | ||
1213 | return p; | ||
1214 | } | ||
1215 | |||
1216 | /* | ||
1217 | ** Clear the in-memory cache. This routine | ||
1218 | ** sets the state of the pager back to what it was when it was first | ||
1219 | ** opened. Any outstanding pages are invalidated and subsequent attempts | ||
1220 | ** to access those pages will likely result in a coredump. | ||
1221 | */ | ||
1222 | static void pager_reset(Pager *pPager){ | ||
1223 | PgHdr *pPg, *pNext; | ||
1224 | if( pPager->errCode ) return; | ||
1225 | for(pPg=pPager->pAll; pPg; pPg=pNext){ | ||
1226 | IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno)); | ||
1227 | PAGER_INCR(sqlite3_pager_pgfree_count); | ||
1228 | pNext = pPg->pNextAll; | ||
1229 | lruListRemove(pPg); | ||
1230 | sqlite3_free(pPg->pData); | ||
1231 | sqlite3_free(pPg); | ||
1232 | } | ||
1233 | assert(pPager->lru.pFirst==0); | ||
1234 | assert(pPager->lru.pFirstSynced==0); | ||
1235 | assert(pPager->lru.pLast==0); | ||
1236 | pPager->pStmt = 0; | ||
1237 | pPager->pAll = 0; | ||
1238 | pPager->pDirty = 0; | ||
1239 | pPager->nHash = 0; | ||
1240 | sqlite3_free(pPager->aHash); | ||
1241 | pPager->nPage = 0; | ||
1242 | pPager->aHash = 0; | ||
1243 | pPager->nRef = 0; | ||
1244 | } | ||
1245 | |||
1246 | /* | ||
1247 | ** Unlock the database file. | ||
1248 | ** | ||
1249 | ** If the pager is currently in error state, discard the contents of | ||
1250 | ** the cache and reset the Pager structure internal state. If there is | ||
1251 | ** an open journal-file, then the next time a shared-lock is obtained | ||
1252 | ** on the pager file (by this or any other process), it will be | ||
1253 | ** treated as a hot-journal and rolled back. | ||
1254 | */ | ||
1255 | static void pager_unlock(Pager *pPager){ | ||
1256 | if( !pPager->exclusiveMode ){ | ||
1257 | if( !MEMDB ){ | ||
1258 | if( pPager->fd->pMethods ){ | ||
1259 | osUnlock(pPager->fd, NO_LOCK); | ||
1260 | } | ||
1261 | pPager->dbSize = -1; | ||
1262 | IOTRACE(("UNLOCK %p\n", pPager)) | ||
1263 | |||
1264 | /* If Pager.errCode is set, the contents of the pager cache cannot be | ||
1265 | ** trusted. Now that the pager file is unlocked, the contents of the | ||
1266 | ** cache can be discarded and the error code safely cleared. | ||
1267 | */ | ||
1268 | if( pPager->errCode ){ | ||
1269 | pPager->errCode = SQLITE_OK; | ||
1270 | pager_reset(pPager); | ||
1271 | if( pPager->stmtOpen ){ | ||
1272 | sqlite3OsClose(pPager->stfd); | ||
1273 | sqlite3_free(pPager->aInStmt); | ||
1274 | pPager->aInStmt = 0; | ||
1275 | } | ||
1276 | if( pPager->journalOpen ){ | ||
1277 | sqlite3OsClose(pPager->jfd); | ||
1278 | pPager->journalOpen = 0; | ||
1279 | sqlite3_free(pPager->aInJournal); | ||
1280 | pPager->aInJournal = 0; | ||
1281 | } | ||
1282 | pPager->stmtOpen = 0; | ||
1283 | pPager->stmtInUse = 0; | ||
1284 | pPager->journalOff = 0; | ||
1285 | pPager->journalStarted = 0; | ||
1286 | pPager->stmtAutoopen = 0; | ||
1287 | pPager->origDbSize = 0; | ||
1288 | } | ||
1289 | } | ||
1290 | |||
1291 | if( !MEMDB || pPager->errCode==SQLITE_OK ){ | ||
1292 | pPager->state = PAGER_UNLOCK; | ||
1293 | pPager->changeCountDone = 0; | ||
1294 | } | ||
1295 | } | ||
1296 | } | ||
1297 | |||
1298 | /* | ||
1299 | ** Execute a rollback if a transaction is active and unlock the | ||
1300 | ** database file. If the pager has already entered the error state, | ||
1301 | ** do not attempt the rollback. | ||
1302 | */ | ||
1303 | static void pagerUnlockAndRollback(Pager *p){ | ||
1304 | assert( p->state>=PAGER_RESERVED || p->journalOpen==0 ); | ||
1305 | if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){ | ||
1306 | sqlite3PagerRollback(p); | ||
1307 | } | ||
1308 | pager_unlock(p); | ||
1309 | assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) ); | ||
1310 | assert( p->errCode || !p->stmtOpen || p->exclusiveMode ); | ||
1311 | } | ||
1312 | |||
1313 | /* | ||
1314 | ** This routine ends a transaction. A transaction is ended by either | ||
1315 | ** a COMMIT or a ROLLBACK. | ||
1316 | ** | ||
1317 | ** When this routine is called, the pager has the journal file open and | ||
1318 | ** a RESERVED or EXCLUSIVE lock on the database. This routine will release | ||
1319 | ** the database lock and acquires a SHARED lock in its place if that is | ||
1320 | ** the appropriate thing to do. Release locks usually is appropriate, | ||
1321 | ** unless we are in exclusive access mode or unless this is a | ||
1322 | ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation. | ||
1323 | ** | ||
1324 | ** The journal file is either deleted or truncated. | ||
1325 | ** | ||
1326 | ** TODO: Consider keeping the journal file open for temporary databases. | ||
1327 | ** This might give a performance improvement on windows where opening | ||
1328 | ** a file is an expensive operation. | ||
1329 | */ | ||
1330 | static int pager_end_transaction(Pager *pPager){ | ||
1331 | PgHdr *pPg; | ||
1332 | int rc = SQLITE_OK; | ||
1333 | int rc2 = SQLITE_OK; | ||
1334 | assert( !MEMDB ); | ||
1335 | if( pPager->state<PAGER_RESERVED ){ | ||
1336 | return SQLITE_OK; | ||
1337 | } | ||
1338 | sqlite3PagerStmtCommit(pPager); | ||
1339 | if( pPager->stmtOpen && !pPager->exclusiveMode ){ | ||
1340 | sqlite3OsClose(pPager->stfd); | ||
1341 | pPager->stmtOpen = 0; | ||
1342 | } | ||
1343 | if( pPager->journalOpen ){ | ||
1344 | if( pPager->exclusiveMode | ||
1345 | && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){; | ||
1346 | pPager->journalOff = 0; | ||
1347 | pPager->journalStarted = 0; | ||
1348 | }else{ | ||
1349 | sqlite3OsClose(pPager->jfd); | ||
1350 | pPager->journalOpen = 0; | ||
1351 | if( rc==SQLITE_OK ){ | ||
1352 | rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); | ||
1353 | } | ||
1354 | } | ||
1355 | sqlite3_free( pPager->aInJournal ); | ||
1356 | pPager->aInJournal = 0; | ||
1357 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
1358 | pPg->inJournal = 0; | ||
1359 | pPg->dirty = 0; | ||
1360 | pPg->needSync = 0; | ||
1361 | pPg->alwaysRollback = 0; | ||
1362 | #ifdef SQLITE_CHECK_PAGES | ||
1363 | pPg->pageHash = pager_pagehash(pPg); | ||
1364 | #endif | ||
1365 | } | ||
1366 | pPager->pDirty = 0; | ||
1367 | pPager->dirtyCache = 0; | ||
1368 | pPager->nRec = 0; | ||
1369 | }else{ | ||
1370 | assert( pPager->aInJournal==0 ); | ||
1371 | assert( pPager->dirtyCache==0 || pPager->useJournal==0 ); | ||
1372 | } | ||
1373 | |||
1374 | if( !pPager->exclusiveMode ){ | ||
1375 | rc2 = osUnlock(pPager->fd, SHARED_LOCK); | ||
1376 | pPager->state = PAGER_SHARED; | ||
1377 | }else if( pPager->state==PAGER_SYNCED ){ | ||
1378 | pPager->state = PAGER_EXCLUSIVE; | ||
1379 | } | ||
1380 | pPager->origDbSize = 0; | ||
1381 | pPager->setMaster = 0; | ||
1382 | pPager->needSync = 0; | ||
1383 | lruListSetFirstSynced(pPager); | ||
1384 | pPager->dbSize = -1; | ||
1385 | |||
1386 | return (rc==SQLITE_OK?rc2:rc); | ||
1387 | } | ||
1388 | |||
1389 | /* | ||
1390 | ** Compute and return a checksum for the page of data. | ||
1391 | ** | ||
1392 | ** This is not a real checksum. It is really just the sum of the | ||
1393 | ** random initial value and the page number. We experimented with | ||
1394 | ** a checksum of the entire data, but that was found to be too slow. | ||
1395 | ** | ||
1396 | ** Note that the page number is stored at the beginning of data and | ||
1397 | ** the checksum is stored at the end. This is important. If journal | ||
1398 | ** corruption occurs due to a power failure, the most likely scenario | ||
1399 | ** is that one end or the other of the record will be changed. It is | ||
1400 | ** much less likely that the two ends of the journal record will be | ||
1401 | ** correct and the middle be corrupt. Thus, this "checksum" scheme, | ||
1402 | ** though fast and simple, catches the mostly likely kind of corruption. | ||
1403 | ** | ||
1404 | ** FIX ME: Consider adding every 200th (or so) byte of the data to the | ||
1405 | ** checksum. That way if a single page spans 3 or more disk sectors and | ||
1406 | ** only the middle sector is corrupt, we will still have a reasonable | ||
1407 | ** chance of failing the checksum and thus detecting the problem. | ||
1408 | */ | ||
1409 | static u32 pager_cksum(Pager *pPager, const u8 *aData){ | ||
1410 | u32 cksum = pPager->cksumInit; | ||
1411 | int i = pPager->pageSize-200; | ||
1412 | while( i>0 ){ | ||
1413 | cksum += aData[i]; | ||
1414 | i -= 200; | ||
1415 | } | ||
1416 | return cksum; | ||
1417 | } | ||
1418 | |||
1419 | /* Forward declaration */ | ||
1420 | static void makeClean(PgHdr*); | ||
1421 | |||
1422 | /* | ||
1423 | ** Read a single page from the journal file opened on file descriptor | ||
1424 | ** jfd. Playback this one page. | ||
1425 | ** | ||
1426 | ** If useCksum==0 it means this journal does not use checksums. Checksums | ||
1427 | ** are not used in statement journals because statement journals do not | ||
1428 | ** need to survive power failures. | ||
1429 | */ | ||
1430 | static int pager_playback_one_page( | ||
1431 | Pager *pPager, | ||
1432 | sqlite3_file *jfd, | ||
1433 | i64 offset, | ||
1434 | int useCksum | ||
1435 | ){ | ||
1436 | int rc; | ||
1437 | PgHdr *pPg; /* An existing page in the cache */ | ||
1438 | Pgno pgno; /* The page number of a page in journal */ | ||
1439 | u32 cksum; /* Checksum used for sanity checking */ | ||
1440 | u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */ | ||
1441 | |||
1442 | /* useCksum should be true for the main journal and false for | ||
1443 | ** statement journals. Verify that this is always the case | ||
1444 | */ | ||
1445 | assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) ); | ||
1446 | assert( aData ); | ||
1447 | |||
1448 | rc = read32bits(jfd, offset, &pgno); | ||
1449 | if( rc!=SQLITE_OK ) return rc; | ||
1450 | rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4); | ||
1451 | if( rc!=SQLITE_OK ) return rc; | ||
1452 | pPager->journalOff += pPager->pageSize + 4; | ||
1453 | |||
1454 | /* Sanity checking on the page. This is more important that I originally | ||
1455 | ** thought. If a power failure occurs while the journal is being written, | ||
1456 | ** it could cause invalid data to be written into the journal. We need to | ||
1457 | ** detect this invalid data (with high probability) and ignore it. | ||
1458 | */ | ||
1459 | if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ | ||
1460 | return SQLITE_DONE; | ||
1461 | } | ||
1462 | if( pgno>(unsigned)pPager->dbSize ){ | ||
1463 | return SQLITE_OK; | ||
1464 | } | ||
1465 | if( useCksum ){ | ||
1466 | rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum); | ||
1467 | if( rc ) return rc; | ||
1468 | pPager->journalOff += 4; | ||
1469 | if( pager_cksum(pPager, aData)!=cksum ){ | ||
1470 | return SQLITE_DONE; | ||
1471 | } | ||
1472 | } | ||
1473 | |||
1474 | assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE ); | ||
1475 | |||
1476 | /* If the pager is in RESERVED state, then there must be a copy of this | ||
1477 | ** page in the pager cache. In this case just update the pager cache, | ||
1478 | ** not the database file. The page is left marked dirty in this case. | ||
1479 | ** | ||
1480 | ** An exception to the above rule: If the database is in no-sync mode | ||
1481 | ** and a page is moved during an incremental vacuum then the page may | ||
1482 | ** not be in the pager cache. Later: if a malloc() or IO error occurs | ||
1483 | ** during a Movepage() call, then the page may not be in the cache | ||
1484 | ** either. So the condition described in the above paragraph is not | ||
1485 | ** assert()able. | ||
1486 | ** | ||
1487 | ** If in EXCLUSIVE state, then we update the pager cache if it exists | ||
1488 | ** and the main file. The page is then marked not dirty. | ||
1489 | ** | ||
1490 | ** Ticket #1171: The statement journal might contain page content that is | ||
1491 | ** different from the page content at the start of the transaction. | ||
1492 | ** This occurs when a page is changed prior to the start of a statement | ||
1493 | ** then changed again within the statement. When rolling back such a | ||
1494 | ** statement we must not write to the original database unless we know | ||
1495 | ** for certain that original page contents are synced into the main rollback | ||
1496 | ** journal. Otherwise, a power loss might leave modified data in the | ||
1497 | ** database file without an entry in the rollback journal that can | ||
1498 | ** restore the database to its original form. Two conditions must be | ||
1499 | ** met before writing to the database files. (1) the database must be | ||
1500 | ** locked. (2) we know that the original page content is fully synced | ||
1501 | ** in the main journal either because the page is not in cache or else | ||
1502 | ** the page is marked as needSync==0. | ||
1503 | */ | ||
1504 | pPg = pager_lookup(pPager, pgno); | ||
1505 | PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n", | ||
1506 | PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData)); | ||
1507 | if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){ | ||
1508 | i64 offset = (pgno-1)*(i64)pPager->pageSize; | ||
1509 | rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, offset); | ||
1510 | if( pPg ){ | ||
1511 | makeClean(pPg); | ||
1512 | } | ||
1513 | } | ||
1514 | if( pPg ){ | ||
1515 | /* No page should ever be explicitly rolled back that is in use, except | ||
1516 | ** for page 1 which is held in use in order to keep the lock on the | ||
1517 | ** database active. However such a page may be rolled back as a result | ||
1518 | ** of an internal error resulting in an automatic call to | ||
1519 | ** sqlite3PagerRollback(). | ||
1520 | */ | ||
1521 | void *pData; | ||
1522 | /* assert( pPg->nRef==0 || pPg->pgno==1 ); */ | ||
1523 | pData = PGHDR_TO_DATA(pPg); | ||
1524 | memcpy(pData, aData, pPager->pageSize); | ||
1525 | if( pPager->xReiniter ){ | ||
1526 | pPager->xReiniter(pPg, pPager->pageSize); | ||
1527 | } | ||
1528 | #ifdef SQLITE_CHECK_PAGES | ||
1529 | pPg->pageHash = pager_pagehash(pPg); | ||
1530 | #endif | ||
1531 | /* If this was page 1, then restore the value of Pager.dbFileVers. | ||
1532 | ** Do this before any decoding. */ | ||
1533 | if( pgno==1 ){ | ||
1534 | memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); | ||
1535 | } | ||
1536 | |||
1537 | /* Decode the page just read from disk */ | ||
1538 | CODEC1(pPager, pData, pPg->pgno, 3); | ||
1539 | } | ||
1540 | return rc; | ||
1541 | } | ||
1542 | |||
1543 | /* | ||
1544 | ** Parameter zMaster is the name of a master journal file. A single journal | ||
1545 | ** file that referred to the master journal file has just been rolled back. | ||
1546 | ** This routine checks if it is possible to delete the master journal file, | ||
1547 | ** and does so if it is. | ||
1548 | ** | ||
1549 | ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not | ||
1550 | ** available for use within this function. | ||
1551 | ** | ||
1552 | ** | ||
1553 | ** The master journal file contains the names of all child journals. | ||
1554 | ** To tell if a master journal can be deleted, check to each of the | ||
1555 | ** children. If all children are either missing or do not refer to | ||
1556 | ** a different master journal, then this master journal can be deleted. | ||
1557 | */ | ||
1558 | static int pager_delmaster(Pager *pPager, const char *zMaster){ | ||
1559 | sqlite3_vfs *pVfs = pPager->pVfs; | ||
1560 | int rc; | ||
1561 | int master_open = 0; | ||
1562 | sqlite3_file *pMaster; | ||
1563 | sqlite3_file *pJournal; | ||
1564 | char *zMasterJournal = 0; /* Contents of master journal file */ | ||
1565 | i64 nMasterJournal; /* Size of master journal file */ | ||
1566 | |||
1567 | /* Open the master journal file exclusively in case some other process | ||
1568 | ** is running this routine also. Not that it makes too much difference. | ||
1569 | */ | ||
1570 | pMaster = (sqlite3_file *)sqlite3_malloc(pVfs->szOsFile * 2); | ||
1571 | pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); | ||
1572 | if( !pMaster ){ | ||
1573 | rc = SQLITE_NOMEM; | ||
1574 | }else{ | ||
1575 | int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL); | ||
1576 | rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0); | ||
1577 | } | ||
1578 | if( rc!=SQLITE_OK ) goto delmaster_out; | ||
1579 | master_open = 1; | ||
1580 | |||
1581 | rc = sqlite3OsFileSize(pMaster, &nMasterJournal); | ||
1582 | if( rc!=SQLITE_OK ) goto delmaster_out; | ||
1583 | |||
1584 | if( nMasterJournal>0 ){ | ||
1585 | char *zJournal; | ||
1586 | char *zMasterPtr = 0; | ||
1587 | int nMasterPtr = pPager->pVfs->mxPathname+1; | ||
1588 | |||
1589 | /* Load the entire master journal file into space obtained from | ||
1590 | ** sqlite3_malloc() and pointed to by zMasterJournal. | ||
1591 | */ | ||
1592 | zMasterJournal = (char *)sqlite3_malloc(nMasterJournal + nMasterPtr); | ||
1593 | if( !zMasterJournal ){ | ||
1594 | rc = SQLITE_NOMEM; | ||
1595 | goto delmaster_out; | ||
1596 | } | ||
1597 | zMasterPtr = &zMasterJournal[nMasterJournal]; | ||
1598 | rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0); | ||
1599 | if( rc!=SQLITE_OK ) goto delmaster_out; | ||
1600 | |||
1601 | zJournal = zMasterJournal; | ||
1602 | while( (zJournal-zMasterJournal)<nMasterJournal ){ | ||
1603 | if( sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS) ){ | ||
1604 | /* One of the journals pointed to by the master journal exists. | ||
1605 | ** Open it and check if it points at the master journal. If | ||
1606 | ** so, return without deleting the master journal file. | ||
1607 | */ | ||
1608 | int c; | ||
1609 | int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL); | ||
1610 | rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0); | ||
1611 | if( rc!=SQLITE_OK ){ | ||
1612 | goto delmaster_out; | ||
1613 | } | ||
1614 | |||
1615 | rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr); | ||
1616 | sqlite3OsClose(pJournal); | ||
1617 | if( rc!=SQLITE_OK ){ | ||
1618 | goto delmaster_out; | ||
1619 | } | ||
1620 | |||
1621 | c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0; | ||
1622 | if( c ){ | ||
1623 | /* We have a match. Do not delete the master journal file. */ | ||
1624 | goto delmaster_out; | ||
1625 | } | ||
1626 | } | ||
1627 | zJournal += (strlen(zJournal)+1); | ||
1628 | } | ||
1629 | } | ||
1630 | |||
1631 | rc = sqlite3OsDelete(pVfs, zMaster, 0); | ||
1632 | |||
1633 | delmaster_out: | ||
1634 | if( zMasterJournal ){ | ||
1635 | sqlite3_free(zMasterJournal); | ||
1636 | } | ||
1637 | if( master_open ){ | ||
1638 | sqlite3OsClose(pMaster); | ||
1639 | } | ||
1640 | sqlite3_free(pMaster); | ||
1641 | return rc; | ||
1642 | } | ||
1643 | |||
1644 | |||
1645 | static void pager_truncate_cache(Pager *pPager); | ||
1646 | |||
1647 | /* | ||
1648 | ** Truncate the main file of the given pager to the number of pages | ||
1649 | ** indicated. Also truncate the cached representation of the file. | ||
1650 | */ | ||
1651 | static int pager_truncate(Pager *pPager, int nPage){ | ||
1652 | int rc = SQLITE_OK; | ||
1653 | if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){ | ||
1654 | rc = sqlite3OsTruncate(pPager->fd, pPager->pageSize*(i64)nPage); | ||
1655 | } | ||
1656 | if( rc==SQLITE_OK ){ | ||
1657 | pPager->dbSize = nPage; | ||
1658 | pager_truncate_cache(pPager); | ||
1659 | } | ||
1660 | return rc; | ||
1661 | } | ||
1662 | |||
1663 | /* | ||
1664 | ** Set the sectorSize for the given pager. | ||
1665 | ** | ||
1666 | ** The sector size is the larger of the sector size reported | ||
1667 | ** by sqlite3OsSectorSize() and the pageSize. | ||
1668 | */ | ||
1669 | static void setSectorSize(Pager *pPager){ | ||
1670 | assert(pPager->fd->pMethods||pPager->tempFile); | ||
1671 | if( !pPager->tempFile ){ | ||
1672 | /* Sector size doesn't matter for temporary files. Also, the file | ||
1673 | ** may not have been opened yet, in whcih case the OsSectorSize() | ||
1674 | ** call will segfault. | ||
1675 | */ | ||
1676 | pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); | ||
1677 | } | ||
1678 | if( pPager->sectorSize<pPager->pageSize ){ | ||
1679 | pPager->sectorSize = pPager->pageSize; | ||
1680 | } | ||
1681 | } | ||
1682 | |||
1683 | /* | ||
1684 | ** Playback the journal and thus restore the database file to | ||
1685 | ** the state it was in before we started making changes. | ||
1686 | ** | ||
1687 | ** The journal file format is as follows: | ||
1688 | ** | ||
1689 | ** (1) 8 byte prefix. A copy of aJournalMagic[]. | ||
1690 | ** (2) 4 byte big-endian integer which is the number of valid page records | ||
1691 | ** in the journal. If this value is 0xffffffff, then compute the | ||
1692 | ** number of page records from the journal size. | ||
1693 | ** (3) 4 byte big-endian integer which is the initial value for the | ||
1694 | ** sanity checksum. | ||
1695 | ** (4) 4 byte integer which is the number of pages to truncate the | ||
1696 | ** database to during a rollback. | ||
1697 | ** (5) 4 byte integer which is the number of bytes in the master journal | ||
1698 | ** name. The value may be zero (indicate that there is no master | ||
1699 | ** journal.) | ||
1700 | ** (6) N bytes of the master journal name. The name will be nul-terminated | ||
1701 | ** and might be shorter than the value read from (5). If the first byte | ||
1702 | ** of the name is \000 then there is no master journal. The master | ||
1703 | ** journal name is stored in UTF-8. | ||
1704 | ** (7) Zero or more pages instances, each as follows: | ||
1705 | ** + 4 byte page number. | ||
1706 | ** + pPager->pageSize bytes of data. | ||
1707 | ** + 4 byte checksum | ||
1708 | ** | ||
1709 | ** When we speak of the journal header, we mean the first 6 items above. | ||
1710 | ** Each entry in the journal is an instance of the 7th item. | ||
1711 | ** | ||
1712 | ** Call the value from the second bullet "nRec". nRec is the number of | ||
1713 | ** valid page entries in the journal. In most cases, you can compute the | ||
1714 | ** value of nRec from the size of the journal file. But if a power | ||
1715 | ** failure occurred while the journal was being written, it could be the | ||
1716 | ** case that the size of the journal file had already been increased but | ||
1717 | ** the extra entries had not yet made it safely to disk. In such a case, | ||
1718 | ** the value of nRec computed from the file size would be too large. For | ||
1719 | ** that reason, we always use the nRec value in the header. | ||
1720 | ** | ||
1721 | ** If the nRec value is 0xffffffff it means that nRec should be computed | ||
1722 | ** from the file size. This value is used when the user selects the | ||
1723 | ** no-sync option for the journal. A power failure could lead to corruption | ||
1724 | ** in this case. But for things like temporary table (which will be | ||
1725 | ** deleted when the power is restored) we don't care. | ||
1726 | ** | ||
1727 | ** If the file opened as the journal file is not a well-formed | ||
1728 | ** journal file then all pages up to the first corrupted page are rolled | ||
1729 | ** back (or no pages if the journal header is corrupted). The journal file | ||
1730 | ** is then deleted and SQLITE_OK returned, just as if no corruption had | ||
1731 | ** been encountered. | ||
1732 | ** | ||
1733 | ** If an I/O or malloc() error occurs, the journal-file is not deleted | ||
1734 | ** and an error code is returned. | ||
1735 | */ | ||
1736 | static int pager_playback(Pager *pPager, int isHot){ | ||
1737 | sqlite3_vfs *pVfs = pPager->pVfs; | ||
1738 | i64 szJ; /* Size of the journal file in bytes */ | ||
1739 | u32 nRec; /* Number of Records in the journal */ | ||
1740 | int i; /* Loop counter */ | ||
1741 | Pgno mxPg = 0; /* Size of the original file in pages */ | ||
1742 | int rc; /* Result code of a subroutine */ | ||
1743 | char *zMaster = 0; /* Name of master journal file if any */ | ||
1744 | |||
1745 | /* Figure out how many records are in the journal. Abort early if | ||
1746 | ** the journal is empty. | ||
1747 | */ | ||
1748 | assert( pPager->journalOpen ); | ||
1749 | rc = sqlite3OsFileSize(pPager->jfd, &szJ); | ||
1750 | if( rc!=SQLITE_OK || szJ==0 ){ | ||
1751 | goto end_playback; | ||
1752 | } | ||
1753 | |||
1754 | /* Read the master journal name from the journal, if it is present. | ||
1755 | ** If a master journal file name is specified, but the file is not | ||
1756 | ** present on disk, then the journal is not hot and does not need to be | ||
1757 | ** played back. | ||
1758 | */ | ||
1759 | zMaster = pPager->pTmpSpace; | ||
1760 | rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); | ||
1761 | assert( rc!=SQLITE_DONE ); | ||
1762 | if( rc!=SQLITE_OK | ||
1763 | || (zMaster[0] && !sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS)) | ||
1764 | ){ | ||
1765 | zMaster = 0; | ||
1766 | if( rc==SQLITE_DONE ) rc = SQLITE_OK; | ||
1767 | goto end_playback; | ||
1768 | } | ||
1769 | pPager->journalOff = 0; | ||
1770 | zMaster = 0; | ||
1771 | |||
1772 | /* This loop terminates either when the readJournalHdr() call returns | ||
1773 | ** SQLITE_DONE or an IO error occurs. */ | ||
1774 | while( 1 ){ | ||
1775 | |||
1776 | /* Read the next journal header from the journal file. If there are | ||
1777 | ** not enough bytes left in the journal file for a complete header, or | ||
1778 | ** it is corrupted, then a process must of failed while writing it. | ||
1779 | ** This indicates nothing more needs to be rolled back. | ||
1780 | */ | ||
1781 | rc = readJournalHdr(pPager, szJ, &nRec, &mxPg); | ||
1782 | if( rc!=SQLITE_OK ){ | ||
1783 | if( rc==SQLITE_DONE ){ | ||
1784 | rc = SQLITE_OK; | ||
1785 | } | ||
1786 | goto end_playback; | ||
1787 | } | ||
1788 | |||
1789 | /* If nRec is 0xffffffff, then this journal was created by a process | ||
1790 | ** working in no-sync mode. This means that the rest of the journal | ||
1791 | ** file consists of pages, there are no more journal headers. Compute | ||
1792 | ** the value of nRec based on this assumption. | ||
1793 | */ | ||
1794 | if( nRec==0xffffffff ){ | ||
1795 | assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); | ||
1796 | nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager); | ||
1797 | } | ||
1798 | |||
1799 | /* If nRec is 0 and this rollback is of a transaction created by this | ||
1800 | ** process and if this is the final header in the journal, then it means | ||
1801 | ** that this part of the journal was being filled but has not yet been | ||
1802 | ** synced to disk. Compute the number of pages based on the remaining | ||
1803 | ** size of the file. | ||
1804 | ** | ||
1805 | ** The third term of the test was added to fix ticket #2565. | ||
1806 | */ | ||
1807 | if( nRec==0 && !isHot && | ||
1808 | pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ | ||
1809 | nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager); | ||
1810 | } | ||
1811 | |||
1812 | /* If this is the first header read from the journal, truncate the | ||
1813 | ** database file back to it's original size. | ||
1814 | */ | ||
1815 | if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ | ||
1816 | rc = pager_truncate(pPager, mxPg); | ||
1817 | if( rc!=SQLITE_OK ){ | ||
1818 | goto end_playback; | ||
1819 | } | ||
1820 | } | ||
1821 | |||
1822 | /* Copy original pages out of the journal and back into the database file. | ||
1823 | */ | ||
1824 | for(i=0; i<nRec; i++){ | ||
1825 | rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); | ||
1826 | if( rc!=SQLITE_OK ){ | ||
1827 | if( rc==SQLITE_DONE ){ | ||
1828 | rc = SQLITE_OK; | ||
1829 | pPager->journalOff = szJ; | ||
1830 | break; | ||
1831 | }else{ | ||
1832 | goto end_playback; | ||
1833 | } | ||
1834 | } | ||
1835 | } | ||
1836 | } | ||
1837 | /*NOTREACHED*/ | ||
1838 | assert( 0 ); | ||
1839 | |||
1840 | end_playback: | ||
1841 | if( rc==SQLITE_OK ){ | ||
1842 | zMaster = pPager->pTmpSpace; | ||
1843 | rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); | ||
1844 | } | ||
1845 | if( rc==SQLITE_OK ){ | ||
1846 | rc = pager_end_transaction(pPager); | ||
1847 | } | ||
1848 | if( rc==SQLITE_OK && zMaster[0] ){ | ||
1849 | /* If there was a master journal and this routine will return success, | ||
1850 | ** see if it is possible to delete the master journal. | ||
1851 | */ | ||
1852 | rc = pager_delmaster(pPager, zMaster); | ||
1853 | } | ||
1854 | |||
1855 | /* The Pager.sectorSize variable may have been updated while rolling | ||
1856 | ** back a journal created by a process with a different sector size | ||
1857 | ** value. Reset it to the correct value for this process. | ||
1858 | */ | ||
1859 | setSectorSize(pPager); | ||
1860 | return rc; | ||
1861 | } | ||
1862 | |||
1863 | /* | ||
1864 | ** Playback the statement journal. | ||
1865 | ** | ||
1866 | ** This is similar to playing back the transaction journal but with | ||
1867 | ** a few extra twists. | ||
1868 | ** | ||
1869 | ** (1) The number of pages in the database file at the start of | ||
1870 | ** the statement is stored in pPager->stmtSize, not in the | ||
1871 | ** journal file itself. | ||
1872 | ** | ||
1873 | ** (2) In addition to playing back the statement journal, also | ||
1874 | ** playback all pages of the transaction journal beginning | ||
1875 | ** at offset pPager->stmtJSize. | ||
1876 | */ | ||
1877 | static int pager_stmt_playback(Pager *pPager){ | ||
1878 | i64 szJ; /* Size of the full journal */ | ||
1879 | i64 hdrOff; | ||
1880 | int nRec; /* Number of Records */ | ||
1881 | int i; /* Loop counter */ | ||
1882 | int rc; | ||
1883 | |||
1884 | szJ = pPager->journalOff; | ||
1885 | #ifndef NDEBUG | ||
1886 | { | ||
1887 | i64 os_szJ; | ||
1888 | rc = sqlite3OsFileSize(pPager->jfd, &os_szJ); | ||
1889 | if( rc!=SQLITE_OK ) return rc; | ||
1890 | assert( szJ==os_szJ ); | ||
1891 | } | ||
1892 | #endif | ||
1893 | |||
1894 | /* Set hdrOff to be the offset just after the end of the last journal | ||
1895 | ** page written before the first journal-header for this statement | ||
1896 | ** transaction was written, or the end of the file if no journal | ||
1897 | ** header was written. | ||
1898 | */ | ||
1899 | hdrOff = pPager->stmtHdrOff; | ||
1900 | assert( pPager->fullSync || !hdrOff ); | ||
1901 | if( !hdrOff ){ | ||
1902 | hdrOff = szJ; | ||
1903 | } | ||
1904 | |||
1905 | /* Truncate the database back to its original size. | ||
1906 | */ | ||
1907 | rc = pager_truncate(pPager, pPager->stmtSize); | ||
1908 | assert( pPager->state>=PAGER_SHARED ); | ||
1909 | |||
1910 | /* Figure out how many records are in the statement journal. | ||
1911 | */ | ||
1912 | assert( pPager->stmtInUse && pPager->journalOpen ); | ||
1913 | nRec = pPager->stmtNRec; | ||
1914 | |||
1915 | /* Copy original pages out of the statement journal and back into the | ||
1916 | ** database file. Note that the statement journal omits checksums from | ||
1917 | ** each record since power-failure recovery is not important to statement | ||
1918 | ** journals. | ||
1919 | */ | ||
1920 | for(i=0; i<nRec; i++){ | ||
1921 | i64 offset = i*(4+pPager->pageSize); | ||
1922 | rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0); | ||
1923 | assert( rc!=SQLITE_DONE ); | ||
1924 | if( rc!=SQLITE_OK ) goto end_stmt_playback; | ||
1925 | } | ||
1926 | |||
1927 | /* Now roll some pages back from the transaction journal. Pager.stmtJSize | ||
1928 | ** was the size of the journal file when this statement was started, so | ||
1929 | ** everything after that needs to be rolled back, either into the | ||
1930 | ** database, the memory cache, or both. | ||
1931 | ** | ||
1932 | ** If it is not zero, then Pager.stmtHdrOff is the offset to the start | ||
1933 | ** of the first journal header written during this statement transaction. | ||
1934 | */ | ||
1935 | pPager->journalOff = pPager->stmtJSize; | ||
1936 | pPager->cksumInit = pPager->stmtCksum; | ||
1937 | while( pPager->journalOff < hdrOff ){ | ||
1938 | rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); | ||
1939 | assert( rc!=SQLITE_DONE ); | ||
1940 | if( rc!=SQLITE_OK ) goto end_stmt_playback; | ||
1941 | } | ||
1942 | |||
1943 | while( pPager->journalOff < szJ ){ | ||
1944 | u32 nJRec; /* Number of Journal Records */ | ||
1945 | u32 dummy; | ||
1946 | rc = readJournalHdr(pPager, szJ, &nJRec, &dummy); | ||
1947 | if( rc!=SQLITE_OK ){ | ||
1948 | assert( rc!=SQLITE_DONE ); | ||
1949 | goto end_stmt_playback; | ||
1950 | } | ||
1951 | if( nJRec==0 ){ | ||
1952 | nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8); | ||
1953 | } | ||
1954 | for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){ | ||
1955 | rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1); | ||
1956 | assert( rc!=SQLITE_DONE ); | ||
1957 | if( rc!=SQLITE_OK ) goto end_stmt_playback; | ||
1958 | } | ||
1959 | } | ||
1960 | |||
1961 | pPager->journalOff = szJ; | ||
1962 | |||
1963 | end_stmt_playback: | ||
1964 | if( rc==SQLITE_OK) { | ||
1965 | pPager->journalOff = szJ; | ||
1966 | /* pager_reload_cache(pPager); */ | ||
1967 | } | ||
1968 | return rc; | ||
1969 | } | ||
1970 | |||
1971 | /* | ||
1972 | ** Change the maximum number of in-memory pages that are allowed. | ||
1973 | */ | ||
1974 | void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ | ||
1975 | if( mxPage>10 ){ | ||
1976 | pPager->mxPage = mxPage; | ||
1977 | }else{ | ||
1978 | pPager->mxPage = 10; | ||
1979 | } | ||
1980 | } | ||
1981 | |||
1982 | /* | ||
1983 | ** Adjust the robustness of the database to damage due to OS crashes | ||
1984 | ** or power failures by changing the number of syncs()s when writing | ||
1985 | ** the rollback journal. There are three levels: | ||
1986 | ** | ||
1987 | ** OFF sqlite3OsSync() is never called. This is the default | ||
1988 | ** for temporary and transient files. | ||
1989 | ** | ||
1990 | ** NORMAL The journal is synced once before writes begin on the | ||
1991 | ** database. This is normally adequate protection, but | ||
1992 | ** it is theoretically possible, though very unlikely, | ||
1993 | ** that an inopertune power failure could leave the journal | ||
1994 | ** in a state which would cause damage to the database | ||
1995 | ** when it is rolled back. | ||
1996 | ** | ||
1997 | ** FULL The journal is synced twice before writes begin on the | ||
1998 | ** database (with some additional information - the nRec field | ||
1999 | ** of the journal header - being written in between the two | ||
2000 | ** syncs). If we assume that writing a | ||
2001 | ** single disk sector is atomic, then this mode provides | ||
2002 | ** assurance that the journal will not be corrupted to the | ||
2003 | ** point of causing damage to the database during rollback. | ||
2004 | ** | ||
2005 | ** Numeric values associated with these states are OFF==1, NORMAL=2, | ||
2006 | ** and FULL=3. | ||
2007 | */ | ||
2008 | #ifndef SQLITE_OMIT_PAGER_PRAGMAS | ||
2009 | void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){ | ||
2010 | pPager->noSync = level==1 || pPager->tempFile; | ||
2011 | pPager->fullSync = level==3 && !pPager->tempFile; | ||
2012 | pPager->sync_flags = (full_fsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL); | ||
2013 | if( pPager->noSync ) pPager->needSync = 0; | ||
2014 | } | ||
2015 | #endif | ||
2016 | |||
2017 | /* | ||
2018 | ** The following global variable is incremented whenever the library | ||
2019 | ** attempts to open a temporary file. This information is used for | ||
2020 | ** testing and analysis only. | ||
2021 | */ | ||
2022 | #ifdef SQLITE_TEST | ||
2023 | int sqlite3_opentemp_count = 0; | ||
2024 | #endif | ||
2025 | |||
2026 | /* | ||
2027 | ** Open a temporary file. | ||
2028 | ** | ||
2029 | ** Write the file descriptor into *fd. Return SQLITE_OK on success or some | ||
2030 | ** other error code if we fail. The OS will automatically delete the temporary | ||
2031 | ** file when it is closed. | ||
2032 | */ | ||
2033 | static int sqlite3PagerOpentemp( | ||
2034 | sqlite3_vfs *pVfs, /* The virtual file system layer */ | ||
2035 | sqlite3_file *pFile, /* Write the file descriptor here */ | ||
2036 | char *zFilename, /* Name of the file. Might be NULL */ | ||
2037 | int vfsFlags /* Flags passed through to the VFS */ | ||
2038 | ){ | ||
2039 | int rc; | ||
2040 | assert( zFilename!=0 ); | ||
2041 | |||
2042 | #ifdef SQLITE_TEST | ||
2043 | sqlite3_opentemp_count++; /* Used for testing and analysis only */ | ||
2044 | #endif | ||
2045 | |||
2046 | vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | | ||
2047 | SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; | ||
2048 | rc = sqlite3OsOpen(pVfs, zFilename, pFile, vfsFlags, 0); | ||
2049 | assert( rc!=SQLITE_OK || pFile->pMethods ); | ||
2050 | return rc; | ||
2051 | } | ||
2052 | |||
2053 | /* | ||
2054 | ** Create a new page cache and put a pointer to the page cache in *ppPager. | ||
2055 | ** The file to be cached need not exist. The file is not locked until | ||
2056 | ** the first call to sqlite3PagerGet() and is only held open until the | ||
2057 | ** last page is released using sqlite3PagerUnref(). | ||
2058 | ** | ||
2059 | ** If zFilename is NULL then a randomly-named temporary file is created | ||
2060 | ** and used as the file to be cached. The file will be deleted | ||
2061 | ** automatically when it is closed. | ||
2062 | ** | ||
2063 | ** If zFilename is ":memory:" then all information is held in cache. | ||
2064 | ** It is never written to disk. This can be used to implement an | ||
2065 | ** in-memory database. | ||
2066 | */ | ||
2067 | int sqlite3PagerOpen( | ||
2068 | sqlite3_vfs *pVfs, /* The virtual file system to use */ | ||
2069 | Pager **ppPager, /* Return the Pager structure here */ | ||
2070 | const char *zFilename, /* Name of the database file to open */ | ||
2071 | int nExtra, /* Extra bytes append to each in-memory page */ | ||
2072 | int flags, /* flags controlling this file */ | ||
2073 | int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */ | ||
2074 | ){ | ||
2075 | u8 *pPtr; | ||
2076 | Pager *pPager = 0; | ||
2077 | int rc = SQLITE_OK; | ||
2078 | int i; | ||
2079 | int tempFile = 0; | ||
2080 | int memDb = 0; | ||
2081 | int readOnly = 0; | ||
2082 | int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; | ||
2083 | int noReadlock = (flags & PAGER_NO_READLOCK)!=0; | ||
2084 | int journalFileSize = sqlite3JournalSize(pVfs); | ||
2085 | int nDefaultPage = SQLITE_DEFAULT_PAGE_SIZE; | ||
2086 | char *zPathname; | ||
2087 | int nPathname; | ||
2088 | |||
2089 | /* The default return is a NULL pointer */ | ||
2090 | *ppPager = 0; | ||
2091 | |||
2092 | /* Compute the full pathname */ | ||
2093 | nPathname = pVfs->mxPathname+1; | ||
2094 | zPathname = sqlite3_malloc(nPathname); | ||
2095 | if( zPathname==0 ){ | ||
2096 | return SQLITE_NOMEM; | ||
2097 | } | ||
2098 | if( zFilename && zFilename[0] ){ | ||
2099 | #ifndef SQLITE_OMIT_MEMORYDB | ||
2100 | if( strcmp(zFilename,":memory:")==0 ){ | ||
2101 | memDb = 1; | ||
2102 | zPathname[0] = 0; | ||
2103 | }else | ||
2104 | #endif | ||
2105 | { | ||
2106 | rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); | ||
2107 | } | ||
2108 | }else{ | ||
2109 | rc = sqlite3OsGetTempname(pVfs, nPathname, zPathname); | ||
2110 | } | ||
2111 | if( rc!=SQLITE_OK ){ | ||
2112 | sqlite3_free(zPathname); | ||
2113 | return rc; | ||
2114 | } | ||
2115 | nPathname = strlen(zPathname); | ||
2116 | |||
2117 | /* Allocate memory for the pager structure */ | ||
2118 | pPager = sqlite3MallocZero( | ||
2119 | sizeof(*pPager) + /* Pager structure */ | ||
2120 | journalFileSize + /* The journal file structure */ | ||
2121 | pVfs->szOsFile * 2 + /* The db and stmt journal files */ | ||
2122 | 4*nPathname + 40 /* zFilename, zDirectory, zJournal, zStmtJrnl */ | ||
2123 | ); | ||
2124 | if( !pPager ){ | ||
2125 | sqlite3_free(zPathname); | ||
2126 | return SQLITE_NOMEM; | ||
2127 | } | ||
2128 | pPtr = (u8 *)&pPager[1]; | ||
2129 | pPager->vfsFlags = vfsFlags; | ||
2130 | pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0]; | ||
2131 | pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1]; | ||
2132 | pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2]; | ||
2133 | pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize]; | ||
2134 | pPager->zDirectory = &pPager->zFilename[nPathname+1]; | ||
2135 | pPager->zJournal = &pPager->zDirectory[nPathname+1]; | ||
2136 | pPager->zStmtJrnl = &pPager->zJournal[nPathname+10]; | ||
2137 | pPager->pVfs = pVfs; | ||
2138 | memcpy(pPager->zFilename, zPathname, nPathname+1); | ||
2139 | sqlite3_free(zPathname); | ||
2140 | |||
2141 | /* Open the pager file. | ||
2142 | */ | ||
2143 | if( zFilename && zFilename[0] && !memDb ){ | ||
2144 | if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){ | ||
2145 | rc = SQLITE_CANTOPEN; | ||
2146 | }else{ | ||
2147 | int fout = 0; | ||
2148 | rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, | ||
2149 | pPager->vfsFlags, &fout); | ||
2150 | readOnly = (fout&SQLITE_OPEN_READONLY); | ||
2151 | |||
2152 | /* If the file was successfully opened for read/write access, | ||
2153 | ** choose a default page size in case we have to create the | ||
2154 | ** database file. The default page size is the maximum of: | ||
2155 | ** | ||
2156 | ** + SQLITE_DEFAULT_PAGE_SIZE, | ||
2157 | ** + The value returned by sqlite3OsSectorSize() | ||
2158 | ** + The largest page size that can be written atomically. | ||
2159 | */ | ||
2160 | if( rc==SQLITE_OK && !readOnly ){ | ||
2161 | int iSectorSize = sqlite3OsSectorSize(pPager->fd); | ||
2162 | if( nDefaultPage<iSectorSize ){ | ||
2163 | nDefaultPage = iSectorSize; | ||
2164 | } | ||
2165 | #ifdef SQLITE_ENABLE_ATOMIC_WRITE | ||
2166 | { | ||
2167 | int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); | ||
2168 | int ii; | ||
2169 | assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); | ||
2170 | assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); | ||
2171 | assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); | ||
2172 | for(ii=nDefaultPage; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ | ||
2173 | if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) nDefaultPage = ii; | ||
2174 | } | ||
2175 | } | ||
2176 | #endif | ||
2177 | if( nDefaultPage>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ | ||
2178 | nDefaultPage = SQLITE_MAX_DEFAULT_PAGE_SIZE; | ||
2179 | } | ||
2180 | } | ||
2181 | } | ||
2182 | }else if( !memDb ){ | ||
2183 | /* If a temporary file is requested, it is not opened immediately. | ||
2184 | ** In this case we accept the default page size and delay actually | ||
2185 | ** opening the file until the first call to OsWrite(). | ||
2186 | */ | ||
2187 | tempFile = 1; | ||
2188 | pPager->state = PAGER_EXCLUSIVE; | ||
2189 | } | ||
2190 | |||
2191 | if( pPager && rc==SQLITE_OK ){ | ||
2192 | pPager->pTmpSpace = (char *)sqlite3_malloc(nDefaultPage); | ||
2193 | } | ||
2194 | |||
2195 | /* If an error occurred in either of the blocks above, | ||
2196 | ** free the Pager structure and close the file. | ||
2197 | ** Since the pager is not allocated there is no need to set | ||
2198 | ** any Pager.errMask variables. | ||
2199 | */ | ||
2200 | if( !pPager || !pPager->pTmpSpace ){ | ||
2201 | sqlite3OsClose(pPager->fd); | ||
2202 | sqlite3_free(pPager); | ||
2203 | return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc); | ||
2204 | } | ||
2205 | |||
2206 | PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename); | ||
2207 | IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) | ||
2208 | |||
2209 | /* Fill in Pager.zDirectory[] */ | ||
2210 | memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1); | ||
2211 | for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){} | ||
2212 | if( i>0 ) pPager->zDirectory[i-1] = 0; | ||
2213 | |||
2214 | /* Fill in Pager.zJournal[] and Pager.zStmtJrnl[] */ | ||
2215 | memcpy(pPager->zJournal, pPager->zFilename, nPathname); | ||
2216 | memcpy(&pPager->zJournal[nPathname], "-journal", 9); | ||
2217 | memcpy(pPager->zStmtJrnl, pPager->zFilename, nPathname); | ||
2218 | memcpy(&pPager->zStmtJrnl[nPathname], "-stmtjrnl", 10); | ||
2219 | |||
2220 | /* pPager->journalOpen = 0; */ | ||
2221 | pPager->useJournal = useJournal && !memDb; | ||
2222 | pPager->noReadlock = noReadlock && readOnly; | ||
2223 | /* pPager->stmtOpen = 0; */ | ||
2224 | /* pPager->stmtInUse = 0; */ | ||
2225 | /* pPager->nRef = 0; */ | ||
2226 | pPager->dbSize = memDb-1; | ||
2227 | pPager->pageSize = nDefaultPage; | ||
2228 | /* pPager->stmtSize = 0; */ | ||
2229 | /* pPager->stmtJSize = 0; */ | ||
2230 | /* pPager->nPage = 0; */ | ||
2231 | pPager->mxPage = 100; | ||
2232 | pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; | ||
2233 | /* pPager->state = PAGER_UNLOCK; */ | ||
2234 | assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) ); | ||
2235 | /* pPager->errMask = 0; */ | ||
2236 | pPager->tempFile = tempFile; | ||
2237 | assert( tempFile==PAGER_LOCKINGMODE_NORMAL | ||
2238 | || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); | ||
2239 | assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); | ||
2240 | pPager->exclusiveMode = tempFile; | ||
2241 | pPager->memDb = memDb; | ||
2242 | pPager->readOnly = readOnly; | ||
2243 | /* pPager->needSync = 0; */ | ||
2244 | pPager->noSync = pPager->tempFile || !useJournal; | ||
2245 | pPager->fullSync = (pPager->noSync?0:1); | ||
2246 | pPager->sync_flags = SQLITE_SYNC_NORMAL; | ||
2247 | /* pPager->pFirst = 0; */ | ||
2248 | /* pPager->pFirstSynced = 0; */ | ||
2249 | /* pPager->pLast = 0; */ | ||
2250 | pPager->nExtra = FORCE_ALIGNMENT(nExtra); | ||
2251 | assert(pPager->fd->pMethods||memDb||tempFile); | ||
2252 | if( !memDb ){ | ||
2253 | setSectorSize(pPager); | ||
2254 | } | ||
2255 | /* pPager->pBusyHandler = 0; */ | ||
2256 | /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ | ||
2257 | *ppPager = pPager; | ||
2258 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
2259 | pPager->iInUseMM = 0; | ||
2260 | pPager->iInUseDB = 0; | ||
2261 | if( !memDb ){ | ||
2262 | sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2); | ||
2263 | sqlite3_mutex_enter(mutex); | ||
2264 | pPager->pNext = sqlite3PagerList; | ||
2265 | if( sqlite3PagerList ){ | ||
2266 | assert( sqlite3PagerList->pPrev==0 ); | ||
2267 | sqlite3PagerList->pPrev = pPager; | ||
2268 | } | ||
2269 | pPager->pPrev = 0; | ||
2270 | sqlite3PagerList = pPager; | ||
2271 | sqlite3_mutex_leave(mutex); | ||
2272 | } | ||
2273 | #endif | ||
2274 | return SQLITE_OK; | ||
2275 | } | ||
2276 | |||
2277 | /* | ||
2278 | ** Set the busy handler function. | ||
2279 | */ | ||
2280 | void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){ | ||
2281 | pPager->pBusyHandler = pBusyHandler; | ||
2282 | } | ||
2283 | |||
2284 | /* | ||
2285 | ** Set the destructor for this pager. If not NULL, the destructor is called | ||
2286 | ** when the reference count on each page reaches zero. The destructor can | ||
2287 | ** be used to clean up information in the extra segment appended to each page. | ||
2288 | ** | ||
2289 | ** The destructor is not called as a result sqlite3PagerClose(). | ||
2290 | ** Destructors are only called by sqlite3PagerUnref(). | ||
2291 | */ | ||
2292 | void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){ | ||
2293 | pPager->xDestructor = xDesc; | ||
2294 | } | ||
2295 | |||
2296 | /* | ||
2297 | ** Set the reinitializer for this pager. If not NULL, the reinitializer | ||
2298 | ** is called when the content of a page in cache is restored to its original | ||
2299 | ** value as a result of a rollback. The callback gives higher-level code | ||
2300 | ** an opportunity to restore the EXTRA section to agree with the restored | ||
2301 | ** page data. | ||
2302 | */ | ||
2303 | void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){ | ||
2304 | pPager->xReiniter = xReinit; | ||
2305 | } | ||
2306 | |||
2307 | /* | ||
2308 | ** Set the page size to *pPageSize. If the suggest new page size is | ||
2309 | ** inappropriate, then an alternative page size is set to that | ||
2310 | ** value before returning. | ||
2311 | */ | ||
2312 | int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){ | ||
2313 | int rc = SQLITE_OK; | ||
2314 | u16 pageSize = *pPageSize; | ||
2315 | assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); | ||
2316 | if( pageSize && pageSize!=pPager->pageSize | ||
2317 | && !pPager->memDb && pPager->nRef==0 | ||
2318 | ){ | ||
2319 | char *pNew = (char *)sqlite3_malloc(pageSize); | ||
2320 | if( !pNew ){ | ||
2321 | rc = SQLITE_NOMEM; | ||
2322 | }else{ | ||
2323 | pagerEnter(pPager); | ||
2324 | pager_reset(pPager); | ||
2325 | pPager->pageSize = pageSize; | ||
2326 | setSectorSize(pPager); | ||
2327 | sqlite3_free(pPager->pTmpSpace); | ||
2328 | pPager->pTmpSpace = pNew; | ||
2329 | pagerLeave(pPager); | ||
2330 | } | ||
2331 | } | ||
2332 | *pPageSize = pPager->pageSize; | ||
2333 | return rc; | ||
2334 | } | ||
2335 | |||
2336 | /* | ||
2337 | ** Attempt to set the maximum database page count if mxPage is positive. | ||
2338 | ** Make no changes if mxPage is zero or negative. And never reduce the | ||
2339 | ** maximum page count below the current size of the database. | ||
2340 | ** | ||
2341 | ** Regardless of mxPage, return the current maximum page count. | ||
2342 | */ | ||
2343 | int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){ | ||
2344 | if( mxPage>0 ){ | ||
2345 | pPager->mxPgno = mxPage; | ||
2346 | } | ||
2347 | sqlite3PagerPagecount(pPager); | ||
2348 | return pPager->mxPgno; | ||
2349 | } | ||
2350 | |||
/*
** Switches for the simulated I/O error mechanism.
**
** disable_simulated_io_errors() remembers the current value of
** sqlite3_io_error_pending in saved_cnt and then sets the counter to -1.
** enable_simulated_io_errors() puts the remembered value back.  The
** pair brackets code in which simulated errors are unwanted because
** errors there are of no interest.
**
** Both are real functions only when SQLITE_TEST is defined.  In every
** other build they are empty macros and generate no code.
*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;                  /* Counter value while disabled */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
2374 | |||
2375 | /* | ||
2376 | ** Read the first N bytes from the beginning of the file into memory | ||
2377 | ** that pDest points to. | ||
2378 | ** | ||
2379 | ** No error checking is done. The rational for this is that this function | ||
2380 | ** may be called even if the file does not exist or contain a header. In | ||
2381 | ** these cases sqlite3OsRead() will return an error, to which the correct | ||
2382 | ** response is to zero the memory at pDest and continue. A real IO error | ||
2383 | ** will presumably recur and be picked up later (Todo: Think about this). | ||
2384 | */ | ||
2385 | int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ | ||
2386 | int rc = SQLITE_OK; | ||
2387 | memset(pDest, 0, N); | ||
2388 | assert(MEMDB||pPager->fd->pMethods||pPager->tempFile); | ||
2389 | if( pPager->fd->pMethods ){ | ||
2390 | IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) | ||
2391 | rc = sqlite3OsRead(pPager->fd, pDest, N, 0); | ||
2392 | if( rc==SQLITE_IOERR_SHORT_READ ){ | ||
2393 | rc = SQLITE_OK; | ||
2394 | } | ||
2395 | } | ||
2396 | return rc; | ||
2397 | } | ||
2398 | |||
2399 | /* | ||
2400 | ** Return the total number of pages in the disk file associated with | ||
2401 | ** pPager. | ||
2402 | ** | ||
2403 | ** If the PENDING_BYTE lies on the page directly after the end of the | ||
2404 | ** file, then consider this page part of the file too. For example, if | ||
2405 | ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the | ||
2406 | ** file is 4096 bytes, 5 is returned instead of 4. | ||
2407 | */ | ||
2408 | int sqlite3PagerPagecount(Pager *pPager){ | ||
2409 | i64 n = 0; | ||
2410 | int rc; | ||
2411 | assert( pPager!=0 ); | ||
2412 | if( pPager->errCode ){ | ||
2413 | return 0; | ||
2414 | } | ||
2415 | if( pPager->dbSize>=0 ){ | ||
2416 | n = pPager->dbSize; | ||
2417 | } else { | ||
2418 | assert(pPager->fd->pMethods||pPager->tempFile); | ||
2419 | if( (pPager->fd->pMethods) | ||
2420 | && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){ | ||
2421 | pPager->nRef++; | ||
2422 | pager_error(pPager, rc); | ||
2423 | pPager->nRef--; | ||
2424 | return 0; | ||
2425 | } | ||
2426 | if( n>0 && n<pPager->pageSize ){ | ||
2427 | n = 1; | ||
2428 | }else{ | ||
2429 | n /= pPager->pageSize; | ||
2430 | } | ||
2431 | if( pPager->state!=PAGER_UNLOCK ){ | ||
2432 | pPager->dbSize = n; | ||
2433 | } | ||
2434 | } | ||
2435 | if( n==(PENDING_BYTE/pPager->pageSize) ){ | ||
2436 | n++; | ||
2437 | } | ||
2438 | if( n>pPager->mxPgno ){ | ||
2439 | pPager->mxPgno = n; | ||
2440 | } | ||
2441 | return n; | ||
2442 | } | ||
2443 | |||
2444 | |||
2445 | #ifndef SQLITE_OMIT_MEMORYDB | ||
2446 | /* | ||
2447 | ** Clear a PgHistory block | ||
2448 | */ | ||
2449 | static void clearHistory(PgHistory *pHist){ | ||
2450 | sqlite3_free(pHist->pOrig); | ||
2451 | sqlite3_free(pHist->pStmt); | ||
2452 | pHist->pOrig = 0; | ||
2453 | pHist->pStmt = 0; | ||
2454 | } | ||
2455 | #else | ||
2456 | #define clearHistory(x) | ||
2457 | #endif | ||
2458 | |||
2459 | /* | ||
2460 | ** Forward declaration | ||
2461 | */ | ||
2462 | static int syncJournal(Pager*); | ||
2463 | |||
2464 | /* | ||
2465 | ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate | ||
2466 | ** that the page is not part of any hash chain. This is required because the | ||
2467 | ** sqlite3PagerMovepage() routine can leave a page in the | ||
2468 | ** pNextFree/pPrevFree list that is not a part of any hash-chain. | ||
2469 | */ | ||
2470 | static void unlinkHashChain(Pager *pPager, PgHdr *pPg){ | ||
2471 | if( pPg->pgno==0 ){ | ||
2472 | assert( pPg->pNextHash==0 && pPg->pPrevHash==0 ); | ||
2473 | return; | ||
2474 | } | ||
2475 | if( pPg->pNextHash ){ | ||
2476 | pPg->pNextHash->pPrevHash = pPg->pPrevHash; | ||
2477 | } | ||
2478 | if( pPg->pPrevHash ){ | ||
2479 | assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg ); | ||
2480 | pPg->pPrevHash->pNextHash = pPg->pNextHash; | ||
2481 | }else{ | ||
2482 | int h = pPg->pgno & (pPager->nHash-1); | ||
2483 | pPager->aHash[h] = pPg->pNextHash; | ||
2484 | } | ||
2485 | if( MEMDB ){ | ||
2486 | clearHistory(PGHDR_TO_HIST(pPg, pPager)); | ||
2487 | } | ||
2488 | pPg->pgno = 0; | ||
2489 | pPg->pNextHash = pPg->pPrevHash = 0; | ||
2490 | } | ||
2491 | |||
2492 | /* | ||
2493 | ** Unlink a page from the free list (the list of all pages where nRef==0) | ||
2494 | ** and from its hash collision chain. | ||
2495 | */ | ||
2496 | static void unlinkPage(PgHdr *pPg){ | ||
2497 | Pager *pPager = pPg->pPager; | ||
2498 | |||
2499 | /* Unlink from free page list */ | ||
2500 | lruListRemove(pPg); | ||
2501 | |||
2502 | /* Unlink from the pgno hash table */ | ||
2503 | unlinkHashChain(pPager, pPg); | ||
2504 | } | ||
2505 | |||
2506 | /* | ||
2507 | ** This routine is used to truncate the cache when a database | ||
2508 | ** is truncated. Drop from the cache all pages whose pgno is | ||
2509 | ** larger than pPager->dbSize and is unreferenced. | ||
2510 | ** | ||
2511 | ** Referenced pages larger than pPager->dbSize are zeroed. | ||
2512 | ** | ||
2513 | ** Actually, at the point this routine is called, it would be | ||
2514 | ** an error to have a referenced page. But rather than delete | ||
2515 | ** that page and guarantee a subsequent segfault, it seems better | ||
2516 | ** to zero it and hope that we error out sanely. | ||
2517 | */ | ||
2518 | static void pager_truncate_cache(Pager *pPager){ | ||
2519 | PgHdr *pPg; | ||
2520 | PgHdr **ppPg; | ||
2521 | int dbSize = pPager->dbSize; | ||
2522 | |||
2523 | ppPg = &pPager->pAll; | ||
2524 | while( (pPg = *ppPg)!=0 ){ | ||
2525 | if( pPg->pgno<=dbSize ){ | ||
2526 | ppPg = &pPg->pNextAll; | ||
2527 | }else if( pPg->nRef>0 ){ | ||
2528 | memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); | ||
2529 | ppPg = &pPg->pNextAll; | ||
2530 | }else{ | ||
2531 | *ppPg = pPg->pNextAll; | ||
2532 | IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno)); | ||
2533 | PAGER_INCR(sqlite3_pager_pgfree_count); | ||
2534 | unlinkPage(pPg); | ||
2535 | makeClean(pPg); | ||
2536 | sqlite3_free(pPg->pData); | ||
2537 | sqlite3_free(pPg); | ||
2538 | pPager->nPage--; | ||
2539 | } | ||
2540 | } | ||
2541 | } | ||
2542 | |||
2543 | /* | ||
2544 | ** Try to obtain a lock on a file. Invoke the busy callback if the lock | ||
2545 | ** is currently not available. Repeat until the busy callback returns | ||
2546 | ** false or until the lock succeeds. | ||
2547 | ** | ||
2548 | ** Return SQLITE_OK on success and an error code if we cannot obtain | ||
2549 | ** the lock. | ||
2550 | */ | ||
2551 | static int pager_wait_on_lock(Pager *pPager, int locktype){ | ||
2552 | int rc; | ||
2553 | |||
2554 | /* The OS lock values must be the same as the Pager lock values */ | ||
2555 | assert( PAGER_SHARED==SHARED_LOCK ); | ||
2556 | assert( PAGER_RESERVED==RESERVED_LOCK ); | ||
2557 | assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK ); | ||
2558 | |||
2559 | /* If the file is currently unlocked then the size must be unknown */ | ||
2560 | assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB ); | ||
2561 | |||
2562 | if( pPager->state>=locktype ){ | ||
2563 | rc = SQLITE_OK; | ||
2564 | }else{ | ||
2565 | do { | ||
2566 | rc = sqlite3OsLock(pPager->fd, locktype); | ||
2567 | }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) ); | ||
2568 | if( rc==SQLITE_OK ){ | ||
2569 | pPager->state = locktype; | ||
2570 | IOTRACE(("LOCK %p %d\n", pPager, locktype)) | ||
2571 | } | ||
2572 | } | ||
2573 | return rc; | ||
2574 | } | ||
2575 | |||
2576 | /* | ||
2577 | ** Truncate the file to the number of pages specified. | ||
2578 | */ | ||
2579 | int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){ | ||
2580 | int rc; | ||
2581 | assert( pPager->state>=PAGER_SHARED || MEMDB ); | ||
2582 | sqlite3PagerPagecount(pPager); | ||
2583 | if( pPager->errCode ){ | ||
2584 | rc = pPager->errCode; | ||
2585 | return rc; | ||
2586 | } | ||
2587 | if( nPage>=(unsigned)pPager->dbSize ){ | ||
2588 | return SQLITE_OK; | ||
2589 | } | ||
2590 | if( MEMDB ){ | ||
2591 | pPager->dbSize = nPage; | ||
2592 | pager_truncate_cache(pPager); | ||
2593 | return SQLITE_OK; | ||
2594 | } | ||
2595 | pagerEnter(pPager); | ||
2596 | rc = syncJournal(pPager); | ||
2597 | pagerLeave(pPager); | ||
2598 | if( rc!=SQLITE_OK ){ | ||
2599 | return rc; | ||
2600 | } | ||
2601 | |||
2602 | /* Get an exclusive lock on the database before truncating. */ | ||
2603 | pagerEnter(pPager); | ||
2604 | rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); | ||
2605 | pagerLeave(pPager); | ||
2606 | if( rc!=SQLITE_OK ){ | ||
2607 | return rc; | ||
2608 | } | ||
2609 | |||
2610 | rc = pager_truncate(pPager, nPage); | ||
2611 | return rc; | ||
2612 | } | ||
2613 | |||
2614 | /* | ||
2615 | ** Shutdown the page cache. Free all memory and close all files. | ||
2616 | ** | ||
2617 | ** If a transaction was in progress when this routine is called, that | ||
2618 | ** transaction is rolled back. All outstanding pages are invalidated | ||
2619 | ** and their memory is freed. Any attempt to use a page associated | ||
2620 | ** with this page cache after this function returns will likely | ||
2621 | ** result in a coredump. | ||
2622 | ** | ||
2623 | ** This function always succeeds. If a transaction is active an attempt | ||
2624 | ** is made to roll it back. If an error occurs during the rollback | ||
2625 | ** a hot journal may be left in the filesystem but no error is returned | ||
2626 | ** to the caller. | ||
2627 | */ | ||
2628 | int sqlite3PagerClose(Pager *pPager){ | ||
2629 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
2630 | if( !MEMDB ){ | ||
2631 | sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2); | ||
2632 | sqlite3_mutex_enter(mutex); | ||
2633 | if( pPager->pPrev ){ | ||
2634 | pPager->pPrev->pNext = pPager->pNext; | ||
2635 | }else{ | ||
2636 | sqlite3PagerList = pPager->pNext; | ||
2637 | } | ||
2638 | if( pPager->pNext ){ | ||
2639 | pPager->pNext->pPrev = pPager->pPrev; | ||
2640 | } | ||
2641 | sqlite3_mutex_leave(mutex); | ||
2642 | } | ||
2643 | #endif | ||
2644 | |||
2645 | disable_simulated_io_errors(); | ||
2646 | pPager->errCode = 0; | ||
2647 | pPager->exclusiveMode = 0; | ||
2648 | pager_reset(pPager); | ||
2649 | pagerUnlockAndRollback(pPager); | ||
2650 | enable_simulated_io_errors(); | ||
2651 | PAGERTRACE2("CLOSE %d\n", PAGERID(pPager)); | ||
2652 | IOTRACE(("CLOSE %p\n", pPager)) | ||
2653 | assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) ); | ||
2654 | if( pPager->journalOpen ){ | ||
2655 | sqlite3OsClose(pPager->jfd); | ||
2656 | } | ||
2657 | sqlite3_free(pPager->aInJournal); | ||
2658 | if( pPager->stmtOpen ){ | ||
2659 | sqlite3OsClose(pPager->stfd); | ||
2660 | } | ||
2661 | sqlite3OsClose(pPager->fd); | ||
2662 | /* Temp files are automatically deleted by the OS | ||
2663 | ** if( pPager->tempFile ){ | ||
2664 | ** sqlite3OsDelete(pPager->zFilename); | ||
2665 | ** } | ||
2666 | */ | ||
2667 | |||
2668 | sqlite3_free(pPager->aHash); | ||
2669 | sqlite3_free(pPager->pTmpSpace); | ||
2670 | sqlite3_free(pPager); | ||
2671 | return SQLITE_OK; | ||
2672 | } | ||
2673 | |||
2674 | #if !defined(NDEBUG) || defined(SQLITE_TEST) | ||
2675 | /* | ||
2676 | ** Return the page number for the given page data. | ||
2677 | */ | ||
2678 | Pgno sqlite3PagerPagenumber(DbPage *p){ | ||
2679 | return p->pgno; | ||
2680 | } | ||
2681 | #endif | ||
2682 | |||
2683 | /* | ||
2684 | ** The page_ref() function increments the reference count for a page. | ||
2685 | ** If the page is currently on the freelist (the reference count is zero) then | ||
2686 | ** remove it from the freelist. | ||
2687 | ** | ||
2688 | ** For non-test systems, page_ref() is a macro that calls _page_ref() | ||
2689 | ** online of the reference count is zero. For test systems, page_ref() | ||
2690 | ** is a real function so that we can set breakpoints and trace it. | ||
2691 | */ | ||
2692 | static void _page_ref(PgHdr *pPg){ | ||
2693 | if( pPg->nRef==0 ){ | ||
2694 | /* The page is currently on the freelist. Remove it. */ | ||
2695 | lruListRemove(pPg); | ||
2696 | pPg->pPager->nRef++; | ||
2697 | } | ||
2698 | pPg->nRef++; | ||
2699 | REFINFO(pPg); | ||
2700 | } | ||
2701 | #ifdef SQLITE_DEBUG | ||
2702 | static void page_ref(PgHdr *pPg){ | ||
2703 | if( pPg->nRef==0 ){ | ||
2704 | _page_ref(pPg); | ||
2705 | }else{ | ||
2706 | pPg->nRef++; | ||
2707 | REFINFO(pPg); | ||
2708 | } | ||
2709 | } | ||
2710 | #else | ||
2711 | # define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++) | ||
2712 | #endif | ||
2713 | |||
2714 | /* | ||
2715 | ** Increment the reference count for a page. The input pointer is | ||
2716 | ** a reference to the page data. | ||
2717 | */ | ||
2718 | int sqlite3PagerRef(DbPage *pPg){ | ||
2719 | pagerEnter(pPg->pPager); | ||
2720 | page_ref(pPg); | ||
2721 | pagerLeave(pPg->pPager); | ||
2722 | return SQLITE_OK; | ||
2723 | } | ||
2724 | |||
2725 | /* | ||
2726 | ** Sync the journal. In other words, make sure all the pages that have | ||
2727 | ** been written to the journal have actually reached the surface of the | ||
2728 | ** disk. It is not safe to modify the original database file until after | ||
2729 | ** the journal has been synced. If the original database is modified before | ||
2730 | ** the journal is synced and a power failure occurs, the unsynced journal | ||
2731 | ** data would be lost and we would be unable to completely rollback the | ||
2732 | ** database changes. Database corruption would occur. | ||
2733 | ** | ||
2734 | ** This routine also updates the nRec field in the header of the journal. | ||
2735 | ** (See comments on the pager_playback() routine for additional information.) | ||
2736 | ** If the sync mode is FULL, two syncs will occur. First the whole journal | ||
2737 | ** is synced, then the nRec field is updated, then a second sync occurs. | ||
2738 | ** | ||
2739 | ** For temporary databases, we do not care if we are able to rollback | ||
2740 | ** after a power failure, so no sync occurs. | ||
2741 | ** | ||
2742 | ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which | ||
2743 | ** the database is stored, then OsSync() is never called on the journal | ||
2744 | ** file. In this case all that is required is to update the nRec field in | ||
2745 | ** the journal header. | ||
2746 | ** | ||
2747 | ** This routine clears the needSync field of every page current held in | ||
2748 | ** memory. | ||
2749 | */ | ||
2750 | static int syncJournal(Pager *pPager){ | ||
2751 | PgHdr *pPg; | ||
2752 | int rc = SQLITE_OK; | ||
2753 | |||
2754 | |||
2755 | /* Sync the journal before modifying the main database | ||
2756 | ** (assuming there is a journal and it needs to be synced.) | ||
2757 | */ | ||
2758 | if( pPager->needSync ){ | ||
2759 | if( !pPager->tempFile ){ | ||
2760 | int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); | ||
2761 | assert( pPager->journalOpen ); | ||
2762 | |||
2763 | /* assert( !pPager->noSync ); // noSync might be set if synchronous | ||
2764 | ** was turned off after the transaction was started. Ticket #615 */ | ||
2765 | #ifndef NDEBUG | ||
2766 | { | ||
2767 | /* Make sure the pPager->nRec counter we are keeping agrees | ||
2768 | ** with the nRec computed from the size of the journal file. | ||
2769 | */ | ||
2770 | i64 jSz; | ||
2771 | rc = sqlite3OsFileSize(pPager->jfd, &jSz); | ||
2772 | if( rc!=0 ) return rc; | ||
2773 | assert( pPager->journalOff==jSz ); | ||
2774 | } | ||
2775 | #endif | ||
2776 | if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ | ||
2777 | /* Write the nRec value into the journal file header. If in | ||
2778 | ** full-synchronous mode, sync the journal first. This ensures that | ||
2779 | ** all data has really hit the disk before nRec is updated to mark | ||
2780 | ** it as a candidate for rollback. | ||
2781 | ** | ||
2782 | ** This is not required if the persistent media supports the | ||
2783 | ** SAFE_APPEND property. Because in this case it is not possible | ||
2784 | ** for garbage data to be appended to the file, the nRec field | ||
2785 | ** is populated with 0xFFFFFFFF when the journal header is written | ||
2786 | ** and never needs to be updated. | ||
2787 | */ | ||
2788 | i64 jrnlOff; | ||
2789 | if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ | ||
2790 | PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager)); | ||
2791 | IOTRACE(("JSYNC %p\n", pPager)) | ||
2792 | rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags); | ||
2793 | if( rc!=0 ) return rc; | ||
2794 | } | ||
2795 | |||
2796 | jrnlOff = pPager->journalHdr + sizeof(aJournalMagic); | ||
2797 | IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4)); | ||
2798 | rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec); | ||
2799 | if( rc ) return rc; | ||
2800 | } | ||
2801 | if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ | ||
2802 | PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager)); | ||
2803 | IOTRACE(("JSYNC %p\n", pPager)) | ||
2804 | rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| | ||
2805 | (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) | ||
2806 | ); | ||
2807 | if( rc!=0 ) return rc; | ||
2808 | } | ||
2809 | pPager->journalStarted = 1; | ||
2810 | } | ||
2811 | pPager->needSync = 0; | ||
2812 | |||
2813 | /* Erase the needSync flag from every page. | ||
2814 | */ | ||
2815 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
2816 | pPg->needSync = 0; | ||
2817 | } | ||
2818 | lruListSetFirstSynced(pPager); | ||
2819 | } | ||
2820 | |||
2821 | #ifndef NDEBUG | ||
2822 | /* If the Pager.needSync flag is clear then the PgHdr.needSync | ||
2823 | ** flag must also be clear for all pages. Verify that this | ||
2824 | ** invariant is true. | ||
2825 | */ | ||
2826 | else{ | ||
2827 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
2828 | assert( pPg->needSync==0 ); | ||
2829 | } | ||
2830 | assert( pPager->lru.pFirstSynced==pPager->lru.pFirst ); | ||
2831 | } | ||
2832 | #endif | ||
2833 | |||
2834 | return rc; | ||
2835 | } | ||
2836 | |||
2837 | /* | ||
2838 | ** Merge two lists of pages connected by pDirty and in pgno order. | ||
2839 | ** Do not both fixing the pPrevDirty pointers. | ||
2840 | */ | ||
2841 | static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){ | ||
2842 | PgHdr result, *pTail; | ||
2843 | pTail = &result; | ||
2844 | while( pA && pB ){ | ||
2845 | if( pA->pgno<pB->pgno ){ | ||
2846 | pTail->pDirty = pA; | ||
2847 | pTail = pA; | ||
2848 | pA = pA->pDirty; | ||
2849 | }else{ | ||
2850 | pTail->pDirty = pB; | ||
2851 | pTail = pB; | ||
2852 | pB = pB->pDirty; | ||
2853 | } | ||
2854 | } | ||
2855 | if( pA ){ | ||
2856 | pTail->pDirty = pA; | ||
2857 | }else if( pB ){ | ||
2858 | pTail->pDirty = pB; | ||
2859 | }else{ | ||
2860 | pTail->pDirty = 0; | ||
2861 | } | ||
2862 | return result.pDirty; | ||
2863 | } | ||
2864 | |||
2865 | /* | ||
2866 | ** Sort the list of pages in accending order by pgno. Pages are | ||
2867 | ** connected by pDirty pointers. The pPrevDirty pointers are | ||
2868 | ** corrupted by this sort. | ||
2869 | */ | ||
2870 | #define N_SORT_BUCKET_ALLOC 25 | ||
2871 | #define N_SORT_BUCKET 25 | ||
2872 | #ifdef SQLITE_TEST | ||
2873 | int sqlite3_pager_n_sort_bucket = 0; | ||
2874 | #undef N_SORT_BUCKET | ||
2875 | #define N_SORT_BUCKET \ | ||
2876 | (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC) | ||
2877 | #endif | ||
2878 | static PgHdr *sort_pagelist(PgHdr *pIn){ | ||
2879 | PgHdr *a[N_SORT_BUCKET_ALLOC], *p; | ||
2880 | int i; | ||
2881 | memset(a, 0, sizeof(a)); | ||
2882 | while( pIn ){ | ||
2883 | p = pIn; | ||
2884 | pIn = p->pDirty; | ||
2885 | p->pDirty = 0; | ||
2886 | for(i=0; i<N_SORT_BUCKET-1; i++){ | ||
2887 | if( a[i]==0 ){ | ||
2888 | a[i] = p; | ||
2889 | break; | ||
2890 | }else{ | ||
2891 | p = merge_pagelist(a[i], p); | ||
2892 | a[i] = 0; | ||
2893 | } | ||
2894 | } | ||
2895 | if( i==N_SORT_BUCKET-1 ){ | ||
2896 | /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET) | ||
2897 | ** elements in the input list. This is possible, but impractical. | ||
2898 | ** Testing this line is the point of global variable | ||
2899 | ** sqlite3_pager_n_sort_bucket. | ||
2900 | */ | ||
2901 | a[i] = merge_pagelist(a[i], p); | ||
2902 | } | ||
2903 | } | ||
2904 | p = a[0]; | ||
2905 | for(i=1; i<N_SORT_BUCKET; i++){ | ||
2906 | p = merge_pagelist(p, a[i]); | ||
2907 | } | ||
2908 | return p; | ||
2909 | } | ||
2910 | |||
2911 | /* | ||
2912 | ** Given a list of pages (connected by the PgHdr.pDirty pointer) write | ||
2913 | ** every one of those pages out to the database file and mark them all | ||
2914 | ** as clean. | ||
2915 | */ | ||
2916 | static int pager_write_pagelist(PgHdr *pList){ | ||
2917 | Pager *pPager; | ||
2918 | PgHdr *p; | ||
2919 | int rc; | ||
2920 | |||
2921 | if( pList==0 ) return SQLITE_OK; | ||
2922 | pPager = pList->pPager; | ||
2923 | |||
2924 | /* At this point there may be either a RESERVED or EXCLUSIVE lock on the | ||
2925 | ** database file. If there is already an EXCLUSIVE lock, the following | ||
2926 | ** calls to sqlite3OsLock() are no-ops. | ||
2927 | ** | ||
2928 | ** Moving the lock from RESERVED to EXCLUSIVE actually involves going | ||
2929 | ** through an intermediate state PENDING. A PENDING lock prevents new | ||
2930 | ** readers from attaching to the database but is unsufficient for us to | ||
2931 | ** write. The idea of a PENDING lock is to prevent new readers from | ||
2932 | ** coming in while we wait for existing readers to clear. | ||
2933 | ** | ||
2934 | ** While the pager is in the RESERVED state, the original database file | ||
2935 | ** is unchanged and we can rollback without having to playback the | ||
2936 | ** journal into the original database file. Once we transition to | ||
2937 | ** EXCLUSIVE, it means the database file has been changed and any rollback | ||
2938 | ** will require a journal playback. | ||
2939 | */ | ||
2940 | rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); | ||
2941 | if( rc!=SQLITE_OK ){ | ||
2942 | return rc; | ||
2943 | } | ||
2944 | |||
2945 | pList = sort_pagelist(pList); | ||
2946 | for(p=pList; p; p=p->pDirty){ | ||
2947 | assert( p->dirty ); | ||
2948 | p->dirty = 0; | ||
2949 | } | ||
2950 | while( pList ){ | ||
2951 | |||
2952 | /* If the file has not yet been opened, open it now. */ | ||
2953 | if( !pPager->fd->pMethods ){ | ||
2954 | assert(pPager->tempFile); | ||
2955 | rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->fd, pPager->zFilename, | ||
2956 | pPager->vfsFlags); | ||
2957 | if( rc ) return rc; | ||
2958 | } | ||
2959 | |||
2960 | /* If there are dirty pages in the page cache with page numbers greater | ||
2961 | ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to | ||
2962 | ** make the file smaller (presumably by auto-vacuum code). Do not write | ||
2963 | ** any such pages to the file. | ||
2964 | */ | ||
2965 | if( pList->pgno<=pPager->dbSize ){ | ||
2966 | i64 offset = (pList->pgno-1)*(i64)pPager->pageSize; | ||
2967 | char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6); | ||
2968 | PAGERTRACE4("STORE %d page %d hash(%08x)\n", | ||
2969 | PAGERID(pPager), pList->pgno, pager_pagehash(pList)); | ||
2970 | IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno)); | ||
2971 | rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); | ||
2972 | PAGER_INCR(sqlite3_pager_writedb_count); | ||
2973 | PAGER_INCR(pPager->nWrite); | ||
2974 | if( pList->pgno==1 ){ | ||
2975 | memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); | ||
2976 | } | ||
2977 | } | ||
2978 | #ifndef NDEBUG | ||
2979 | else{ | ||
2980 | PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno); | ||
2981 | } | ||
2982 | #endif | ||
2983 | if( rc ) return rc; | ||
2984 | #ifdef SQLITE_CHECK_PAGES | ||
2985 | pList->pageHash = pager_pagehash(pList); | ||
2986 | #endif | ||
2987 | pList = pList->pDirty; | ||
2988 | } | ||
2989 | return SQLITE_OK; | ||
2990 | } | ||
2991 | |||
2992 | /* | ||
2993 | ** Collect every dirty page into a dirty list and | ||
2994 | ** return a pointer to the head of that list. All pages are | ||
2995 | ** collected even if they are still in use. | ||
2996 | */ | ||
2997 | static PgHdr *pager_get_all_dirty_pages(Pager *pPager){ | ||
2998 | return pPager->pDirty; | ||
2999 | } | ||
3000 | |||
3001 | /* | ||
3002 | ** Return TRUE if there is a hot journal on the given pager. | ||
3003 | ** A hot journal is one that needs to be played back. | ||
3004 | ** | ||
3005 | ** If the current size of the database file is 0 but a journal file | ||
3006 | ** exists, that is probably an old journal left over from a prior | ||
3007 | ** database with the same name. Just delete the journal. | ||
3008 | */ | ||
3009 | static int hasHotJournal(Pager *pPager){ | ||
3010 | sqlite3_vfs *pVfs = pPager->pVfs; | ||
3011 | if( !pPager->useJournal ) return 0; | ||
3012 | if( !sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){ | ||
3013 | return 0; | ||
3014 | } | ||
3015 | if( sqlite3OsCheckReservedLock(pPager->fd) ){ | ||
3016 | return 0; | ||
3017 | } | ||
3018 | if( sqlite3PagerPagecount(pPager)==0 ){ | ||
3019 | sqlite3OsDelete(pVfs, pPager->zJournal, 0); | ||
3020 | return 0; | ||
3021 | }else{ | ||
3022 | return 1; | ||
3023 | } | ||
3024 | } | ||
3025 | |||
3026 | /* | ||
3027 | ** Try to find a page in the cache that can be recycled. | ||
3028 | ** | ||
3029 | ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It | ||
3030 | ** does not set the pPager->errCode variable. | ||
3031 | */ | ||
3032 | static int pager_recycle(Pager *pPager, PgHdr **ppPg){ | ||
3033 | PgHdr *pPg; | ||
3034 | *ppPg = 0; | ||
3035 | |||
3036 | /* It is illegal to call this function unless the pager object | ||
3037 | ** pointed to by pPager has at least one free page (page with nRef==0). | ||
3038 | */ | ||
3039 | assert(!MEMDB); | ||
3040 | assert(pPager->lru.pFirst); | ||
3041 | |||
3042 | /* Find a page to recycle. Try to locate a page that does not | ||
3043 | ** require us to do an fsync() on the journal. | ||
3044 | */ | ||
3045 | pPg = pPager->lru.pFirstSynced; | ||
3046 | |||
3047 | /* If we could not find a page that does not require an fsync() | ||
3048 | ** on the journal file then fsync the journal file. This is a | ||
3049 | ** very slow operation, so we work hard to avoid it. But sometimes | ||
3050 | ** it can't be helped. | ||
3051 | */ | ||
3052 | if( pPg==0 && pPager->lru.pFirst){ | ||
3053 | int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); | ||
3054 | int rc = syncJournal(pPager); | ||
3055 | if( rc!=0 ){ | ||
3056 | return rc; | ||
3057 | } | ||
3058 | if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ | ||
3059 | /* If in full-sync mode, write a new journal header into the | ||
3060 | ** journal file. This is done to avoid ever modifying a journal | ||
3061 | ** header that is involved in the rollback of pages that have | ||
3062 | ** already been written to the database (in case the header is | ||
3063 | ** trashed when the nRec field is updated). | ||
3064 | */ | ||
3065 | pPager->nRec = 0; | ||
3066 | assert( pPager->journalOff > 0 ); | ||
3067 | assert( pPager->doNotSync==0 ); | ||
3068 | rc = writeJournalHdr(pPager); | ||
3069 | if( rc!=0 ){ | ||
3070 | return rc; | ||
3071 | } | ||
3072 | } | ||
3073 | pPg = pPager->lru.pFirst; | ||
3074 | } | ||
3075 | |||
3076 | assert( pPg->nRef==0 ); | ||
3077 | |||
3078 | /* Write the page to the database file if it is dirty. | ||
3079 | */ | ||
3080 | if( pPg->dirty ){ | ||
3081 | int rc; | ||
3082 | assert( pPg->needSync==0 ); | ||
3083 | makeClean(pPg); | ||
3084 | pPg->dirty = 1; | ||
3085 | pPg->pDirty = 0; | ||
3086 | rc = pager_write_pagelist( pPg ); | ||
3087 | pPg->dirty = 0; | ||
3088 | if( rc!=SQLITE_OK ){ | ||
3089 | return rc; | ||
3090 | } | ||
3091 | } | ||
3092 | assert( pPg->dirty==0 ); | ||
3093 | |||
3094 | /* If the page we are recycling is marked as alwaysRollback, then | ||
3095 | ** set the global alwaysRollback flag, thus disabling the | ||
3096 | ** sqlite3PagerDontRollback() optimization for the rest of this transaction. | ||
3097 | ** It is necessary to do this because the page marked alwaysRollback | ||
3098 | ** might be reloaded at a later time but at that point we won't remember | ||
3099 | ** that is was marked alwaysRollback. This means that all pages must | ||
3100 | ** be marked as alwaysRollback from here on out. | ||
3101 | */ | ||
3102 | if( pPg->alwaysRollback ){ | ||
3103 | IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager)) | ||
3104 | pPager->alwaysRollback = 1; | ||
3105 | } | ||
3106 | |||
3107 | /* Unlink the old page from the free list and the hash table | ||
3108 | */ | ||
3109 | unlinkPage(pPg); | ||
3110 | assert( pPg->pgno==0 ); | ||
3111 | |||
3112 | *ppPg = pPg; | ||
3113 | return SQLITE_OK; | ||
3114 | } | ||
3115 | |||
3116 | #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT | ||
3117 | /* | ||
3118 | ** This function is called to free superfluous dynamically allocated memory | ||
3119 | ** held by the pager system. Memory in use by any SQLite pager allocated | ||
3120 | ** by the current thread may be sqlite3_free()ed. | ||
3121 | ** | ||
3122 | ** nReq is the number of bytes of memory required. Once this much has | ||
3123 | ** been released, the function returns. The return value is the total number | ||
3124 | ** of bytes of memory released. | ||
3125 | */ | ||
3126 | int sqlite3PagerReleaseMemory(int nReq){ | ||
3127 | int nReleased = 0; /* Bytes of memory released so far */ | ||
3128 | sqlite3_mutex *mutex; /* The MEM2 mutex */ | ||
3129 | Pager *pPager; /* For looping over pagers */ | ||
3130 | int rc = SQLITE_OK; | ||
3131 | |||
3132 | /* Acquire the memory-management mutex | ||
3133 | */ | ||
3134 | mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2); | ||
3135 | sqlite3_mutex_enter(mutex); | ||
3136 | |||
3137 | /* Signal all database connections that memory management wants | ||
3138 | ** to have access to the pagers. | ||
3139 | */ | ||
3140 | for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){ | ||
3141 | pPager->iInUseMM = 1; | ||
3142 | } | ||
3143 | |||
3144 | while( rc==SQLITE_OK && (nReq<0 || nReleased<nReq) ){ | ||
3145 | PgHdr *pPg; | ||
3146 | PgHdr *pRecycled; | ||
3147 | |||
3148 | /* Try to find a page to recycle that does not require a sync(). If | ||
3149 | ** this is not possible, find one that does require a sync(). | ||
3150 | */ | ||
3151 | sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
3152 | pPg = sqlite3LruPageList.pFirstSynced; | ||
3153 | while( pPg && (pPg->needSync || pPg->pPager->iInUseDB) ){ | ||
3154 | pPg = pPg->gfree.pNext; | ||
3155 | } | ||
3156 | if( !pPg ){ | ||
3157 | pPg = sqlite3LruPageList.pFirst; | ||
3158 | while( pPg && pPg->pPager->iInUseDB ){ | ||
3159 | pPg = pPg->gfree.pNext; | ||
3160 | } | ||
3161 | } | ||
3162 | sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); | ||
3163 | |||
3164 | /* If pPg==0, then the block above has failed to find a page to | ||
3165 | ** recycle. In this case return early - no further memory will | ||
3166 | ** be released. | ||
3167 | */ | ||
3168 | if( !pPg ) break; | ||
3169 | |||
3170 | pPager = pPg->pPager; | ||
3171 | assert(!pPg->needSync || pPg==pPager->lru.pFirst); | ||
3172 | assert(pPg->needSync || pPg==pPager->lru.pFirstSynced); | ||
3173 | |||
3174 | rc = pager_recycle(pPager, &pRecycled); | ||
3175 | assert(pRecycled==pPg || rc!=SQLITE_OK); | ||
3176 | if( rc==SQLITE_OK ){ | ||
3177 | /* We've found a page to free. At this point the page has been | ||
3178 | ** removed from the page hash-table, free-list and synced-list | ||
3179 | ** (pFirstSynced). It is still in the all pages (pAll) list. | ||
3180 | ** Remove it from this list before freeing. | ||
3181 | ** | ||
3182 | ** Todo: Check the Pager.pStmt list to make sure this is Ok. It | ||
3183 | ** probably is though. | ||
3184 | */ | ||
3185 | PgHdr *pTmp; | ||
3186 | assert( pPg ); | ||
3187 | if( pPg==pPager->pAll ){ | ||
3188 | pPager->pAll = pPg->pNextAll; | ||
3189 | }else{ | ||
3190 | for( pTmp=pPager->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){} | ||
3191 | pTmp->pNextAll = pPg->pNextAll; | ||
3192 | } | ||
3193 | nReleased += ( | ||
3194 | sizeof(*pPg) + pPager->pageSize | ||
3195 | + sizeof(u32) + pPager->nExtra | ||
3196 | + MEMDB*sizeof(PgHistory) | ||
3197 | ); | ||
3198 | IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno)); | ||
3199 | PAGER_INCR(sqlite3_pager_pgfree_count); | ||
3200 | sqlite3_free(pPg->pData); | ||
3201 | sqlite3_free(pPg); | ||
3202 | pPager->nPage--; | ||
3203 | }else{ | ||
3204 | /* An error occured whilst writing to the database file or | ||
3205 | ** journal in pager_recycle(). The error is not returned to the | ||
3206 | ** caller of this function. Instead, set the Pager.errCode variable. | ||
3207 | ** The error will be returned to the user (or users, in the case | ||
3208 | ** of a shared pager cache) of the pager for which the error occured. | ||
3209 | */ | ||
3210 | assert( | ||
3211 | (rc&0xff)==SQLITE_IOERR || | ||
3212 | rc==SQLITE_FULL || | ||
3213 | rc==SQLITE_BUSY | ||
3214 | ); | ||
3215 | assert( pPager->state>=PAGER_RESERVED ); | ||
3216 | pager_error(pPager, rc); | ||
3217 | } | ||
3218 | } | ||
3219 | |||
3220 | /* Clear the memory management flags and release the mutex | ||
3221 | */ | ||
3222 | for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){ | ||
3223 | pPager->iInUseMM = 0; | ||
3224 | } | ||
3225 | sqlite3_mutex_leave(mutex); | ||
3226 | |||
3227 | /* Return the number of bytes released | ||
3228 | */ | ||
3229 | return nReleased; | ||
3230 | } | ||
3231 | #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ | ||
3232 | |||
3233 | /* | ||
3234 | ** Read the content of page pPg out of the database file. | ||
3235 | */ | ||
3236 | static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){ | ||
3237 | int rc; | ||
3238 | i64 offset; | ||
3239 | assert( MEMDB==0 ); | ||
3240 | assert(pPager->fd->pMethods||pPager->tempFile); | ||
3241 | if( !pPager->fd->pMethods ){ | ||
3242 | return SQLITE_IOERR_SHORT_READ; | ||
3243 | } | ||
3244 | offset = (pgno-1)*(i64)pPager->pageSize; | ||
3245 | rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize, offset); | ||
3246 | PAGER_INCR(sqlite3_pager_readdb_count); | ||
3247 | PAGER_INCR(pPager->nRead); | ||
3248 | IOTRACE(("PGIN %p %d\n", pPager, pgno)); | ||
3249 | if( pgno==1 ){ | ||
3250 | memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24], | ||
3251 | sizeof(pPager->dbFileVers)); | ||
3252 | } | ||
3253 | CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3); | ||
3254 | PAGERTRACE4("FETCH %d page %d hash(%08x)\n", | ||
3255 | PAGERID(pPager), pPg->pgno, pager_pagehash(pPg)); | ||
3256 | return rc; | ||
3257 | } | ||
3258 | |||
3259 | |||
3260 | /* | ||
3261 | ** This function is called to obtain the shared lock required before | ||
3262 | ** data may be read from the pager cache. If the shared lock has already | ||
3263 | ** been obtained, this function is a no-op. | ||
3264 | ** | ||
3265 | ** Immediately after obtaining the shared lock (if required), this function | ||
3266 | ** checks for a hot-journal file. If one is found, an emergency rollback | ||
3267 | ** is performed immediately. | ||
3268 | */ | ||
3269 | static int pagerSharedLock(Pager *pPager){ | ||
3270 | int rc = SQLITE_OK; | ||
3271 | int isHot = 0; | ||
3272 | |||
3273 | /* If this database is opened for exclusive access, has no outstanding | ||
3274 | ** page references and is in an error-state, now is the chance to clear | ||
3275 | ** the error. Discard the contents of the pager-cache and treat any | ||
3276 | ** open journal file as a hot-journal. | ||
3277 | */ | ||
3278 | if( !MEMDB && pPager->exclusiveMode && pPager->nRef==0 && pPager->errCode ){ | ||
3279 | if( pPager->journalOpen ){ | ||
3280 | isHot = 1; | ||
3281 | } | ||
3282 | pager_reset(pPager); | ||
3283 | pPager->errCode = SQLITE_OK; | ||
3284 | } | ||
3285 | |||
3286 | /* If the pager is still in an error state, do not proceed. The error | ||
3287 | ** state will be cleared at some point in the future when all page | ||
3288 | ** references are dropped and the cache can be discarded. | ||
3289 | */ | ||
3290 | if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ | ||
3291 | return pPager->errCode; | ||
3292 | } | ||
3293 | |||
3294 | if( pPager->state==PAGER_UNLOCK || isHot ){ | ||
3295 | sqlite3_vfs *pVfs = pPager->pVfs; | ||
3296 | if( !MEMDB ){ | ||
3297 | assert( pPager->nRef==0 ); | ||
3298 | if( !pPager->noReadlock ){ | ||
3299 | rc = pager_wait_on_lock(pPager, SHARED_LOCK); | ||
3300 | if( rc!=SQLITE_OK ){ | ||
3301 | return pager_error(pPager, rc); | ||
3302 | } | ||
3303 | assert( pPager->state>=SHARED_LOCK ); | ||
3304 | } | ||
3305 | |||
3306 | /* If a journal file exists, and there is no RESERVED lock on the | ||
3307 | ** database file, then it either needs to be played back or deleted. | ||
3308 | */ | ||
3309 | if( hasHotJournal(pPager) || isHot ){ | ||
3310 | /* Get an EXCLUSIVE lock on the database file. At this point it is | ||
3311 | ** important that a RESERVED lock is not obtained on the way to the | ||
3312 | ** EXCLUSIVE lock. If it were, another process might open the | ||
3313 | ** database file, detect the RESERVED lock, and conclude that the | ||
3314 | ** database is safe to read while this process is still rolling it | ||
3315 | ** back. | ||
3316 | ** | ||
3317 | ** Because the intermediate RESERVED lock is not requested, the | ||
3318 | ** second process will get to this point in the code and fail to | ||
3319 | ** obtain it's own EXCLUSIVE lock on the database file. | ||
3320 | */ | ||
3321 | if( pPager->state<EXCLUSIVE_LOCK ){ | ||
3322 | rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK); | ||
3323 | if( rc!=SQLITE_OK ){ | ||
3324 | pager_unlock(pPager); | ||
3325 | return pager_error(pPager, rc); | ||
3326 | } | ||
3327 | pPager->state = PAGER_EXCLUSIVE; | ||
3328 | } | ||
3329 | |||
3330 | /* Open the journal for reading only. Return SQLITE_BUSY if | ||
3331 | ** we are unable to open the journal file. | ||
3332 | ** | ||
3333 | ** The journal file does not need to be locked itself. The | ||
3334 | ** journal file is never open unless the main database file holds | ||
3335 | ** a write lock, so there is never any chance of two or more | ||
3336 | ** processes opening the journal at the same time. | ||
3337 | ** | ||
3338 | ** Open the journal for read/write access. This is because in | ||
3339 | ** exclusive-access mode the file descriptor will be kept open and | ||
3340 | ** possibly used for a transaction later on. On some systems, the | ||
3341 | ** OsTruncate() call used in exclusive-access mode also requires | ||
3342 | ** a read/write file handle. | ||
3343 | */ | ||
3344 | if( !isHot ){ | ||
3345 | rc = SQLITE_BUSY; | ||
3346 | if( sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){ | ||
3347 | int fout = 0; | ||
3348 | int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; | ||
3349 | assert( !pPager->tempFile ); | ||
3350 | rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); | ||
3351 | assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); | ||
3352 | if( fout&SQLITE_OPEN_READONLY ){ | ||
3353 | rc = SQLITE_BUSY; | ||
3354 | sqlite3OsClose(pPager->jfd); | ||
3355 | } | ||
3356 | } | ||
3357 | } | ||
3358 | if( rc!=SQLITE_OK ){ | ||
3359 | pager_unlock(pPager); | ||
3360 | return ((rc==SQLITE_NOMEM||rc==SQLITE_IOERR_NOMEM)?rc:SQLITE_BUSY); | ||
3361 | } | ||
3362 | pPager->journalOpen = 1; | ||
3363 | pPager->journalStarted = 0; | ||
3364 | pPager->journalOff = 0; | ||
3365 | pPager->setMaster = 0; | ||
3366 | pPager->journalHdr = 0; | ||
3367 | |||
3368 | /* Playback and delete the journal. Drop the database write | ||
3369 | ** lock and reacquire the read lock. | ||
3370 | */ | ||
3371 | rc = pager_playback(pPager, 1); | ||
3372 | if( rc!=SQLITE_OK ){ | ||
3373 | return pager_error(pPager, rc); | ||
3374 | } | ||
3375 | assert(pPager->state==PAGER_SHARED || | ||
3376 | (pPager->exclusiveMode && pPager->state>PAGER_SHARED) | ||
3377 | ); | ||
3378 | } | ||
3379 | |||
3380 | if( pPager->pAll ){ | ||
3381 | /* The shared-lock has just been acquired on the database file | ||
3382 | ** and there are already pages in the cache (from a previous | ||
3383 | ** read or write transaction). Check to see if the database | ||
3384 | ** has been modified. If the database has changed, flush the | ||
3385 | ** cache. | ||
3386 | ** | ||
3387 | ** Database changes is detected by looking at 15 bytes beginning | ||
3388 | ** at offset 24 into the file. The first 4 of these 16 bytes are | ||
3389 | ** a 32-bit counter that is incremented with each change. The | ||
3390 | ** other bytes change randomly with each file change when | ||
3391 | ** a codec is in use. | ||
3392 | ** | ||
3393 | ** There is a vanishingly small chance that a change will not be | ||
3394 | ** detected. The chance of an undetected change is so small that | ||
3395 | ** it can be neglected. | ||
3396 | */ | ||
3397 | char dbFileVers[sizeof(pPager->dbFileVers)]; | ||
3398 | sqlite3PagerPagecount(pPager); | ||
3399 | |||
3400 | if( pPager->errCode ){ | ||
3401 | return pPager->errCode; | ||
3402 | } | ||
3403 | |||
3404 | if( pPager->dbSize>0 ){ | ||
3405 | IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); | ||
3406 | rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); | ||
3407 | if( rc!=SQLITE_OK ){ | ||
3408 | return rc; | ||
3409 | } | ||
3410 | }else{ | ||
3411 | memset(dbFileVers, 0, sizeof(dbFileVers)); | ||
3412 | } | ||
3413 | |||
3414 | if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ | ||
3415 | pager_reset(pPager); | ||
3416 | } | ||
3417 | } | ||
3418 | } | ||
3419 | assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED ); | ||
3420 | if( pPager->state==PAGER_UNLOCK ){ | ||
3421 | pPager->state = PAGER_SHARED; | ||
3422 | } | ||
3423 | } | ||
3424 | |||
3425 | return rc; | ||
3426 | } | ||
3427 | |||
3428 | /* | ||
3429 | ** Allocate a PgHdr object. Either create a new one or reuse | ||
3430 | ** an existing one that is not otherwise in use. | ||
3431 | ** | ||
3432 | ** A new PgHdr structure is created if any of the following are | ||
3433 | ** true: | ||
3434 | ** | ||
3435 | ** (1) We have not exceeded our maximum allocated cache size | ||
3436 | ** as set by the "PRAGMA cache_size" command. | ||
3437 | ** | ||
3438 | ** (2) There are no unused PgHdr objects available at this time. | ||
3439 | ** | ||
3440 | ** (3) This is an in-memory database. | ||
3441 | ** | ||
3442 | ** (4) There are no PgHdr objects that do not require a journal | ||
3443 | ** file sync and a sync of the journal file is currently | ||
3444 | ** prohibited. | ||
3445 | ** | ||
3446 | ** Otherwise, reuse an existing PgHdr. In other words, reuse an | ||
3447 | ** existing PgHdr if all of the following are true: | ||
3448 | ** | ||
3449 | ** (1) We have reached or exceeded the maximum cache size | ||
3450 | ** allowed by "PRAGMA cache_size". | ||
3451 | ** | ||
3452 | ** (2) There is a PgHdr available with PgHdr->nRef==0 | ||
3453 | ** | ||
3454 | ** (3) We are not in an in-memory database | ||
3455 | ** | ||
3456 | ** (4) Either there is an available PgHdr that does not need | ||
3457 | ** to be synced to disk or else disk syncing is currently | ||
3458 | ** allowed. | ||
3459 | */ | ||
3460 | static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){ | ||
3461 | int rc = SQLITE_OK; | ||
3462 | PgHdr *pPg; | ||
3463 | void *pData; | ||
3464 | |||
3465 | /* Create a new PgHdr if any of the four conditions defined | ||
3466 | ** above are met: */ | ||
3467 | if( pPager->nPage<pPager->mxPage | ||
3468 | || pPager->lru.pFirst==0 | ||
3469 | || MEMDB | ||
3470 | || (pPager->lru.pFirstSynced==0 && pPager->doNotSync) | ||
3471 | ){ | ||
3472 | if( pPager->nPage>=pPager->nHash ){ | ||
3473 | pager_resize_hash_table(pPager, | ||
3474 | pPager->nHash<256 ? 256 : pPager->nHash*2); | ||
3475 | if( pPager->nHash==0 ){ | ||
3476 | rc = SQLITE_NOMEM; | ||
3477 | goto pager_allocate_out; | ||
3478 | } | ||
3479 | } | ||
3480 | pagerLeave(pPager); | ||
3481 | pPg = sqlite3_malloc( sizeof(*pPg) + sizeof(u32) + pPager->nExtra | ||
3482 | + MEMDB*sizeof(PgHistory) ); | ||
3483 | if( pPg ){ | ||
3484 | pData = sqlite3_malloc( pPager->pageSize ); | ||
3485 | if( pData==0 ){ | ||
3486 | sqlite3_free(pPg); | ||
3487 | pPg = 0; | ||
3488 | } | ||
3489 | } | ||
3490 | pagerEnter(pPager); | ||
3491 | if( pPg==0 ){ | ||
3492 | rc = SQLITE_NOMEM; | ||
3493 | goto pager_allocate_out; | ||
3494 | } | ||
3495 | memset(pPg, 0, sizeof(*pPg)); | ||
3496 | if( MEMDB ){ | ||
3497 | memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory)); | ||
3498 | } | ||
3499 | pPg->pData = pData; | ||
3500 | pPg->pPager = pPager; | ||
3501 | pPg->pNextAll = pPager->pAll; | ||
3502 | pPager->pAll = pPg; | ||
3503 | pPager->nPage++; | ||
3504 | }else{ | ||
3505 | /* Recycle an existing page with a zero ref-count. */ | ||
3506 | rc = pager_recycle(pPager, &pPg); | ||
3507 | if( rc==SQLITE_BUSY ){ | ||
3508 | rc = SQLITE_IOERR_BLOCKED; | ||
3509 | } | ||
3510 | if( rc!=SQLITE_OK ){ | ||
3511 | goto pager_allocate_out; | ||
3512 | } | ||
3513 | assert( pPager->state>=SHARED_LOCK ); | ||
3514 | assert(pPg); | ||
3515 | } | ||
3516 | *ppPg = pPg; | ||
3517 | |||
3518 | pager_allocate_out: | ||
3519 | return rc; | ||
3520 | } | ||
3521 | |||
3522 | /* | ||
3523 | ** Make sure we have the content for a page. If the page was | ||
3524 | ** previously acquired with noContent==1, then the content was | ||
3525 | ** just initialized to zeros instead of being read from disk. | ||
3526 | ** But now we need the real data off of disk. So make sure we | ||
3527 | ** have it. Read it in if we do not have it already. | ||
3528 | */ | ||
3529 | static int pager_get_content(PgHdr *pPg){ | ||
3530 | if( pPg->needRead ){ | ||
3531 | int rc = readDbPage(pPg->pPager, pPg, pPg->pgno); | ||
3532 | if( rc==SQLITE_OK ){ | ||
3533 | pPg->needRead = 0; | ||
3534 | }else{ | ||
3535 | return rc; | ||
3536 | } | ||
3537 | } | ||
3538 | return SQLITE_OK; | ||
3539 | } | ||
3540 | |||
3541 | /* | ||
3542 | ** Acquire a page. | ||
3543 | ** | ||
3544 | ** A read lock on the disk file is obtained when the first page is acquired. | ||
3545 | ** This read lock is dropped when the last page is released. | ||
3546 | ** | ||
3547 | ** This routine works for any page number greater than 0. If the database | ||
3548 | ** file is smaller than the requested page, then no actual disk | ||
3549 | ** read occurs and the memory image of the page is initialized to | ||
3550 | ** all zeros. The extra data appended to a page is always initialized | ||
3551 | ** to zeros the first time a page is loaded into memory. | ||
3552 | ** | ||
3553 | ** The acquisition might fail for several reasons. In all cases, | ||
3554 | ** an appropriate error code is returned and *ppPage is set to NULL. | ||
3555 | ** | ||
3556 | ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt | ||
3557 | ** to find a page in the in-memory cache first. If the page is not already | ||
3558 | ** in memory, this routine goes to disk to read it in whereas Lookup() | ||
3559 | ** just returns 0. This routine acquires a read-lock the first time it | ||
3560 | ** has to go to disk, and could also playback an old journal if necessary. | ||
3561 | ** Since Lookup() never goes to disk, it never has to deal with locks | ||
3562 | ** or journal files. | ||
3563 | ** | ||
3564 | ** If noContent is false, the page contents are actually read from disk. | ||
3565 | ** If noContent is true, it means that we do not care about the contents | ||
3566 | ** of the page at this time, so do not do a disk read. Just fill in the | ||
3567 | ** page content with zeros. But mark the fact that we have not read the | ||
3568 | ** content by setting the PgHdr.needRead flag. Later on, if | ||
3569 | ** sqlite3PagerWrite() is called on this page or if this routine is | ||
3570 | ** called again with noContent==0, that means that the content is needed | ||
3571 | ** and the disk read should occur at that point. | ||
3572 | */ | ||
3573 | static int pagerAcquire( | ||
3574 | Pager *pPager, /* The pager open on the database file */ | ||
3575 | Pgno pgno, /* Page number to fetch */ | ||
3576 | DbPage **ppPage, /* Write a pointer to the page here */ | ||
3577 | int noContent /* Do not bother reading content from disk if true */ | ||
3578 | ){ | ||
3579 | PgHdr *pPg; | ||
3580 | int rc; | ||
3581 | |||
3582 | assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 ); | ||
3583 | |||
3584 | /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page | ||
3585 | ** number greater than this, or zero, is requested. | ||
3586 | */ | ||
3587 | if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ | ||
3588 | return SQLITE_CORRUPT_BKPT; | ||
3589 | } | ||
3590 | |||
3591 | /* Make sure we have not hit any critical errors. | ||
3592 | */ | ||
3593 | assert( pPager!=0 ); | ||
3594 | *ppPage = 0; | ||
3595 | |||
3596 | /* If this is the first page accessed, then get a SHARED lock | ||
3597 | ** on the database file. pagerSharedLock() is a no-op if | ||
3598 | ** a database lock is already held. | ||
3599 | */ | ||
3600 | rc = pagerSharedLock(pPager); | ||
3601 | if( rc!=SQLITE_OK ){ | ||
3602 | return rc; | ||
3603 | } | ||
3604 | assert( pPager->state!=PAGER_UNLOCK ); | ||
3605 | |||
3606 | pPg = pager_lookup(pPager, pgno); | ||
3607 | if( pPg==0 ){ | ||
3608 | /* The requested page is not in the page cache. */ | ||
3609 | int nMax; | ||
3610 | int h; | ||
3611 | PAGER_INCR(pPager->nMiss); | ||
3612 | rc = pagerAllocatePage(pPager, &pPg); | ||
3613 | if( rc!=SQLITE_OK ){ | ||
3614 | return rc; | ||
3615 | } | ||
3616 | |||
3617 | pPg->pgno = pgno; | ||
3618 | assert( !MEMDB || pgno>pPager->stmtSize ); | ||
3619 | if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){ | ||
3620 | #if 0 | ||
3621 | sqlite3CheckMemory(pPager->aInJournal, pgno/8); | ||
3622 | #endif | ||
3623 | assert( pPager->journalOpen ); | ||
3624 | pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0; | ||
3625 | pPg->needSync = 0; | ||
3626 | }else{ | ||
3627 | pPg->inJournal = 0; | ||
3628 | pPg->needSync = 0; | ||
3629 | } | ||
3630 | |||
3631 | makeClean(pPg); | ||
3632 | pPg->nRef = 1; | ||
3633 | REFINFO(pPg); | ||
3634 | |||
3635 | pPager->nRef++; | ||
3636 | if( pPager->nExtra>0 ){ | ||
3637 | memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra); | ||
3638 | } | ||
3639 | nMax = sqlite3PagerPagecount(pPager); | ||
3640 | if( pPager->errCode ){ | ||
3641 | rc = pPager->errCode; | ||
3642 | sqlite3PagerUnref(pPg); | ||
3643 | return rc; | ||
3644 | } | ||
3645 | |||
3646 | /* Populate the page with data, either by reading from the database | ||
3647 | ** file, or by setting the entire page to zero. | ||
3648 | */ | ||
3649 | if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){ | ||
3650 | if( pgno>pPager->mxPgno ){ | ||
3651 | sqlite3PagerUnref(pPg); | ||
3652 | return SQLITE_FULL; | ||
3653 | } | ||
3654 | memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); | ||
3655 | pPg->needRead = noContent && !pPager->alwaysRollback; | ||
3656 | IOTRACE(("ZERO %p %d\n", pPager, pgno)); | ||
3657 | }else{ | ||
3658 | rc = readDbPage(pPager, pPg, pgno); | ||
3659 | if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ | ||
3660 | pPg->pgno = 0; | ||
3661 | sqlite3PagerUnref(pPg); | ||
3662 | return rc; | ||
3663 | } | ||
3664 | pPg->needRead = 0; | ||
3665 | } | ||
3666 | |||
3667 | /* Link the page into the page hash table */ | ||
3668 | h = pgno & (pPager->nHash-1); | ||
3669 | assert( pgno!=0 ); | ||
3670 | pPg->pNextHash = pPager->aHash[h]; | ||
3671 | pPager->aHash[h] = pPg; | ||
3672 | if( pPg->pNextHash ){ | ||
3673 | assert( pPg->pNextHash->pPrevHash==0 ); | ||
3674 | pPg->pNextHash->pPrevHash = pPg; | ||
3675 | } | ||
3676 | |||
3677 | #ifdef SQLITE_CHECK_PAGES | ||
3678 | pPg->pageHash = pager_pagehash(pPg); | ||
3679 | #endif | ||
3680 | }else{ | ||
3681 | /* The requested page is in the page cache. */ | ||
3682 | assert(pPager->nRef>0 || pgno==1); | ||
3683 | PAGER_INCR(pPager->nHit); | ||
3684 | if( !noContent ){ | ||
3685 | rc = pager_get_content(pPg); | ||
3686 | if( rc ){ | ||
3687 | return rc; | ||
3688 | } | ||
3689 | } | ||
3690 | page_ref(pPg); | ||
3691 | } | ||
3692 | *ppPage = pPg; | ||
3693 | return SQLITE_OK; | ||
3694 | } | ||
3695 | int sqlite3PagerAcquire( | ||
3696 | Pager *pPager, /* The pager open on the database file */ | ||
3697 | Pgno pgno, /* Page number to fetch */ | ||
3698 | DbPage **ppPage, /* Write a pointer to the page here */ | ||
3699 | int noContent /* Do not bother reading content from disk if true */ | ||
3700 | ){ | ||
3701 | int rc; | ||
3702 | pagerEnter(pPager); | ||
3703 | rc = pagerAcquire(pPager, pgno, ppPage, noContent); | ||
3704 | pagerLeave(pPager); | ||
3705 | return rc; | ||
3706 | } | ||
3707 | |||
3708 | |||
3709 | /* | ||
3710 | ** Acquire a page if it is already in the in-memory cache. Do | ||
3711 | ** not read the page from disk. Return a pointer to the page, | ||
3712 | ** or 0 if the page is not in cache. | ||
3713 | ** | ||
3714 | ** See also sqlite3PagerGet(). The difference between this routine | ||
3715 | ** and sqlite3PagerGet() is that _get() will go to the disk and read | ||
3716 | ** in the page if the page is not already in cache. This routine | ||
3717 | ** returns NULL if the page is not in cache or if a disk I/O error | ||
3718 | ** has ever happened. | ||
3719 | */ | ||
3720 | DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ | ||
3721 | PgHdr *pPg = 0; | ||
3722 | |||
3723 | assert( pPager!=0 ); | ||
3724 | assert( pgno!=0 ); | ||
3725 | |||
3726 | pagerEnter(pPager); | ||
3727 | if( pPager->state==PAGER_UNLOCK ){ | ||
3728 | assert( !pPager->pAll || pPager->exclusiveMode ); | ||
3729 | }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ | ||
3730 | /* Do nothing */ | ||
3731 | }else if( (pPg = pager_lookup(pPager, pgno))!=0 ){ | ||
3732 | page_ref(pPg); | ||
3733 | } | ||
3734 | pagerLeave(pPager); | ||
3735 | return pPg; | ||
3736 | } | ||
3737 | |||
3738 | /* | ||
3739 | ** Release a page. | ||
3740 | ** | ||
3741 | ** If the number of references to the page drops to zero, then the | ||
3742 | ** page is added to the LRU list. When all references to all pages | ||
3743 | ** are released, a rollback occurs and the lock on the database is | ||
3744 | ** removed. | ||
3745 | */ | ||
3746 | int sqlite3PagerUnref(DbPage *pPg){ | ||
3747 | Pager *pPager = pPg->pPager; | ||
3748 | |||
3749 | /* Decrement the reference count for this page | ||
3750 | */ | ||
3751 | assert( pPg->nRef>0 ); | ||
3752 | pagerEnter(pPg->pPager); | ||
3753 | pPg->nRef--; | ||
3754 | REFINFO(pPg); | ||
3755 | |||
3756 | CHECK_PAGE(pPg); | ||
3757 | |||
3758 | /* When the number of references to a page reach 0, call the | ||
3759 | ** destructor and add the page to the freelist. | ||
3760 | */ | ||
3761 | if( pPg->nRef==0 ){ | ||
3762 | |||
3763 | lruListAdd(pPg); | ||
3764 | if( pPager->xDestructor ){ | ||
3765 | pPager->xDestructor(pPg, pPager->pageSize); | ||
3766 | } | ||
3767 | |||
3768 | /* When all pages reach the freelist, drop the read lock from | ||
3769 | ** the database file. | ||
3770 | */ | ||
3771 | pPager->nRef--; | ||
3772 | assert( pPager->nRef>=0 ); | ||
3773 | if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){ | ||
3774 | pagerUnlockAndRollback(pPager); | ||
3775 | } | ||
3776 | } | ||
3777 | pagerLeave(pPager); | ||
3778 | return SQLITE_OK; | ||
3779 | } | ||
3780 | |||
3781 | /* | ||
3782 | ** Create a journal file for pPager. There should already be a RESERVED | ||
3783 | ** or EXCLUSIVE lock on the database file when this routine is called. | ||
3784 | ** | ||
3785 | ** Return SQLITE_OK if everything succeeds. Return an error code and release the | ||
3786 | ** write lock if anything goes wrong. | ||
3787 | */ | ||
3788 | static int pager_open_journal(Pager *pPager){ | ||
3789 | sqlite3_vfs *pVfs = pPager->pVfs; | ||
3790 | int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE); | ||
3791 | |||
3792 | int rc; | ||
3793 | assert( !MEMDB ); | ||
3794 | assert( pPager->state>=PAGER_RESERVED ); | ||
3795 | assert( pPager->journalOpen==0 ); | ||
3796 | assert( pPager->useJournal ); | ||
3797 | assert( pPager->aInJournal==0 ); | ||
3798 | sqlite3PagerPagecount(pPager); | ||
3799 | pagerLeave(pPager); | ||
3800 | pPager->aInJournal = sqlite3MallocZero( pPager->dbSize/8 + 1 ); | ||
3801 | pagerEnter(pPager); | ||
3802 | if( pPager->aInJournal==0 ){ | ||
3803 | rc = SQLITE_NOMEM; | ||
3804 | goto failed_to_open_journal; | ||
3805 | } | ||
3806 | |||
3807 | if( pPager->tempFile ){ | ||
3808 | flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); | ||
3809 | }else{ | ||
3810 | flags |= (SQLITE_OPEN_MAIN_JOURNAL); | ||
3811 | } | ||
3812 | #ifdef SQLITE_ENABLE_ATOMIC_WRITE | ||
3813 | rc = sqlite3JournalOpen( | ||
3814 | pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager) | ||
3815 | ); | ||
3816 | #else | ||
3817 | rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0); | ||
3818 | #endif | ||
3819 | assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); | ||
3820 | pPager->journalOff = 0; | ||
3821 | pPager->setMaster = 0; | ||
3822 | pPager->journalHdr = 0; | ||
3823 | if( rc!=SQLITE_OK ){ | ||
3824 | if( rc==SQLITE_NOMEM ){ | ||
3825 | sqlite3OsDelete(pVfs, pPager->zJournal, 0); | ||
3826 | } | ||
3827 | goto failed_to_open_journal; | ||
3828 | } | ||
3829 | pPager->journalOpen = 1; | ||
3830 | pPager->journalStarted = 0; | ||
3831 | pPager->needSync = 0; | ||
3832 | pPager->alwaysRollback = 0; | ||
3833 | pPager->nRec = 0; | ||
3834 | if( pPager->errCode ){ | ||
3835 | rc = pPager->errCode; | ||
3836 | goto failed_to_open_journal; | ||
3837 | } | ||
3838 | pPager->origDbSize = pPager->dbSize; | ||
3839 | |||
3840 | rc = writeJournalHdr(pPager); | ||
3841 | |||
3842 | if( pPager->stmtAutoopen && rc==SQLITE_OK ){ | ||
3843 | rc = sqlite3PagerStmtBegin(pPager); | ||
3844 | } | ||
3845 | if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){ | ||
3846 | rc = pager_end_transaction(pPager); | ||
3847 | if( rc==SQLITE_OK ){ | ||
3848 | rc = SQLITE_FULL; | ||
3849 | } | ||
3850 | } | ||
3851 | return rc; | ||
3852 | |||
3853 | failed_to_open_journal: | ||
3854 | sqlite3_free(pPager->aInJournal); | ||
3855 | pPager->aInJournal = 0; | ||
3856 | return rc; | ||
3857 | } | ||
3858 | |||
3859 | /* | ||
3860 | ** Acquire a write-lock on the database. The lock is removed when | ||
3861 | ** any of the following happen: | ||
3862 | ** | ||
3863 | ** * sqlite3PagerCommitPhaseTwo() is called. | ||
3864 | ** * sqlite3PagerRollback() is called. | ||
3865 | ** * sqlite3PagerClose() is called. | ||
3866 | ** * sqlite3PagerUnref() is called on every outstanding page. | ||
3867 | ** | ||
3868 | ** The first parameter to this routine is a pointer to any open page of the | ||
3869 | ** database file. Nothing changes about the page - it is used merely to | ||
3870 | ** acquire a pointer to the Pager structure and as proof that there is | ||
3871 | ** already a read-lock on the database. | ||
3872 | ** | ||
3873 | ** The second parameter indicates how much space in bytes to reserve for a | ||
3874 | ** master journal file-name at the start of the journal when it is created. | ||
3875 | ** | ||
3876 | ** A journal file is opened if this is not a temporary file. For temporary | ||
3877 | ** files, the opening of the journal file is deferred until there is an | ||
3878 | ** actual need to write to the journal. | ||
3879 | ** | ||
3880 | ** If the database is already reserved for writing, this routine is a no-op. | ||
3881 | ** | ||
3882 | ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file | ||
3883 | ** immediately instead of waiting until we try to flush the cache. The | ||
3884 | ** exFlag is ignored if a transaction is already active. | ||
3885 | */ | ||
3886 | int sqlite3PagerBegin(DbPage *pPg, int exFlag){ | ||
3887 | Pager *pPager = pPg->pPager; | ||
3888 | int rc = SQLITE_OK; | ||
3889 | pagerEnter(pPager); | ||
3890 | assert( pPg->nRef>0 ); | ||
3891 | assert( pPager->state!=PAGER_UNLOCK ); | ||
3892 | if( pPager->state==PAGER_SHARED ){ | ||
3893 | assert( pPager->aInJournal==0 ); | ||
3894 | if( MEMDB ){ | ||
3895 | pPager->state = PAGER_EXCLUSIVE; | ||
3896 | pPager->origDbSize = pPager->dbSize; | ||
3897 | }else{ | ||
3898 | rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); | ||
3899 | if( rc==SQLITE_OK ){ | ||
3900 | pPager->state = PAGER_RESERVED; | ||
3901 | if( exFlag ){ | ||
3902 | rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); | ||
3903 | } | ||
3904 | } | ||
3905 | if( rc!=SQLITE_OK ){ | ||
3906 | pagerLeave(pPager); | ||
3907 | return rc; | ||
3908 | } | ||
3909 | pPager->dirtyCache = 0; | ||
3910 | PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager)); | ||
3911 | if( pPager->useJournal && !pPager->tempFile ){ | ||
3912 | rc = pager_open_journal(pPager); | ||
3913 | } | ||
3914 | } | ||
3915 | }else if( pPager->journalOpen && pPager->journalOff==0 ){ | ||
3916 | /* This happens when the pager was in exclusive-access mode last | ||
3917 | ** time a (read or write) transaction was successfully concluded | ||
3918 | ** by this connection. Instead of deleting the journal file it was | ||
3919 | ** kept open and truncated to 0 bytes. | ||
3920 | */ | ||
3921 | assert( pPager->nRec==0 ); | ||
3922 | assert( pPager->origDbSize==0 ); | ||
3923 | assert( pPager->aInJournal==0 ); | ||
3924 | sqlite3PagerPagecount(pPager); | ||
3925 | pagerLeave(pPager); | ||
3926 | pPager->aInJournal = sqlite3MallocZero( pPager->dbSize/8 + 1 ); | ||
3927 | pagerEnter(pPager); | ||
3928 | if( !pPager->aInJournal ){ | ||
3929 | rc = SQLITE_NOMEM; | ||
3930 | }else{ | ||
3931 | pPager->origDbSize = pPager->dbSize; | ||
3932 | rc = writeJournalHdr(pPager); | ||
3933 | } | ||
3934 | } | ||
3935 | assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK ); | ||
3936 | pagerLeave(pPager); | ||
3937 | return rc; | ||
3938 | } | ||
3939 | |||
3940 | /* | ||
3941 | ** Make a page dirty. Set its dirty flag and add it to the dirty | ||
3942 | ** page list. | ||
3943 | */ | ||
3944 | static void makeDirty(PgHdr *pPg){ | ||
3945 | if( pPg->dirty==0 ){ | ||
3946 | Pager *pPager = pPg->pPager; | ||
3947 | pPg->dirty = 1; | ||
3948 | pPg->pDirty = pPager->pDirty; | ||
3949 | if( pPager->pDirty ){ | ||
3950 | pPager->pDirty->pPrevDirty = pPg; | ||
3951 | } | ||
3952 | pPg->pPrevDirty = 0; | ||
3953 | pPager->pDirty = pPg; | ||
3954 | } | ||
3955 | } | ||
3956 | |||
3957 | /* | ||
3958 | ** Make a page clean. Clear its dirty bit and remove it from the | ||
3959 | ** dirty page list. | ||
3960 | */ | ||
3961 | static void makeClean(PgHdr *pPg){ | ||
3962 | if( pPg->dirty ){ | ||
3963 | pPg->dirty = 0; | ||
3964 | if( pPg->pDirty ){ | ||
3965 | assert( pPg->pDirty->pPrevDirty==pPg ); | ||
3966 | pPg->pDirty->pPrevDirty = pPg->pPrevDirty; | ||
3967 | } | ||
3968 | if( pPg->pPrevDirty ){ | ||
3969 | assert( pPg->pPrevDirty->pDirty==pPg ); | ||
3970 | pPg->pPrevDirty->pDirty = pPg->pDirty; | ||
3971 | }else{ | ||
3972 | assert( pPg->pPager->pDirty==pPg ); | ||
3973 | pPg->pPager->pDirty = pPg->pDirty; | ||
3974 | } | ||
3975 | } | ||
3976 | } | ||
3977 | |||
3978 | |||
3979 | /* | ||
3980 | ** Mark a data page as writeable. The page is written into the journal | ||
3981 | ** if it is not there already. This routine must be called before making | ||
3982 | ** changes to a page. | ||
3983 | ** | ||
3984 | ** The first time this routine is called, the pager creates a new | ||
3985 | ** journal and acquires a RESERVED lock on the database. If the RESERVED | ||
3986 | ** lock could not be acquired, this routine returns SQLITE_BUSY. The | ||
3987 | ** calling routine must check for that return value and be careful not to | ||
3988 | ** change any page data until this routine returns SQLITE_OK. | ||
3989 | ** | ||
3990 | ** If the journal file could not be written because the disk is full, | ||
3991 | ** then this routine returns SQLITE_FULL and does an immediate rollback. | ||
3992 | ** All subsequent write attempts also return SQLITE_FULL until there | ||
3993 | ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to | ||
3994 | ** reset. | ||
3995 | */ | ||
3996 | static int pager_write(PgHdr *pPg){ | ||
3997 | void *pData = PGHDR_TO_DATA(pPg); | ||
3998 | Pager *pPager = pPg->pPager; | ||
3999 | int rc = SQLITE_OK; | ||
4000 | |||
4001 | /* Check for errors | ||
4002 | */ | ||
4003 | if( pPager->errCode ){ | ||
4004 | return pPager->errCode; | ||
4005 | } | ||
4006 | if( pPager->readOnly ){ | ||
4007 | return SQLITE_PERM; | ||
4008 | } | ||
4009 | |||
4010 | assert( !pPager->setMaster ); | ||
4011 | |||
4012 | CHECK_PAGE(pPg); | ||
4013 | |||
4014 | /* If this page was previously acquired with noContent==1, that means | ||
4015 | ** we didn't really read in the content of the page. This can happen | ||
4016 | ** (for example) when the page is being moved to the freelist. But | ||
4017 | ** now we are (perhaps) moving the page off of the freelist for | ||
4018 | ** reuse and we need to know its original content so that content | ||
4019 | ** can be stored in the rollback journal. So do the read at this | ||
4020 | ** time. | ||
4021 | */ | ||
4022 | rc = pager_get_content(pPg); | ||
4023 | if( rc ){ | ||
4024 | return rc; | ||
4025 | } | ||
4026 | |||
4027 | /* Mark the page as dirty. If the page has already been written | ||
4028 | ** to the journal then we can return right away. | ||
4029 | */ | ||
4030 | makeDirty(pPg); | ||
4031 | if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){ | ||
4032 | pPager->dirtyCache = 1; | ||
4033 | }else{ | ||
4034 | |||
4035 | /* If we get this far, it means that the page needs to be | ||
4036 | ** written to the transaction journal or the ckeckpoint journal | ||
4037 | ** or both. | ||
4038 | ** | ||
4039 | ** First check to see that the transaction journal exists and | ||
4040 | ** create it if it does not. | ||
4041 | */ | ||
4042 | assert( pPager->state!=PAGER_UNLOCK ); | ||
4043 | rc = sqlite3PagerBegin(pPg, 0); | ||
4044 | if( rc!=SQLITE_OK ){ | ||
4045 | return rc; | ||
4046 | } | ||
4047 | assert( pPager->state>=PAGER_RESERVED ); | ||
4048 | if( !pPager->journalOpen && pPager->useJournal ){ | ||
4049 | rc = pager_open_journal(pPager); | ||
4050 | if( rc!=SQLITE_OK ) return rc; | ||
4051 | } | ||
4052 | assert( pPager->journalOpen || !pPager->useJournal ); | ||
4053 | pPager->dirtyCache = 1; | ||
4054 | |||
4055 | /* The transaction journal now exists and we have a RESERVED or an | ||
4056 | ** EXCLUSIVE lock on the main database file. Write the current page to | ||
4057 | ** the transaction journal if it is not there already. | ||
4058 | */ | ||
4059 | if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){ | ||
4060 | if( (int)pPg->pgno <= pPager->origDbSize ){ | ||
4061 | if( MEMDB ){ | ||
4062 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4063 | PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno); | ||
4064 | assert( pHist->pOrig==0 ); | ||
4065 | pHist->pOrig = sqlite3_malloc( pPager->pageSize ); | ||
4066 | if( pHist->pOrig ){ | ||
4067 | memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize); | ||
4068 | } | ||
4069 | }else{ | ||
4070 | u32 cksum; | ||
4071 | char *pData2; | ||
4072 | |||
4073 | /* We should never write to the journal file the page that | ||
4074 | ** contains the database locks. The following assert verifies | ||
4075 | ** that we do not. */ | ||
4076 | assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); | ||
4077 | pData2 = CODEC2(pPager, pData, pPg->pgno, 7); | ||
4078 | cksum = pager_cksum(pPager, (u8*)pData2); | ||
4079 | rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno); | ||
4080 | if( rc==SQLITE_OK ){ | ||
4081 | rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, | ||
4082 | pPager->journalOff + 4); | ||
4083 | pPager->journalOff += pPager->pageSize+4; | ||
4084 | } | ||
4085 | if( rc==SQLITE_OK ){ | ||
4086 | rc = write32bits(pPager->jfd, pPager->journalOff, cksum); | ||
4087 | pPager->journalOff += 4; | ||
4088 | } | ||
4089 | IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, | ||
4090 | pPager->journalOff, pPager->pageSize)); | ||
4091 | PAGER_INCR(sqlite3_pager_writej_count); | ||
4092 | PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n", | ||
4093 | PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg)); | ||
4094 | |||
4095 | /* An error has occured writing to the journal file. The | ||
4096 | ** transaction will be rolled back by the layer above. | ||
4097 | */ | ||
4098 | if( rc!=SQLITE_OK ){ | ||
4099 | return rc; | ||
4100 | } | ||
4101 | |||
4102 | pPager->nRec++; | ||
4103 | assert( pPager->aInJournal!=0 ); | ||
4104 | pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4105 | pPg->needSync = !pPager->noSync; | ||
4106 | if( pPager->stmtInUse ){ | ||
4107 | pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4108 | } | ||
4109 | } | ||
4110 | }else{ | ||
4111 | pPg->needSync = !pPager->journalStarted && !pPager->noSync; | ||
4112 | PAGERTRACE4("APPEND %d page %d needSync=%d\n", | ||
4113 | PAGERID(pPager), pPg->pgno, pPg->needSync); | ||
4114 | } | ||
4115 | if( pPg->needSync ){ | ||
4116 | pPager->needSync = 1; | ||
4117 | } | ||
4118 | pPg->inJournal = 1; | ||
4119 | } | ||
4120 | |||
4121 | /* If the statement journal is open and the page is not in it, | ||
4122 | ** then write the current page to the statement journal. Note that | ||
4123 | ** the statement journal format differs from the standard journal format | ||
4124 | ** in that it omits the checksums and the header. | ||
4125 | */ | ||
4126 | if( pPager->stmtInUse | ||
4127 | && !pageInStatement(pPg) | ||
4128 | && (int)pPg->pgno<=pPager->stmtSize | ||
4129 | ){ | ||
4130 | assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); | ||
4131 | if( MEMDB ){ | ||
4132 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4133 | assert( pHist->pStmt==0 ); | ||
4134 | pHist->pStmt = sqlite3_malloc( pPager->pageSize ); | ||
4135 | if( pHist->pStmt ){ | ||
4136 | memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize); | ||
4137 | } | ||
4138 | PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno); | ||
4139 | page_add_to_stmt_list(pPg); | ||
4140 | }else{ | ||
4141 | i64 offset = pPager->stmtNRec*(4+pPager->pageSize); | ||
4142 | char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7); | ||
4143 | rc = write32bits(pPager->stfd, offset, pPg->pgno); | ||
4144 | if( rc==SQLITE_OK ){ | ||
4145 | rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4); | ||
4146 | } | ||
4147 | PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno); | ||
4148 | if( rc!=SQLITE_OK ){ | ||
4149 | return rc; | ||
4150 | } | ||
4151 | pPager->stmtNRec++; | ||
4152 | assert( pPager->aInStmt!=0 ); | ||
4153 | pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4154 | } | ||
4155 | } | ||
4156 | } | ||
4157 | |||
4158 | /* Update the database size and return. | ||
4159 | */ | ||
4160 | assert( pPager->state>=PAGER_SHARED ); | ||
4161 | if( pPager->dbSize<(int)pPg->pgno ){ | ||
4162 | pPager->dbSize = pPg->pgno; | ||
4163 | if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){ | ||
4164 | pPager->dbSize++; | ||
4165 | } | ||
4166 | } | ||
4167 | return rc; | ||
4168 | } | ||
4169 | |||
4170 | /* | ||
4171 | ** This function is used to mark a data-page as writable. It uses | ||
4172 | ** pager_write() to open a journal file (if it is not already open) | ||
4173 | ** and write the page *pData to the journal. | ||
4174 | ** | ||
4175 | ** The difference between this function and pager_write() is that this | ||
4176 | ** function also deals with the special case where 2 or more pages | ||
4177 | ** fit on a single disk sector. In this case all co-resident pages | ||
4178 | ** must have been written to the journal file before returning. | ||
4179 | */ | ||
4180 | int sqlite3PagerWrite(DbPage *pDbPage){ | ||
4181 | int rc = SQLITE_OK; | ||
4182 | |||
4183 | PgHdr *pPg = pDbPage; | ||
4184 | Pager *pPager = pPg->pPager; | ||
4185 | Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); | ||
4186 | |||
4187 | pagerEnter(pPager); | ||
4188 | if( !MEMDB && nPagePerSector>1 ){ | ||
4189 | Pgno nPageCount; /* Total number of pages in database file */ | ||
4190 | Pgno pg1; /* First page of the sector pPg is located on. */ | ||
4191 | int nPage; /* Number of pages starting at pg1 to journal */ | ||
4192 | int ii; | ||
4193 | int needSync = 0; | ||
4194 | |||
4195 | /* Set the doNotSync flag to 1. This is because we cannot allow a journal | ||
4196 | ** header to be written between the pages journaled by this function. | ||
4197 | */ | ||
4198 | assert( pPager->doNotSync==0 ); | ||
4199 | pPager->doNotSync = 1; | ||
4200 | |||
4201 | /* This trick assumes that both the page-size and sector-size are | ||
4202 | ** an integer power of 2. It sets variable pg1 to the identifier | ||
4203 | ** of the first page of the sector pPg is located on. | ||
4204 | */ | ||
4205 | pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; | ||
4206 | |||
4207 | nPageCount = sqlite3PagerPagecount(pPager); | ||
4208 | if( pPg->pgno>nPageCount ){ | ||
4209 | nPage = (pPg->pgno - pg1)+1; | ||
4210 | }else if( (pg1+nPagePerSector-1)>nPageCount ){ | ||
4211 | nPage = nPageCount+1-pg1; | ||
4212 | }else{ | ||
4213 | nPage = nPagePerSector; | ||
4214 | } | ||
4215 | assert(nPage>0); | ||
4216 | assert(pg1<=pPg->pgno); | ||
4217 | assert((pg1+nPage)>pPg->pgno); | ||
4218 | |||
4219 | for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){ | ||
4220 | Pgno pg = pg1+ii; | ||
4221 | PgHdr *pPage; | ||
4222 | if( !pPager->aInJournal || pg==pPg->pgno || | ||
4223 | pg>pPager->origDbSize || !(pPager->aInJournal[pg/8]&(1<<(pg&7))) | ||
4224 | ) { | ||
4225 | if( pg!=PAGER_MJ_PGNO(pPager) ){ | ||
4226 | rc = sqlite3PagerGet(pPager, pg, &pPage); | ||
4227 | if( rc==SQLITE_OK ){ | ||
4228 | rc = pager_write(pPage); | ||
4229 | if( pPage->needSync ){ | ||
4230 | needSync = 1; | ||
4231 | } | ||
4232 | sqlite3PagerUnref(pPage); | ||
4233 | } | ||
4234 | } | ||
4235 | }else if( (pPage = pager_lookup(pPager, pg)) ){ | ||
4236 | if( pPage->needSync ){ | ||
4237 | needSync = 1; | ||
4238 | } | ||
4239 | } | ||
4240 | } | ||
4241 | |||
4242 | /* If the PgHdr.needSync flag is set for any of the nPage pages | ||
4243 | ** starting at pg1, then it needs to be set for all of them. Because | ||
4244 | ** writing to any of these nPage pages may damage the others, the | ||
4245 | ** journal file must contain sync()ed copies of all of them | ||
4246 | ** before any of them can be written out to the database file. | ||
4247 | */ | ||
4248 | if( needSync ){ | ||
4249 | for(ii=0; ii<nPage && needSync; ii++){ | ||
4250 | PgHdr *pPage = pager_lookup(pPager, pg1+ii); | ||
4251 | if( pPage ) pPage->needSync = 1; | ||
4252 | } | ||
4253 | assert(pPager->needSync); | ||
4254 | } | ||
4255 | |||
4256 | assert( pPager->doNotSync==1 ); | ||
4257 | pPager->doNotSync = 0; | ||
4258 | }else{ | ||
4259 | rc = pager_write(pDbPage); | ||
4260 | } | ||
4261 | pagerLeave(pPager); | ||
4262 | return rc; | ||
4263 | } | ||
4264 | |||
4265 | /* | ||
4266 | ** Return TRUE if the page given in the argument was previously passed | ||
4267 | ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok | ||
4268 | ** to change the content of the page. | ||
4269 | */ | ||
4270 | #ifndef NDEBUG | ||
4271 | int sqlite3PagerIswriteable(DbPage *pPg){ | ||
4272 | return pPg->dirty; | ||
4273 | } | ||
4274 | #endif | ||
4275 | |||
4276 | #ifndef SQLITE_OMIT_VACUUM | ||
4277 | /* | ||
4278 | ** Replace the content of a single page with the information in the third | ||
4279 | ** argument. | ||
4280 | */ | ||
4281 | int sqlite3PagerOverwrite(Pager *pPager, Pgno pgno, void *pData){ | ||
4282 | PgHdr *pPg; | ||
4283 | int rc; | ||
4284 | |||
4285 | pagerEnter(pPager); | ||
4286 | rc = sqlite3PagerGet(pPager, pgno, &pPg); | ||
4287 | if( rc==SQLITE_OK ){ | ||
4288 | rc = sqlite3PagerWrite(pPg); | ||
4289 | if( rc==SQLITE_OK ){ | ||
4290 | memcpy(sqlite3PagerGetData(pPg), pData, pPager->pageSize); | ||
4291 | } | ||
4292 | sqlite3PagerUnref(pPg); | ||
4293 | } | ||
4294 | pagerLeave(pPager); | ||
4295 | return rc; | ||
4296 | } | ||
4297 | #endif | ||
4298 | |||
4299 | /* | ||
4300 | ** A call to this routine tells the pager that it is not necessary to | ||
4301 | ** write the information on page pPg back to the disk, even though | ||
4302 | ** that page might be marked as dirty. | ||
4303 | ** | ||
4304 | ** The overlying software layer calls this routine when all of the data | ||
4305 | ** on the given page is unused. The pager marks the page as clean so | ||
4306 | ** that it does not get written to disk. | ||
4307 | ** | ||
4308 | ** Tests show that this optimization, together with the | ||
4309 | ** sqlite3PagerDontRollback() below, more than double the speed | ||
4310 | ** of large INSERT operations and quadruple the speed of large DELETEs. | ||
4311 | ** | ||
4312 | ** When this routine is called, set the alwaysRollback flag to true. | ||
4313 | ** Subsequent calls to sqlite3PagerDontRollback() for the same page | ||
4314 | ** will thereafter be ignored. This is necessary to avoid a problem | ||
4315 | ** where a page with data is added to the freelist during one part of | ||
4316 | ** a transaction then removed from the freelist during a later part | ||
4317 | ** of the same transaction and reused for some other purpose. When it | ||
4318 | ** is first added to the freelist, this routine is called. When reused, | ||
4319 | ** the sqlite3PagerDontRollback() routine is called. But because the | ||
4320 | ** page contains critical data, we still need to be sure it gets | ||
4321 | ** rolled back in spite of the sqlite3PagerDontRollback() call. | ||
4322 | */ | ||
4323 | void sqlite3PagerDontWrite(DbPage *pDbPage){ | ||
4324 | PgHdr *pPg = pDbPage; | ||
4325 | Pager *pPager = pPg->pPager; | ||
4326 | |||
4327 | if( MEMDB ) return; | ||
4328 | pagerEnter(pPager); | ||
4329 | pPg->alwaysRollback = 1; | ||
4330 | if( pPg->dirty && !pPager->stmtInUse ){ | ||
4331 | assert( pPager->state>=PAGER_SHARED ); | ||
4332 | if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){ | ||
4333 | /* If this pages is the last page in the file and the file has grown | ||
4334 | ** during the current transaction, then do NOT mark the page as clean. | ||
4335 | ** When the database file grows, we must make sure that the last page | ||
4336 | ** gets written at least once so that the disk file will be the correct | ||
4337 | ** size. If you do not write this page and the size of the file | ||
4338 | ** on the disk ends up being too small, that can lead to database | ||
4339 | ** corruption during the next transaction. | ||
4340 | */ | ||
4341 | }else{ | ||
4342 | PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)); | ||
4343 | IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) | ||
4344 | makeClean(pPg); | ||
4345 | #ifdef SQLITE_CHECK_PAGES | ||
4346 | pPg->pageHash = pager_pagehash(pPg); | ||
4347 | #endif | ||
4348 | } | ||
4349 | } | ||
4350 | pagerLeave(pPager); | ||
4351 | } | ||
4352 | |||
4353 | /* | ||
4354 | ** A call to this routine tells the pager that if a rollback occurs, | ||
4355 | ** it is not necessary to restore the data on the given page. This | ||
4356 | ** means that the pager does not have to record the given page in the | ||
4357 | ** rollback journal. | ||
4358 | ** | ||
4359 | ** If we have not yet actually read the content of this page (if | ||
4360 | ** the PgHdr.needRead flag is set) then this routine acts as a promise | ||
4361 | ** that we will never need to read the page content in the future. | ||
4362 | ** so the needRead flag can be cleared at this point. | ||
4363 | */ | ||
4364 | void sqlite3PagerDontRollback(DbPage *pPg){ | ||
4365 | Pager *pPager = pPg->pPager; | ||
4366 | |||
4367 | pagerEnter(pPager); | ||
4368 | assert( pPager->state>=PAGER_RESERVED ); | ||
4369 | if( pPager->journalOpen==0 ) return; | ||
4370 | if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return; | ||
4371 | if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){ | ||
4372 | assert( pPager->aInJournal!=0 ); | ||
4373 | pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4374 | pPg->inJournal = 1; | ||
4375 | pPg->needRead = 0; | ||
4376 | if( pPager->stmtInUse ){ | ||
4377 | pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4378 | } | ||
4379 | PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager)); | ||
4380 | IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno)) | ||
4381 | } | ||
4382 | if( pPager->stmtInUse | ||
4383 | && !pageInStatement(pPg) | ||
4384 | && (int)pPg->pgno<=pPager->stmtSize | ||
4385 | ){ | ||
4386 | assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); | ||
4387 | assert( pPager->aInStmt!=0 ); | ||
4388 | pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); | ||
4389 | } | ||
4390 | pagerLeave(pPager); | ||
4391 | } | ||
4392 | |||
4393 | |||
4394 | /* | ||
4395 | ** This routine is called to increment the database file change-counter, | ||
4396 | ** stored at byte 24 of the pager file. | ||
4397 | */ | ||
4398 | static int pager_incr_changecounter(Pager *pPager, int isDirect){ | ||
4399 | PgHdr *pPgHdr; | ||
4400 | u32 change_counter; | ||
4401 | int rc = SQLITE_OK; | ||
4402 | |||
4403 | if( !pPager->changeCountDone ){ | ||
4404 | /* Open page 1 of the file for writing. */ | ||
4405 | rc = sqlite3PagerGet(pPager, 1, &pPgHdr); | ||
4406 | if( rc!=SQLITE_OK ) return rc; | ||
4407 | |||
4408 | if( !isDirect ){ | ||
4409 | rc = sqlite3PagerWrite(pPgHdr); | ||
4410 | if( rc!=SQLITE_OK ){ | ||
4411 | sqlite3PagerUnref(pPgHdr); | ||
4412 | return rc; | ||
4413 | } | ||
4414 | } | ||
4415 | |||
4416 | /* Increment the value just read and write it back to byte 24. */ | ||
4417 | change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers); | ||
4418 | change_counter++; | ||
4419 | put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter); | ||
4420 | |||
4421 | if( isDirect && pPager->fd->pMethods ){ | ||
4422 | const void *zBuf = PGHDR_TO_DATA(pPgHdr); | ||
4423 | rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); | ||
4424 | } | ||
4425 | |||
4426 | /* Release the page reference. */ | ||
4427 | sqlite3PagerUnref(pPgHdr); | ||
4428 | pPager->changeCountDone = 1; | ||
4429 | } | ||
4430 | return rc; | ||
4431 | } | ||
4432 | |||
4433 | /* | ||
4434 | ** Sync the database file for the pager pPager. zMaster points to the name | ||
4435 | ** of a master journal file that should be written into the individual | ||
4436 | ** journal file. zMaster may be NULL, which is interpreted as no master | ||
4437 | ** journal (a single database transaction). | ||
4438 | ** | ||
4439 | ** This routine ensures that the journal is synced, all dirty pages written | ||
4440 | ** to the database file and the database file synced. The only thing that | ||
4441 | ** remains to commit the transaction is to delete the journal file (or | ||
4442 | ** master journal file if specified). | ||
4443 | ** | ||
4444 | ** Note that if zMaster==NULL, this does not overwrite a previous value | ||
4445 | ** passed to an sqlite3PagerCommitPhaseOne() call. | ||
4446 | ** | ||
4447 | ** If parameter nTrunc is non-zero, then the pager file is truncated to | ||
4448 | ** nTrunc pages (this is used by auto-vacuum databases). | ||
4449 | */ | ||
4450 | int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){ | ||
4451 | int rc = SQLITE_OK; | ||
4452 | |||
4453 | PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", | ||
4454 | pPager->zFilename, zMaster, nTrunc); | ||
4455 | pagerEnter(pPager); | ||
4456 | |||
4457 | /* If this is an in-memory db, or no pages have been written to, or this | ||
4458 | ** function has already been called, it is a no-op. | ||
4459 | */ | ||
4460 | if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){ | ||
4461 | PgHdr *pPg; | ||
4462 | |||
4463 | #ifdef SQLITE_ENABLE_ATOMIC_WRITE | ||
4464 | /* The atomic-write optimization can be used if all of the | ||
4465 | ** following are true: | ||
4466 | ** | ||
4467 | ** + The file-system supports the atomic-write property for | ||
4468 | ** blocks of size page-size, and | ||
4469 | ** + This commit is not part of a multi-file transaction, and | ||
4470 | ** + Exactly one page has been modified and store in the journal file. | ||
4471 | ** | ||
4472 | ** If the optimization can be used, then the journal file will never | ||
4473 | ** be created for this transaction. | ||
4474 | */ | ||
4475 | int useAtomicWrite = ( | ||
4476 | !zMaster && | ||
4477 | pPager->journalOff==jrnlBufferSize(pPager) && | ||
4478 | nTrunc==0 && | ||
4479 | (0==pPager->pDirty || 0==pPager->pDirty->pDirty) | ||
4480 | ); | ||
4481 | if( useAtomicWrite ){ | ||
4482 | /* Update the nRec field in the journal file. */ | ||
4483 | int offset = pPager->journalHdr + sizeof(aJournalMagic); | ||
4484 | assert(pPager->nRec==1); | ||
4485 | rc = write32bits(pPager->jfd, offset, pPager->nRec); | ||
4486 | |||
4487 | /* Update the db file change counter. The following call will modify | ||
4488 | ** the in-memory representation of page 1 to include the updated | ||
4489 | ** change counter and then write page 1 directly to the database | ||
4490 | ** file. Because of the atomic-write property of the host file-system, | ||
4491 | ** this is safe. | ||
4492 | */ | ||
4493 | if( rc==SQLITE_OK ){ | ||
4494 | rc = pager_incr_changecounter(pPager, 1); | ||
4495 | } | ||
4496 | }else{ | ||
4497 | rc = sqlite3JournalCreate(pPager->jfd); | ||
4498 | } | ||
4499 | |||
4500 | if( !useAtomicWrite && rc==SQLITE_OK ) | ||
4501 | #endif | ||
4502 | |||
4503 | /* If a master journal file name has already been written to the | ||
4504 | ** journal file, then no sync is required. This happens when it is | ||
4505 | ** written, then the process fails to upgrade from a RESERVED to an | ||
4506 | ** EXCLUSIVE lock. The next time the process tries to commit the | ||
4507 | ** transaction the m-j name will have already been written. | ||
4508 | */ | ||
4509 | if( !pPager->setMaster ){ | ||
4510 | assert( pPager->journalOpen ); | ||
4511 | rc = pager_incr_changecounter(pPager, 0); | ||
4512 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4513 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4514 | if( nTrunc!=0 ){ | ||
4515 | /* If this transaction has made the database smaller, then all pages | ||
4516 | ** being discarded by the truncation must be written to the journal | ||
4517 | ** file. | ||
4518 | */ | ||
4519 | Pgno i; | ||
4520 | int iSkip = PAGER_MJ_PGNO(pPager); | ||
4521 | for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){ | ||
4522 | if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){ | ||
4523 | rc = sqlite3PagerGet(pPager, i, &pPg); | ||
4524 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4525 | rc = sqlite3PagerWrite(pPg); | ||
4526 | sqlite3PagerUnref(pPg); | ||
4527 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4528 | } | ||
4529 | } | ||
4530 | } | ||
4531 | #endif | ||
4532 | rc = writeMasterJournal(pPager, zMaster); | ||
4533 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4534 | rc = syncJournal(pPager); | ||
4535 | } | ||
4536 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4537 | |||
4538 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4539 | if( nTrunc!=0 ){ | ||
4540 | rc = sqlite3PagerTruncate(pPager, nTrunc); | ||
4541 | if( rc!=SQLITE_OK ) goto sync_exit; | ||
4542 | } | ||
4543 | #endif | ||
4544 | |||
4545 | /* Write all dirty pages to the database file */ | ||
4546 | pPg = pager_get_all_dirty_pages(pPager); | ||
4547 | rc = pager_write_pagelist(pPg); | ||
4548 | if( rc!=SQLITE_OK ){ | ||
4549 | while( pPg && !pPg->dirty ){ pPg = pPg->pDirty; } | ||
4550 | pPager->pDirty = pPg; | ||
4551 | goto sync_exit; | ||
4552 | } | ||
4553 | pPager->pDirty = 0; | ||
4554 | |||
4555 | /* Sync the database file. */ | ||
4556 | if( !pPager->noSync ){ | ||
4557 | rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); | ||
4558 | } | ||
4559 | IOTRACE(("DBSYNC %p\n", pPager)) | ||
4560 | |||
4561 | pPager->state = PAGER_SYNCED; | ||
4562 | }else if( MEMDB && nTrunc!=0 ){ | ||
4563 | rc = sqlite3PagerTruncate(pPager, nTrunc); | ||
4564 | } | ||
4565 | |||
4566 | sync_exit: | ||
4567 | if( rc==SQLITE_IOERR_BLOCKED ){ | ||
4568 | /* pager_incr_changecounter() may attempt to obtain an exclusive | ||
4569 | * lock to spill the cache and return IOERR_BLOCKED. But since | ||
4570 | * there is no chance the cache is inconsistent, it's | ||
4571 | * better to return SQLITE_BUSY. | ||
4572 | */ | ||
4573 | rc = SQLITE_BUSY; | ||
4574 | } | ||
4575 | pagerLeave(pPager); | ||
4576 | return rc; | ||
4577 | } | ||
4578 | |||
4579 | |||
4580 | /* | ||
4581 | ** Commit all changes to the database and release the write lock. | ||
4582 | ** | ||
4583 | ** If the commit fails for any reason, a rollback attempt is made | ||
4584 | ** and an error code is returned. If the commit worked, SQLITE_OK | ||
4585 | ** is returned. | ||
4586 | */ | ||
4587 | int sqlite3PagerCommitPhaseTwo(Pager *pPager){ | ||
4588 | int rc; | ||
4589 | PgHdr *pPg; | ||
4590 | |||
4591 | if( pPager->errCode ){ | ||
4592 | return pPager->errCode; | ||
4593 | } | ||
4594 | if( pPager->state<PAGER_RESERVED ){ | ||
4595 | return SQLITE_ERROR; | ||
4596 | } | ||
4597 | pagerEnter(pPager); | ||
4598 | PAGERTRACE2("COMMIT %d\n", PAGERID(pPager)); | ||
4599 | if( MEMDB ){ | ||
4600 | pPg = pager_get_all_dirty_pages(pPager); | ||
4601 | while( pPg ){ | ||
4602 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4603 | clearHistory(pHist); | ||
4604 | pPg->dirty = 0; | ||
4605 | pPg->inJournal = 0; | ||
4606 | pHist->inStmt = 0; | ||
4607 | pPg->needSync = 0; | ||
4608 | pHist->pPrevStmt = pHist->pNextStmt = 0; | ||
4609 | pPg = pPg->pDirty; | ||
4610 | } | ||
4611 | pPager->pDirty = 0; | ||
4612 | #ifndef NDEBUG | ||
4613 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
4614 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4615 | assert( !pPg->alwaysRollback ); | ||
4616 | assert( !pHist->pOrig ); | ||
4617 | assert( !pHist->pStmt ); | ||
4618 | } | ||
4619 | #endif | ||
4620 | pPager->pStmt = 0; | ||
4621 | pPager->state = PAGER_SHARED; | ||
4622 | return SQLITE_OK; | ||
4623 | } | ||
4624 | assert( pPager->journalOpen || !pPager->dirtyCache ); | ||
4625 | assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache ); | ||
4626 | rc = pager_end_transaction(pPager); | ||
4627 | rc = pager_error(pPager, rc); | ||
4628 | pagerLeave(pPager); | ||
4629 | return rc; | ||
4630 | } | ||
4631 | |||
4632 | /* | ||
4633 | ** Rollback all changes. The database falls back to PAGER_SHARED mode. | ||
4634 | ** All in-memory cache pages revert to their original data contents. | ||
4635 | ** The journal is deleted. | ||
4636 | ** | ||
4637 | ** This routine cannot fail unless some other process is not following | ||
4638 | ** the correct locking protocol or unless some other | ||
4639 | ** process is writing trash into the journal file (SQLITE_CORRUPT) or | ||
4640 | ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error | ||
4641 | ** codes are returned for all these occasions. Otherwise, | ||
4642 | ** SQLITE_OK is returned. | ||
4643 | */ | ||
4644 | int sqlite3PagerRollback(Pager *pPager){ | ||
4645 | int rc; | ||
4646 | PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager)); | ||
4647 | if( MEMDB ){ | ||
4648 | PgHdr *p; | ||
4649 | for(p=pPager->pAll; p; p=p->pNextAll){ | ||
4650 | PgHistory *pHist; | ||
4651 | assert( !p->alwaysRollback ); | ||
4652 | if( !p->dirty ){ | ||
4653 | assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig ); | ||
4654 | assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt ); | ||
4655 | continue; | ||
4656 | } | ||
4657 | |||
4658 | pHist = PGHDR_TO_HIST(p, pPager); | ||
4659 | if( pHist->pOrig ){ | ||
4660 | memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize); | ||
4661 | PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager)); | ||
4662 | }else{ | ||
4663 | PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager)); | ||
4664 | } | ||
4665 | clearHistory(pHist); | ||
4666 | p->dirty = 0; | ||
4667 | p->inJournal = 0; | ||
4668 | pHist->inStmt = 0; | ||
4669 | pHist->pPrevStmt = pHist->pNextStmt = 0; | ||
4670 | if( pPager->xReiniter ){ | ||
4671 | pPager->xReiniter(p, pPager->pageSize); | ||
4672 | } | ||
4673 | } | ||
4674 | pPager->pDirty = 0; | ||
4675 | pPager->pStmt = 0; | ||
4676 | pPager->dbSize = pPager->origDbSize; | ||
4677 | pager_truncate_cache(pPager); | ||
4678 | pPager->stmtInUse = 0; | ||
4679 | pPager->state = PAGER_SHARED; | ||
4680 | return SQLITE_OK; | ||
4681 | } | ||
4682 | |||
4683 | pagerEnter(pPager); | ||
4684 | if( !pPager->dirtyCache || !pPager->journalOpen ){ | ||
4685 | rc = pager_end_transaction(pPager); | ||
4686 | pagerLeave(pPager); | ||
4687 | return rc; | ||
4688 | } | ||
4689 | |||
4690 | if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ | ||
4691 | if( pPager->state>=PAGER_EXCLUSIVE ){ | ||
4692 | pager_playback(pPager, 0); | ||
4693 | } | ||
4694 | pagerLeave(pPager); | ||
4695 | return pPager->errCode; | ||
4696 | } | ||
4697 | if( pPager->state==PAGER_RESERVED ){ | ||
4698 | int rc2; | ||
4699 | rc = pager_playback(pPager, 0); | ||
4700 | rc2 = pager_end_transaction(pPager); | ||
4701 | if( rc==SQLITE_OK ){ | ||
4702 | rc = rc2; | ||
4703 | } | ||
4704 | }else{ | ||
4705 | rc = pager_playback(pPager, 0); | ||
4706 | } | ||
4707 | /* pager_reset(pPager); */ | ||
4708 | pPager->dbSize = -1; | ||
4709 | |||
4710 | /* If an error occurs during a ROLLBACK, we can no longer trust the pager | ||
4711 | ** cache. So call pager_error() on the way out to make any error | ||
4712 | ** persistent. | ||
4713 | */ | ||
4714 | rc = pager_error(pPager, rc); | ||
4715 | pagerLeave(pPager); | ||
4716 | return rc; | ||
4717 | } | ||
4718 | |||
4719 | /* | ||
4720 | ** Return TRUE if the database file is opened read-only. Return FALSE | ||
4721 | ** if the database is (in theory) writable. | ||
4722 | */ | ||
4723 | int sqlite3PagerIsreadonly(Pager *pPager){ | ||
4724 | return pPager->readOnly; | ||
4725 | } | ||
4726 | |||
4727 | /* | ||
4728 | ** Return the number of references to the pager. | ||
4729 | */ | ||
4730 | int sqlite3PagerRefcount(Pager *pPager){ | ||
4731 | return pPager->nRef; | ||
4732 | } | ||
4733 | |||
4734 | #ifdef SQLITE_TEST | ||
4735 | /* | ||
4736 | ** This routine is used for testing and analysis only. | ||
4737 | */ | ||
4738 | int *sqlite3PagerStats(Pager *pPager){ | ||
4739 | static int a[11]; | ||
4740 | a[0] = pPager->nRef; | ||
4741 | a[1] = pPager->nPage; | ||
4742 | a[2] = pPager->mxPage; | ||
4743 | a[3] = pPager->dbSize; | ||
4744 | a[4] = pPager->state; | ||
4745 | a[5] = pPager->errCode; | ||
4746 | a[6] = pPager->nHit; | ||
4747 | a[7] = pPager->nMiss; | ||
4748 | a[8] = 0; /* Used to be pPager->nOvfl */ | ||
4749 | a[9] = pPager->nRead; | ||
4750 | a[10] = pPager->nWrite; | ||
4751 | return a; | ||
4752 | } | ||
4753 | #endif | ||
4754 | |||
4755 | /* | ||
4756 | ** Set the statement rollback point. | ||
4757 | ** | ||
4758 | ** This routine should be called with the transaction journal already | ||
4759 | ** open. A new statement journal is created that can be used to rollback | ||
4760 | ** changes of a single SQL command within a larger transaction. | ||
4761 | */ | ||
4762 | static int pagerStmtBegin(Pager *pPager){ | ||
4763 | int rc; | ||
4764 | assert( !pPager->stmtInUse ); | ||
4765 | assert( pPager->state>=PAGER_SHARED ); | ||
4766 | assert( pPager->dbSize>=0 ); | ||
4767 | PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager)); | ||
4768 | if( MEMDB ){ | ||
4769 | pPager->stmtInUse = 1; | ||
4770 | pPager->stmtSize = pPager->dbSize; | ||
4771 | return SQLITE_OK; | ||
4772 | } | ||
4773 | if( !pPager->journalOpen ){ | ||
4774 | pPager->stmtAutoopen = 1; | ||
4775 | return SQLITE_OK; | ||
4776 | } | ||
4777 | assert( pPager->journalOpen ); | ||
4778 | pagerLeave(pPager); | ||
4779 | assert( pPager->aInStmt==0 ); | ||
4780 | pPager->aInStmt = sqlite3MallocZero( pPager->dbSize/8 + 1 ); | ||
4781 | pagerEnter(pPager); | ||
4782 | if( pPager->aInStmt==0 ){ | ||
4783 | /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */ | ||
4784 | return SQLITE_NOMEM; | ||
4785 | } | ||
4786 | #ifndef NDEBUG | ||
4787 | rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize); | ||
4788 | if( rc ) goto stmt_begin_failed; | ||
4789 | assert( pPager->stmtJSize == pPager->journalOff ); | ||
4790 | #endif | ||
4791 | pPager->stmtJSize = pPager->journalOff; | ||
4792 | pPager->stmtSize = pPager->dbSize; | ||
4793 | pPager->stmtHdrOff = 0; | ||
4794 | pPager->stmtCksum = pPager->cksumInit; | ||
4795 | if( !pPager->stmtOpen ){ | ||
4796 | rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->stfd, pPager->zStmtJrnl, | ||
4797 | SQLITE_OPEN_SUBJOURNAL); | ||
4798 | if( rc ){ | ||
4799 | goto stmt_begin_failed; | ||
4800 | } | ||
4801 | pPager->stmtOpen = 1; | ||
4802 | pPager->stmtNRec = 0; | ||
4803 | } | ||
4804 | pPager->stmtInUse = 1; | ||
4805 | return SQLITE_OK; | ||
4806 | |||
4807 | stmt_begin_failed: | ||
4808 | if( pPager->aInStmt ){ | ||
4809 | sqlite3_free(pPager->aInStmt); | ||
4810 | pPager->aInStmt = 0; | ||
4811 | } | ||
4812 | return rc; | ||
4813 | } | ||
4814 | int sqlite3PagerStmtBegin(Pager *pPager){ | ||
4815 | int rc; | ||
4816 | pagerEnter(pPager); | ||
4817 | rc = pagerStmtBegin(pPager); | ||
4818 | pagerLeave(pPager); | ||
4819 | return rc; | ||
4820 | } | ||
4821 | |||
4822 | /* | ||
4823 | ** Commit a statement. | ||
4824 | */ | ||
4825 | int sqlite3PagerStmtCommit(Pager *pPager){ | ||
4826 | pagerEnter(pPager); | ||
4827 | if( pPager->stmtInUse ){ | ||
4828 | PgHdr *pPg, *pNext; | ||
4829 | PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager)); | ||
4830 | if( !MEMDB ){ | ||
4831 | /* sqlite3OsTruncate(pPager->stfd, 0); */ | ||
4832 | sqlite3_free( pPager->aInStmt ); | ||
4833 | pPager->aInStmt = 0; | ||
4834 | }else{ | ||
4835 | for(pPg=pPager->pStmt; pPg; pPg=pNext){ | ||
4836 | PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4837 | pNext = pHist->pNextStmt; | ||
4838 | assert( pHist->inStmt ); | ||
4839 | pHist->inStmt = 0; | ||
4840 | pHist->pPrevStmt = pHist->pNextStmt = 0; | ||
4841 | sqlite3_free(pHist->pStmt); | ||
4842 | pHist->pStmt = 0; | ||
4843 | } | ||
4844 | } | ||
4845 | pPager->stmtNRec = 0; | ||
4846 | pPager->stmtInUse = 0; | ||
4847 | pPager->pStmt = 0; | ||
4848 | } | ||
4849 | pPager->stmtAutoopen = 0; | ||
4850 | pagerLeave(pPager); | ||
4851 | return SQLITE_OK; | ||
4852 | } | ||
4853 | |||
4854 | /* | ||
4855 | ** Rollback a statement. | ||
4856 | */ | ||
4857 | int sqlite3PagerStmtRollback(Pager *pPager){ | ||
4858 | int rc; | ||
4859 | pagerEnter(pPager); | ||
4860 | if( pPager->stmtInUse ){ | ||
4861 | PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager)); | ||
4862 | if( MEMDB ){ | ||
4863 | PgHdr *pPg; | ||
4864 | PgHistory *pHist; | ||
4865 | for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){ | ||
4866 | pHist = PGHDR_TO_HIST(pPg, pPager); | ||
4867 | if( pHist->pStmt ){ | ||
4868 | memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize); | ||
4869 | sqlite3_free(pHist->pStmt); | ||
4870 | pHist->pStmt = 0; | ||
4871 | } | ||
4872 | } | ||
4873 | pPager->dbSize = pPager->stmtSize; | ||
4874 | pager_truncate_cache(pPager); | ||
4875 | rc = SQLITE_OK; | ||
4876 | }else{ | ||
4877 | rc = pager_stmt_playback(pPager); | ||
4878 | } | ||
4879 | sqlite3PagerStmtCommit(pPager); | ||
4880 | }else{ | ||
4881 | rc = SQLITE_OK; | ||
4882 | } | ||
4883 | pPager->stmtAutoopen = 0; | ||
4884 | pagerLeave(pPager); | ||
4885 | return rc; | ||
4886 | } | ||
4887 | |||
4888 | /* | ||
4889 | ** Return the full pathname of the database file. | ||
4890 | */ | ||
4891 | const char *sqlite3PagerFilename(Pager *pPager){ | ||
4892 | return pPager->zFilename; | ||
4893 | } | ||
4894 | |||
4895 | /* | ||
4896 | ** Return the VFS structure for the pager. | ||
4897 | */ | ||
4898 | const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ | ||
4899 | return pPager->pVfs; | ||
4900 | } | ||
4901 | |||
4902 | /* | ||
4903 | ** Return the file handle for the database file associated | ||
4904 | ** with the pager. This might return NULL if the file has | ||
4905 | ** not yet been opened. | ||
4906 | */ | ||
4907 | sqlite3_file *sqlite3PagerFile(Pager *pPager){ | ||
4908 | return pPager->fd; | ||
4909 | } | ||
4910 | |||
4911 | /* | ||
4912 | ** Return the directory of the database file. | ||
4913 | */ | ||
4914 | const char *sqlite3PagerDirname(Pager *pPager){ | ||
4915 | return pPager->zDirectory; | ||
4916 | } | ||
4917 | |||
4918 | /* | ||
4919 | ** Return the full pathname of the journal file. | ||
4920 | */ | ||
4921 | const char *sqlite3PagerJournalname(Pager *pPager){ | ||
4922 | return pPager->zJournal; | ||
4923 | } | ||
4924 | |||
4925 | /* | ||
4926 | ** Return true if fsync() calls are disabled for this pager. Return FALSE | ||
4927 | ** if fsync()s are executed normally. | ||
4928 | */ | ||
4929 | int sqlite3PagerNosync(Pager *pPager){ | ||
4930 | return pPager->noSync; | ||
4931 | } | ||
4932 | |||
4933 | #ifdef SQLITE_HAS_CODEC | ||
4934 | /* | ||
4935 | ** Set the codec for this pager | ||
4936 | */ | ||
4937 | void sqlite3PagerSetCodec( | ||
4938 | Pager *pPager, | ||
4939 | void *(*xCodec)(void*,void*,Pgno,int), | ||
4940 | void *pCodecArg | ||
4941 | ){ | ||
4942 | pPager->xCodec = xCodec; | ||
4943 | pPager->pCodecArg = pCodecArg; | ||
4944 | } | ||
4945 | #endif | ||
4946 | |||
4947 | #ifndef SQLITE_OMIT_AUTOVACUUM | ||
4948 | /* | ||
4949 | ** Move the page pPg to location pgno in the file. | ||
4950 | ** | ||
4951 | ** There must be no references to the page previously located at | ||
4952 | ** pgno (which we call pPgOld) though that page is allowed to be | ||
4953 | ** in cache. If the page previous located at pgno is not already | ||
4954 | ** in the rollback journal, it is not put there by by this routine. | ||
4955 | ** | ||
4956 | ** References to the page pPg remain valid. Updating any | ||
4957 | ** meta-data associated with pPg (i.e. data stored in the nExtra bytes | ||
4958 | ** allocated along with the page) is the responsibility of the caller. | ||
4959 | ** | ||
4960 | ** A transaction must be active when this routine is called. It used to be | ||
4961 | ** required that a statement transaction was not active, but this restriction | ||
4962 | ** has been removed (CREATE INDEX needs to move a page when a statement | ||
4963 | ** transaction is active). | ||
4964 | */ | ||
4965 | int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno){ | ||
4966 | PgHdr *pPgOld; /* The page being overwritten. */ | ||
4967 | int h; | ||
4968 | Pgno needSyncPgno = 0; | ||
4969 | |||
4970 | pagerEnter(pPager); | ||
4971 | assert( pPg->nRef>0 ); | ||
4972 | |||
4973 | PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", | ||
4974 | PAGERID(pPager), pPg->pgno, pPg->needSync, pgno); | ||
4975 | IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) | ||
4976 | |||
4977 | pager_get_content(pPg); | ||
4978 | if( pPg->needSync ){ | ||
4979 | needSyncPgno = pPg->pgno; | ||
4980 | assert( pPg->inJournal || (int)pgno>pPager->origDbSize ); | ||
4981 | assert( pPg->dirty ); | ||
4982 | assert( pPager->needSync ); | ||
4983 | } | ||
4984 | |||
4985 | /* Unlink pPg from it's hash-chain */ | ||
4986 | unlinkHashChain(pPager, pPg); | ||
4987 | |||
4988 | /* If the cache contains a page with page-number pgno, remove it | ||
4989 | ** from it's hash chain. Also, if the PgHdr.needSync was set for | ||
4990 | ** page pgno before the 'move' operation, it needs to be retained | ||
4991 | ** for the page moved there. | ||
4992 | */ | ||
4993 | pPg->needSync = 0; | ||
4994 | pPgOld = pager_lookup(pPager, pgno); | ||
4995 | if( pPgOld ){ | ||
4996 | assert( pPgOld->nRef==0 ); | ||
4997 | unlinkHashChain(pPager, pPgOld); | ||
4998 | makeClean(pPgOld); | ||
4999 | pPg->needSync = pPgOld->needSync; | ||
5000 | }else{ | ||
5001 | pPg->needSync = 0; | ||
5002 | } | ||
5003 | if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){ | ||
5004 | pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0; | ||
5005 | }else{ | ||
5006 | pPg->inJournal = 0; | ||
5007 | assert( pPg->needSync==0 || (int)pgno>pPager->origDbSize ); | ||
5008 | } | ||
5009 | |||
5010 | /* Change the page number for pPg and insert it into the new hash-chain. */ | ||
5011 | assert( pgno!=0 ); | ||
5012 | pPg->pgno = pgno; | ||
5013 | h = pgno & (pPager->nHash-1); | ||
5014 | if( pPager->aHash[h] ){ | ||
5015 | assert( pPager->aHash[h]->pPrevHash==0 ); | ||
5016 | pPager->aHash[h]->pPrevHash = pPg; | ||
5017 | } | ||
5018 | pPg->pNextHash = pPager->aHash[h]; | ||
5019 | pPager->aHash[h] = pPg; | ||
5020 | pPg->pPrevHash = 0; | ||
5021 | |||
5022 | makeDirty(pPg); | ||
5023 | pPager->dirtyCache = 1; | ||
5024 | |||
5025 | if( needSyncPgno ){ | ||
5026 | /* If needSyncPgno is non-zero, then the journal file needs to be | ||
5027 | ** sync()ed before any data is written to database file page needSyncPgno. | ||
5028 | ** Currently, no such page exists in the page-cache and the | ||
5029 | ** Pager.aInJournal bit has been set. This needs to be remedied by loading | ||
5030 | ** the page into the pager-cache and setting the PgHdr.needSync flag. | ||
5031 | ** | ||
5032 | ** The sqlite3PagerGet() call may cause the journal to sync. So make | ||
5033 | ** sure the Pager.needSync flag is set too. | ||
5034 | */ | ||
5035 | int rc; | ||
5036 | PgHdr *pPgHdr; | ||
5037 | assert( pPager->needSync ); | ||
5038 | rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr); | ||
5039 | if( rc!=SQLITE_OK ) return rc; | ||
5040 | pPager->needSync = 1; | ||
5041 | pPgHdr->needSync = 1; | ||
5042 | pPgHdr->inJournal = 1; | ||
5043 | makeDirty(pPgHdr); | ||
5044 | sqlite3PagerUnref(pPgHdr); | ||
5045 | } | ||
5046 | |||
5047 | pagerLeave(pPager); | ||
5048 | return SQLITE_OK; | ||
5049 | } | ||
5050 | #endif | ||
5051 | |||
5052 | /* | ||
5053 | ** Return a pointer to the data for the specified page. | ||
5054 | */ | ||
5055 | void *sqlite3PagerGetData(DbPage *pPg){ | ||
5056 | return PGHDR_TO_DATA(pPg); | ||
5057 | } | ||
5058 | |||
5059 | /* | ||
5060 | ** Return a pointer to the Pager.nExtra bytes of "extra" space | ||
5061 | ** allocated along with the specified page. | ||
5062 | */ | ||
5063 | void *sqlite3PagerGetExtra(DbPage *pPg){ | ||
5064 | Pager *pPager = pPg->pPager; | ||
5065 | return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0); | ||
5066 | } | ||
5067 | |||
5068 | /* | ||
5069 | ** Get/set the locking-mode for this pager. Parameter eMode must be one | ||
5070 | ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or | ||
5071 | ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then | ||
5072 | ** the locking-mode is set to the value specified. | ||
5073 | ** | ||
5074 | ** The returned value is either PAGER_LOCKINGMODE_NORMAL or | ||
5075 | ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) | ||
5076 | ** locking-mode. | ||
5077 | */ | ||
5078 | int sqlite3PagerLockingMode(Pager *pPager, int eMode){ | ||
5079 | assert( eMode==PAGER_LOCKINGMODE_QUERY | ||
5080 | || eMode==PAGER_LOCKINGMODE_NORMAL | ||
5081 | || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); | ||
5082 | assert( PAGER_LOCKINGMODE_QUERY<0 ); | ||
5083 | assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); | ||
5084 | if( eMode>=0 && !pPager->tempFile ){ | ||
5085 | pPager->exclusiveMode = eMode; | ||
5086 | } | ||
5087 | return (int)pPager->exclusiveMode; | ||
5088 | } | ||
5089 | |||
5090 | #ifdef SQLITE_DEBUG | ||
5091 | /* | ||
5092 | ** Print a listing of all referenced pages and their ref count. | ||
5093 | */ | ||
5094 | void sqlite3PagerRefdump(Pager *pPager){ | ||
5095 | PgHdr *pPg; | ||
5096 | for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ | ||
5097 | if( pPg->nRef<=0 ) continue; | ||
5098 | sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n", | ||
5099 | pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef); | ||
5100 | } | ||
5101 | } | ||
5102 | #endif | ||
5103 | |||
5104 | #endif /* SQLITE_OMIT_DISKIO */ | ||