diff options
Diffstat (limited to 'libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl')
-rw-r--r-- | libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl | 859 |
1 files changed, 859 insertions, 0 deletions
diff --git a/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl b/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl new file mode 100644 index 0000000..d70b442 --- /dev/null +++ b/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl | |||
@@ -0,0 +1,859 @@ | |||
1 | # Run this TCL script using "testfixture" in order get a report that shows | ||
2 | # how much disk space is used by a particular data to actually store data | ||
3 | # versus how much space is unused. | ||
4 | # | ||
5 | |||
6 | if {[catch { | ||
7 | |||
8 | # Get the name of the database to analyze | ||
9 | # | ||
10 | #set argv $argv0 | ||
11 | if {[llength $argv]!=1} { | ||
12 | puts stderr "Usage: $argv0 database-name" | ||
13 | exit 1 | ||
14 | } | ||
15 | set file_to_analyze [lindex $argv 0] | ||
16 | if {![file exists $file_to_analyze]} { | ||
17 | puts stderr "No such file: $file_to_analyze" | ||
18 | exit 1 | ||
19 | } | ||
20 | if {![file readable $file_to_analyze]} { | ||
21 | puts stderr "File is not readable: $file_to_analyze" | ||
22 | exit 1 | ||
23 | } | ||
24 | if {[file size $file_to_analyze]<512} { | ||
25 | puts stderr "Empty or malformed database: $file_to_analyze" | ||
26 | exit 1 | ||
27 | } | ||
28 | |||
29 | # Maximum distance between pages before we consider it a "gap" | ||
30 | # | ||
31 | set MAXGAP 3 | ||
32 | |||
33 | # Open the database | ||
34 | # | ||
35 | sqlite3 db [lindex $argv 0] | ||
36 | set DB [btree_open [lindex $argv 0] 1000 0] | ||
37 | |||
38 | # In-memory database for collecting statistics. This script loops through | ||
39 | # the tables and indices in the database being analyzed, adding a row for each | ||
40 | # to an in-memory database (for which the schema is shown below). It then | ||
41 | # queries the in-memory db to produce the space-analysis report. | ||
42 | # | ||
43 | sqlite3 mem :memory: | ||
44 | set tabledef\ | ||
45 | {CREATE TABLE space_used( | ||
46 | name clob, -- Name of a table or index in the database file | ||
47 | tblname clob, -- Name of associated table | ||
48 | is_index boolean, -- TRUE if it is an index, false for a table | ||
49 | nentry int, -- Number of entries in the BTree | ||
50 | leaf_entries int, -- Number of leaf entries | ||
51 | payload int, -- Total amount of data stored in this table or index | ||
52 | ovfl_payload int, -- Total amount of data stored on overflow pages | ||
53 | ovfl_cnt int, -- Number of entries that use overflow | ||
54 | mx_payload int, -- Maximum payload size | ||
55 | int_pages int, -- Number of interior pages used | ||
56 | leaf_pages int, -- Number of leaf pages used | ||
57 | ovfl_pages int, -- Number of overflow pages used | ||
58 | int_unused int, -- Number of unused bytes on interior pages | ||
59 | leaf_unused int, -- Number of unused bytes on primary pages | ||
60 | ovfl_unused int, -- Number of unused bytes on overflow pages | ||
61 | gap_cnt int -- Number of gaps in the page layout | ||
62 | );} | ||
63 | mem eval $tabledef | ||
64 | |||
65 | proc integerify {real} { | ||
66 | return [expr int($real)] | ||
67 | } | ||
68 | mem function int integerify | ||
69 | |||
70 | # Quote a string for use in an SQL query. Examples: | ||
71 | # | ||
72 | # [quote {hello world}] == {'hello world'} | ||
73 | # [quote {hello world's}] == {'hello world''s'} | ||
74 | # | ||
75 | proc quote {txt} { | ||
76 | regsub -all ' $txt '' q | ||
77 | return '$q' | ||
78 | } | ||
79 | |||
80 | # This proc is a wrapper around the btree_cursor_info command. The | ||
81 | # second argument is an open btree cursor returned by [btree_cursor]. | ||
82 | # The first argument is the name of an array variable that exists in | ||
83 | # the scope of the caller. If the third argument is non-zero, then | ||
84 | # info is returned for the page that lies $up entries upwards in the | ||
85 | # tree-structure. (i.e. $up==1 returns the parent page, $up==2 the | ||
86 | # grandparent etc.) | ||
87 | # | ||
88 | # The following entries in that array are filled in with information retrieved | ||
89 | # using [btree_cursor_info]: | ||
90 | # | ||
91 | # $arrayvar(page_no) = The page number | ||
92 | # $arrayvar(entry_no) = The entry number | ||
93 | # $arrayvar(page_entries) = Total number of entries on this page | ||
94 | # $arrayvar(cell_size) = Cell size (local payload + header) | ||
95 | # $arrayvar(page_freebytes) = Number of free bytes on this page | ||
96 | # $arrayvar(page_freeblocks) = Number of free blocks on the page | ||
97 | # $arrayvar(payload_bytes) = Total payload size (local + overflow) | ||
98 | # $arrayvar(header_bytes) = Header size in bytes | ||
99 | # $arrayvar(local_payload_bytes) = Local payload size | ||
100 | # $arrayvar(parent) = Parent page number | ||
101 | # | ||
102 | proc cursor_info {arrayvar csr {up 0}} { | ||
103 | upvar $arrayvar a | ||
104 | foreach [list a(page_no) \ | ||
105 | a(entry_no) \ | ||
106 | a(page_entries) \ | ||
107 | a(cell_size) \ | ||
108 | a(page_freebytes) \ | ||
109 | a(page_freeblocks) \ | ||
110 | a(payload_bytes) \ | ||
111 | a(header_bytes) \ | ||
112 | a(local_payload_bytes) \ | ||
113 | a(parent) \ | ||
114 | a(first_ovfl) ] [btree_cursor_info $csr $up] break | ||
115 | } | ||
116 | |||
117 | # Determine the page-size of the database. This global variable is used | ||
118 | # throughout the script. | ||
119 | # | ||
120 | set pageSize [db eval {PRAGMA page_size}] | ||
121 | |||
122 | # Analyze every table in the database, one at a time. | ||
123 | # | ||
124 | # The following query returns the name and root-page of each table in the | ||
125 | # database, including the sqlite_master table. | ||
126 | # | ||
127 | set sql { | ||
128 | SELECT name, rootpage FROM sqlite_master | ||
129 | WHERE type='table' AND rootpage>0 | ||
130 | UNION ALL | ||
131 | SELECT 'sqlite_master', 1 | ||
132 | ORDER BY 1 | ||
133 | } | ||
134 | set wideZero [expr {10000000000 - 10000000000}] | ||
135 | foreach {name rootpage} [db eval $sql] { | ||
136 | puts stderr "Analyzing table $name..." | ||
137 | |||
138 | # Code below traverses the table being analyzed (table name $name), using the | ||
139 | # btree cursor $cursor. Statistics related to table $name are accumulated in | ||
140 | # the following variables: | ||
141 | # | ||
142 | set total_payload $wideZero ;# Payload space used by all entries | ||
143 | set total_ovfl $wideZero ;# Payload space on overflow pages | ||
144 | set unused_int $wideZero ;# Unused space on interior nodes | ||
145 | set unused_leaf $wideZero ;# Unused space on leaf nodes | ||
146 | set unused_ovfl $wideZero ;# Unused space on overflow pages | ||
147 | set cnt_ovfl $wideZero ;# Number of entries that use overflows | ||
148 | set cnt_leaf_entry $wideZero ;# Number of leaf entries | ||
149 | set cnt_int_entry $wideZero ;# Number of interor entries | ||
150 | set mx_payload $wideZero ;# Maximum payload size | ||
151 | set ovfl_pages $wideZero ;# Number of overflow pages used | ||
152 | set leaf_pages $wideZero ;# Number of leaf pages | ||
153 | set int_pages $wideZero ;# Number of interior pages | ||
154 | set gap_cnt 0 ;# Number of holes in the page sequence | ||
155 | set prev_pgno 0 ;# Last page number seen | ||
156 | |||
157 | # As the btree is traversed, the array variable $seen($pgno) is set to 1 | ||
158 | # the first time page $pgno is encountered. | ||
159 | # | ||
160 | catch {unset seen} | ||
161 | |||
162 | # The following loop runs once for each entry in table $name. The table | ||
163 | # is traversed using the btree cursor stored in variable $csr | ||
164 | # | ||
165 | set csr [btree_cursor $DB $rootpage 0] | ||
166 | for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} { | ||
167 | incr cnt_leaf_entry | ||
168 | |||
169 | # Retrieve information about the entry the btree-cursor points to into | ||
170 | # the array variable $ci (cursor info). | ||
171 | # | ||
172 | cursor_info ci $csr | ||
173 | |||
174 | # Check if the payload of this entry is greater than the current | ||
175 | # $mx_payload statistic for the table. Also increase the $total_payload | ||
176 | # statistic. | ||
177 | # | ||
178 | if {$ci(payload_bytes)>$mx_payload} {set mx_payload $ci(payload_bytes)} | ||
179 | incr total_payload $ci(payload_bytes) | ||
180 | |||
181 | # If this entry uses overflow pages, then update the $cnt_ovfl, | ||
182 | # $total_ovfl, $ovfl_pages and $unused_ovfl statistics. | ||
183 | # | ||
184 | set ovfl [expr {$ci(payload_bytes)-$ci(local_payload_bytes)}] | ||
185 | if {$ovfl} { | ||
186 | incr cnt_ovfl | ||
187 | incr total_ovfl $ovfl | ||
188 | set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] | ||
189 | incr ovfl_pages $n | ||
190 | incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] | ||
191 | set pglist [btree_ovfl_info $DB $csr] | ||
192 | } else { | ||
193 | set pglist {} | ||
194 | } | ||
195 | |||
196 | # If this is the first table entry analyzed for the page, then update | ||
197 | # the page-related statistics $leaf_pages and $unused_leaf. Also, if | ||
198 | # this page has a parent page that has not been analyzed, retrieve | ||
199 | # info for the parent and update statistics for it too. | ||
200 | # | ||
201 | if {![info exists seen($ci(page_no))]} { | ||
202 | set seen($ci(page_no)) 1 | ||
203 | incr leaf_pages | ||
204 | incr unused_leaf $ci(page_freebytes) | ||
205 | set pglist "$ci(page_no) $pglist" | ||
206 | |||
207 | # Now check if the page has a parent that has not been analyzed. If | ||
208 | # so, update the $int_pages, $cnt_int_entry and $unused_int statistics | ||
209 | # accordingly. Then check if the parent page has a parent that has | ||
210 | # not yet been analyzed etc. | ||
211 | # | ||
212 | # set parent $ci(parent_page_no) | ||
213 | for {set up 1} \ | ||
214 | {$ci(parent)!=0 && ![info exists seen($ci(parent))]} {incr up} \ | ||
215 | { | ||
216 | # Mark the parent as seen. | ||
217 | # | ||
218 | set seen($ci(parent)) 1 | ||
219 | |||
220 | # Retrieve info for the parent and update statistics. | ||
221 | cursor_info ci $csr $up | ||
222 | incr int_pages | ||
223 | incr cnt_int_entry $ci(page_entries) | ||
224 | incr unused_int $ci(page_freebytes) | ||
225 | |||
226 | # parent pages come before their first child | ||
227 | set pglist "$ci(page_no) $pglist" | ||
228 | } | ||
229 | } | ||
230 | |||
231 | # Check the page list for fragmentation | ||
232 | # | ||
233 | foreach pg $pglist { | ||
234 | if {$pg!=$prev_pgno+1 && $prev_pgno>0} { | ||
235 | incr gap_cnt | ||
236 | } | ||
237 | set prev_pgno $pg | ||
238 | } | ||
239 | } | ||
240 | btree_close_cursor $csr | ||
241 | |||
242 | # Handle the special case where a table contains no data. In this case | ||
243 | # all statistics are zero, except for the number of leaf pages (1) and | ||
244 | # the unused bytes on leaf pages ($pageSize - 8). | ||
245 | # | ||
246 | # An exception to the above is the sqlite_master table. If it is empty | ||
247 | # then all statistics are zero except for the number of leaf pages (1), | ||
248 | # and the number of unused bytes on leaf pages ($pageSize - 112). | ||
249 | # | ||
250 | if {[llength [array names seen]]==0} { | ||
251 | set leaf_pages 1 | ||
252 | if {$rootpage==1} { | ||
253 | set unused_leaf [expr {$pageSize-112}] | ||
254 | } else { | ||
255 | set unused_leaf [expr {$pageSize-8}] | ||
256 | } | ||
257 | } | ||
258 | |||
259 | # Insert the statistics for the table analyzed into the in-memory database. | ||
260 | # | ||
261 | set sql "INSERT INTO space_used VALUES(" | ||
262 | append sql [quote $name] | ||
263 | append sql ",[quote $name]" | ||
264 | append sql ",0" | ||
265 | append sql ",[expr {$cnt_leaf_entry+$cnt_int_entry}]" | ||
266 | append sql ",$cnt_leaf_entry" | ||
267 | append sql ",$total_payload" | ||
268 | append sql ",$total_ovfl" | ||
269 | append sql ",$cnt_ovfl" | ||
270 | append sql ",$mx_payload" | ||
271 | append sql ",$int_pages" | ||
272 | append sql ",$leaf_pages" | ||
273 | append sql ",$ovfl_pages" | ||
274 | append sql ",$unused_int" | ||
275 | append sql ",$unused_leaf" | ||
276 | append sql ",$unused_ovfl" | ||
277 | append sql ",$gap_cnt" | ||
278 | append sql ); | ||
279 | mem eval $sql | ||
280 | } | ||
281 | |||
282 | # Analyze every index in the database, one at a time. | ||
283 | # | ||
284 | # The query below returns the name, associated table and root-page number | ||
285 | # for every index in the database. | ||
286 | # | ||
287 | set sql { | ||
288 | SELECT name, tbl_name, rootpage FROM sqlite_master WHERE type='index' | ||
289 | ORDER BY 2, 1 | ||
290 | } | ||
291 | foreach {name tbl_name rootpage} [db eval $sql] { | ||
292 | puts stderr "Analyzing index $name of table $tbl_name..." | ||
293 | |||
294 | # Code below traverses the index being analyzed (index name $name), using the | ||
295 | # btree cursor $cursor. Statistics related to index $name are accumulated in | ||
296 | # the following variables: | ||
297 | # | ||
298 | set total_payload $wideZero ;# Payload space used by all entries | ||
299 | set total_ovfl $wideZero ;# Payload space on overflow pages | ||
300 | set unused_leaf $wideZero ;# Unused space on leaf nodes | ||
301 | set unused_ovfl $wideZero ;# Unused space on overflow pages | ||
302 | set cnt_ovfl $wideZero ;# Number of entries that use overflows | ||
303 | set cnt_leaf_entry $wideZero ;# Number of leaf entries | ||
304 | set mx_payload $wideZero ;# Maximum payload size | ||
305 | set ovfl_pages $wideZero ;# Number of overflow pages used | ||
306 | set leaf_pages $wideZero ;# Number of leaf pages | ||
307 | set gap_cnt 0 ;# Number of holes in the page sequence | ||
308 | set prev_pgno 0 ;# Last page number seen | ||
309 | |||
310 | # As the btree is traversed, the array variable $seen($pgno) is set to 1 | ||
311 | # the first time page $pgno is encountered. | ||
312 | # | ||
313 | catch {unset seen} | ||
314 | |||
315 | # The following loop runs once for each entry in index $name. The index | ||
316 | # is traversed using the btree cursor stored in variable $csr | ||
317 | # | ||
318 | set csr [btree_cursor $DB $rootpage 0] | ||
319 | for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} { | ||
320 | incr cnt_leaf_entry | ||
321 | |||
322 | # Retrieve information about the entry the btree-cursor points to into | ||
323 | # the array variable $ci (cursor info). | ||
324 | # | ||
325 | cursor_info ci $csr | ||
326 | |||
327 | # Check if the payload of this entry is greater than the current | ||
328 | # $mx_payload statistic for the table. Also increase the $total_payload | ||
329 | # statistic. | ||
330 | # | ||
331 | set payload [btree_keysize $csr] | ||
332 | if {$payload>$mx_payload} {set mx_payload $payload} | ||
333 | incr total_payload $payload | ||
334 | |||
335 | # If this entry uses overflow pages, then update the $cnt_ovfl, | ||
336 | # $total_ovfl, $ovfl_pages and $unused_ovfl statistics. | ||
337 | # | ||
338 | set ovfl [expr {$payload-$ci(local_payload_bytes)}] | ||
339 | if {$ovfl} { | ||
340 | incr cnt_ovfl | ||
341 | incr total_ovfl $ovfl | ||
342 | set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] | ||
343 | incr ovfl_pages $n | ||
344 | incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] | ||
345 | } | ||
346 | |||
347 | # If this is the first table entry analyzed for the page, then update | ||
348 | # the page-related statistics $leaf_pages and $unused_leaf. | ||
349 | # | ||
350 | if {![info exists seen($ci(page_no))]} { | ||
351 | set seen($ci(page_no)) 1 | ||
352 | incr leaf_pages | ||
353 | incr unused_leaf $ci(page_freebytes) | ||
354 | set pg $ci(page_no) | ||
355 | if {$prev_pgno>0 && $pg!=$prev_pgno+1} { | ||
356 | incr gap_cnt | ||
357 | } | ||
358 | set prev_pgno $ci(page_no) | ||
359 | } | ||
360 | } | ||
361 | btree_close_cursor $csr | ||
362 | |||
363 | # Handle the special case where a index contains no data. In this case | ||
364 | # all statistics are zero, except for the number of leaf pages (1) and | ||
365 | # the unused bytes on leaf pages ($pageSize - 8). | ||
366 | # | ||
367 | if {[llength [array names seen]]==0} { | ||
368 | set leaf_pages 1 | ||
369 | set unused_leaf [expr {$pageSize-8}] | ||
370 | } | ||
371 | |||
372 | # Insert the statistics for the index analyzed into the in-memory database. | ||
373 | # | ||
374 | set sql "INSERT INTO space_used VALUES(" | ||
375 | append sql [quote $name] | ||
376 | append sql ",[quote $tbl_name]" | ||
377 | append sql ",1" | ||
378 | append sql ",$cnt_leaf_entry" | ||
379 | append sql ",$cnt_leaf_entry" | ||
380 | append sql ",$total_payload" | ||
381 | append sql ",$total_ovfl" | ||
382 | append sql ",$cnt_ovfl" | ||
383 | append sql ",$mx_payload" | ||
384 | append sql ",0" | ||
385 | append sql ",$leaf_pages" | ||
386 | append sql ",$ovfl_pages" | ||
387 | append sql ",0" | ||
388 | append sql ",$unused_leaf" | ||
389 | append sql ",$unused_ovfl" | ||
390 | append sql ",$gap_cnt" | ||
391 | append sql ); | ||
392 | mem eval $sql | ||
393 | } | ||
394 | |||
395 | # Generate a single line of output in the statistics section of the | ||
396 | # report. | ||
397 | # | ||
398 | proc statline {title value {extra {}}} { | ||
399 | set len [string length $title] | ||
400 | set dots [string range {......................................} $len end] | ||
401 | set len [string length $value] | ||
402 | set sp2 [string range { } $len end] | ||
403 | if {$extra ne ""} { | ||
404 | set extra " $extra" | ||
405 | } | ||
406 | puts "$title$dots $value$sp2$extra" | ||
407 | } | ||
408 | |||
409 | # Generate a formatted percentage value for $num/$denom | ||
410 | # | ||
411 | proc percent {num denom {of {}}} { | ||
412 | if {$denom==0.0} {return ""} | ||
413 | set v [expr {$num*100.0/$denom}] | ||
414 | set of {} | ||
415 | if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} { | ||
416 | return [format {%5.1f%% %s} $v $of] | ||
417 | } elseif {$v<0.1 || $v>99.9} { | ||
418 | return [format {%7.3f%% %s} $v $of] | ||
419 | } else { | ||
420 | return [format {%6.2f%% %s} $v $of] | ||
421 | } | ||
422 | } | ||
423 | |||
424 | proc divide {num denom} { | ||
425 | if {$denom==0} {return 0.0} | ||
426 | return [format %.2f [expr double($num)/double($denom)]] | ||
427 | } | ||
428 | |||
429 | # Generate a subreport that covers some subset of the database. | ||
430 | # the $where clause determines which subset to analyze. | ||
431 | # | ||
432 | proc subreport {title where} { | ||
433 | global pageSize file_pgcnt | ||
434 | |||
435 | # Query the in-memory database for the sum of various statistics | ||
436 | # for the subset of tables/indices identified by the WHERE clause in | ||
437 | # $where. Note that even if the WHERE clause matches no rows, the | ||
438 | # following query returns exactly one row (because it is an aggregate). | ||
439 | # | ||
440 | # The results of the query are stored directly by SQLite into local | ||
441 | # variables (i.e. $nentry, $nleaf etc.). | ||
442 | # | ||
443 | mem eval " | ||
444 | SELECT | ||
445 | int(sum(nentry)) AS nentry, | ||
446 | int(sum(leaf_entries)) AS nleaf, | ||
447 | int(sum(payload)) AS payload, | ||
448 | int(sum(ovfl_payload)) AS ovfl_payload, | ||
449 | max(mx_payload) AS mx_payload, | ||
450 | int(sum(ovfl_cnt)) as ovfl_cnt, | ||
451 | int(sum(leaf_pages)) AS leaf_pages, | ||
452 | int(sum(int_pages)) AS int_pages, | ||
453 | int(sum(ovfl_pages)) AS ovfl_pages, | ||
454 | int(sum(leaf_unused)) AS leaf_unused, | ||
455 | int(sum(int_unused)) AS int_unused, | ||
456 | int(sum(ovfl_unused)) AS ovfl_unused, | ||
457 | int(sum(gap_cnt)) AS gap_cnt | ||
458 | FROM space_used WHERE $where" {} {} | ||
459 | |||
460 | # Output the sub-report title, nicely decorated with * characters. | ||
461 | # | ||
462 | puts "" | ||
463 | set len [string length $title] | ||
464 | set stars [string repeat * [expr 65-$len]] | ||
465 | puts "*** $title $stars" | ||
466 | puts "" | ||
467 | |||
468 | # Calculate statistics and store the results in TCL variables, as follows: | ||
469 | # | ||
470 | # total_pages: Database pages consumed. | ||
471 | # total_pages_percent: Pages consumed as a percentage of the file. | ||
472 | # storage: Bytes consumed. | ||
473 | # payload_percent: Payload bytes used as a percentage of $storage. | ||
474 | # total_unused: Unused bytes on pages. | ||
475 | # avg_payload: Average payload per btree entry. | ||
476 | # avg_fanout: Average fanout for internal pages. | ||
477 | # avg_unused: Average unused bytes per btree entry. | ||
478 | # ovfl_cnt_percent: Percentage of btree entries that use overflow pages. | ||
479 | # | ||
480 | set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] | ||
481 | set total_pages_percent [percent $total_pages $file_pgcnt] | ||
482 | set storage [expr {$total_pages*$pageSize}] | ||
483 | set payload_percent [percent $payload $storage {of storage consumed}] | ||
484 | set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] | ||
485 | set avg_payload [divide $payload $nleaf] | ||
486 | set avg_unused [divide $total_unused $nleaf] | ||
487 | if {$int_pages>0} { | ||
488 | # TODO: Is this formula correct? | ||
489 | set nTab [mem eval " | ||
490 | SELECT count(*) FROM ( | ||
491 | SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 | ||
492 | ) | ||
493 | "] | ||
494 | set avg_fanout [mem eval " | ||
495 | SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used | ||
496 | WHERE $where AND is_index = 0 | ||
497 | "] | ||
498 | set avg_fanout [format %.2f $avg_fanout] | ||
499 | } | ||
500 | set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] | ||
501 | |||
502 | # Print out the sub-report statistics. | ||
503 | # | ||
504 | statline {Percentage of total database} $total_pages_percent | ||
505 | statline {Number of entries} $nleaf | ||
506 | statline {Bytes of storage consumed} $storage | ||
507 | statline {Bytes of payload} $payload $payload_percent | ||
508 | statline {Average payload per entry} $avg_payload | ||
509 | statline {Average unused bytes per entry} $avg_unused | ||
510 | if {[info exists avg_fanout]} { | ||
511 | statline {Average fanout} $avg_fanout | ||
512 | } | ||
513 | if {$total_pages>1} { | ||
514 | set fragmentation [percent $gap_cnt [expr {$total_pages-1}] {fragmentation}] | ||
515 | statline {Fragmentation} $fragmentation | ||
516 | } | ||
517 | statline {Maximum payload per entry} $mx_payload | ||
518 | statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent | ||
519 | if {$int_pages>0} { | ||
520 | statline {Index pages used} $int_pages | ||
521 | } | ||
522 | statline {Primary pages used} $leaf_pages | ||
523 | statline {Overflow pages used} $ovfl_pages | ||
524 | statline {Total pages used} $total_pages | ||
525 | if {$int_unused>0} { | ||
526 | set int_unused_percent \ | ||
527 | [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] | ||
528 | statline "Unused bytes on index pages" $int_unused $int_unused_percent | ||
529 | } | ||
530 | statline "Unused bytes on primary pages" $leaf_unused \ | ||
531 | [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] | ||
532 | statline "Unused bytes on overflow pages" $ovfl_unused \ | ||
533 | [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] | ||
534 | statline "Unused bytes on all pages" $total_unused \ | ||
535 | [percent $total_unused $storage {of all space}] | ||
536 | return 1 | ||
537 | } | ||
538 | |||
539 | # Calculate the overhead in pages caused by auto-vacuum. | ||
540 | # | ||
541 | # This procedure calculates and returns the number of pages used by the | ||
542 | # auto-vacuum 'pointer-map'. If the database does not support auto-vacuum, | ||
543 | # then 0 is returned. The two arguments are the size of the database file in | ||
544 | # pages and the page size used by the database (in bytes). | ||
545 | proc autovacuum_overhead {filePages pageSize} { | ||
546 | |||
547 | # Read the value of meta 4. If non-zero, then the database supports | ||
548 | # auto-vacuum. It would be possible to use "PRAGMA auto_vacuum" instead, | ||
549 | # but that would not work if the SQLITE_OMIT_PRAGMA macro was defined | ||
550 | # when the library was built. | ||
551 | set meta4 [lindex [btree_get_meta $::DB] 4] | ||
552 | |||
553 | # If the database is not an auto-vacuum database or the file consists | ||
554 | # of one page only then there is no overhead for auto-vacuum. Return zero. | ||
555 | if {0==$meta4 || $filePages==1} { | ||
556 | return 0 | ||
557 | } | ||
558 | |||
559 | # The number of entries on each pointer map page. The layout of the | ||
560 | # database file is one pointer-map page, followed by $ptrsPerPage other | ||
561 | # pages, followed by a pointer-map page etc. The first pointer-map page | ||
562 | # is the second page of the file overall. | ||
563 | set ptrsPerPage [expr double($pageSize/5)] | ||
564 | |||
565 | # Return the number of pointer map pages in the database. | ||
566 | return [expr int(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] | ||
567 | } | ||
568 | |||
569 | |||
570 | # Calculate the summary statistics for the database and store the results | ||
571 | # in TCL variables. They are output below. Variables are as follows: | ||
572 | # | ||
573 | # pageSize: Size of each page in bytes. | ||
574 | # file_bytes: File size in bytes. | ||
575 | # file_pgcnt: Number of pages in the file. | ||
576 | # file_pgcnt2: Number of pages in the file (calculated). | ||
577 | # av_pgcnt: Pages consumed by the auto-vacuum pointer-map. | ||
578 | # av_percent: Percentage of the file consumed by auto-vacuum pointer-map. | ||
579 | # inuse_pgcnt: Data pages in the file. | ||
580 | # inuse_percent: Percentage of pages used to store data. | ||
581 | # free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>) | ||
582 | # free_pgcnt2: Free pages in the file according to the file header. | ||
583 | # free_percent: Percentage of file consumed by free pages (calculated). | ||
584 | # free_percent2: Percentage of file consumed by free pages (header). | ||
585 | # ntable: Number of tables in the db. | ||
586 | # nindex: Number of indices in the db. | ||
587 | # nautoindex: Number of indices created automatically. | ||
588 | # nmanindex: Number of indices created manually. | ||
589 | # user_payload: Number of bytes of payload in table btrees | ||
590 | # (not including sqlite_master) | ||
591 | # user_percent: $user_payload as a percentage of total file size. | ||
592 | |||
593 | set file_bytes [file size $file_to_analyze] | ||
594 | set file_pgcnt [expr {$file_bytes/$pageSize}] | ||
595 | |||
596 | set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] | ||
597 | set av_percent [percent $av_pgcnt $file_pgcnt] | ||
598 | |||
599 | set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} | ||
600 | set inuse_pgcnt [expr int([mem eval $sql])] | ||
601 | set inuse_percent [percent $inuse_pgcnt $file_pgcnt] | ||
602 | |||
603 | set free_pgcnt [expr $file_pgcnt-$inuse_pgcnt-$av_pgcnt] | ||
604 | set free_percent [percent $free_pgcnt $file_pgcnt] | ||
605 | set free_pgcnt2 [lindex [btree_get_meta $DB] 0] | ||
606 | set free_percent2 [percent $free_pgcnt2 $file_pgcnt] | ||
607 | |||
608 | set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}] | ||
609 | |||
610 | set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] | ||
611 | set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] | ||
612 | set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'} | ||
613 | set nautoindex [db eval $sql] | ||
614 | set nmanindex [expr {$nindex-$nautoindex}] | ||
615 | |||
616 | # set total_payload [mem eval "SELECT sum(payload) FROM space_used"] | ||
617 | set user_payload [mem one {SELECT int(sum(payload)) FROM space_used | ||
618 | WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}] | ||
619 | set user_percent [percent $user_payload $file_bytes] | ||
620 | |||
621 | # Output the summary statistics calculated above. | ||
622 | # | ||
623 | puts "/** Disk-Space Utilization Report For $file_to_analyze" | ||
624 | catch { | ||
625 | puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]" | ||
626 | } | ||
627 | puts "" | ||
628 | statline {Page size in bytes} $pageSize | ||
629 | statline {Pages in the whole file (measured)} $file_pgcnt | ||
630 | statline {Pages in the whole file (calculated)} $file_pgcnt2 | ||
631 | statline {Pages that store data} $inuse_pgcnt $inuse_percent | ||
632 | statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2 | ||
633 | statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent | ||
634 | statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent | ||
635 | statline {Number of tables in the database} $ntable | ||
636 | statline {Number of indices} $nindex | ||
637 | statline {Number of named indices} $nmanindex | ||
638 | statline {Automatically generated indices} $nautoindex | ||
639 | statline {Size of the file in bytes} $file_bytes | ||
640 | statline {Bytes of user payload stored} $user_payload $user_percent | ||
641 | |||
642 | # Output table rankings | ||
643 | # | ||
644 | puts "" | ||
645 | puts "*** Page counts for all tables with their indices ********************" | ||
646 | puts "" | ||
647 | mem eval {SELECT tblname, count(*) AS cnt, | ||
648 | int(sum(int_pages+leaf_pages+ovfl_pages)) AS size | ||
649 | FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { | ||
650 | statline [string toupper $tblname] $size [percent $size $file_pgcnt] | ||
651 | } | ||
652 | |||
653 | # Output subreports | ||
654 | # | ||
655 | if {$nindex>0} { | ||
656 | subreport {All tables and indices} 1 | ||
657 | } | ||
658 | subreport {All tables} {NOT is_index} | ||
659 | if {$nindex>0} { | ||
660 | subreport {All indices} {is_index} | ||
661 | } | ||
662 | foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index | ||
663 | ORDER BY name}] { | ||
664 | regsub ' $tbl '' qn | ||
665 | set name [string toupper $tbl] | ||
666 | set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"] | ||
667 | if {$n>1} { | ||
668 | subreport "Table $name and all its indices" "tblname='$qn'" | ||
669 | subreport "Table $name w/o any indices" "name='$qn'" | ||
670 | subreport "Indices of table $name" "tblname='$qn' AND is_index" | ||
671 | } else { | ||
672 | subreport "Table $name" "name='$qn'" | ||
673 | } | ||
674 | } | ||
675 | |||
676 | # Output instructions on what the numbers above mean. | ||
677 | # | ||
678 | puts { | ||
679 | *** Definitions ****************************************************** | ||
680 | |||
681 | Page size in bytes | ||
682 | |||
683 | The number of bytes in a single page of the database file. | ||
684 | Usually 1024. | ||
685 | |||
686 | Number of pages in the whole file | ||
687 | } | ||
688 | puts \ | ||
689 | " The number of $pageSize-byte pages that go into forming the complete | ||
690 | database" | ||
691 | puts \ | ||
692 | { | ||
693 | Pages that store data | ||
694 | |||
695 | The number of pages that store data, either as primary B*Tree pages or | ||
696 | as overflow pages. The number at the right is the data pages divided by | ||
697 | the total number of pages in the file. | ||
698 | |||
699 | Pages on the freelist | ||
700 | |||
701 | The number of pages that are not currently in use but are reserved for | ||
702 | future use. The percentage at the right is the number of freelist pages | ||
703 | divided by the total number of pages in the file. | ||
704 | |||
705 | Pages of auto-vacuum overhead | ||
706 | |||
707 | The number of pages that store data used by the database to facilitate | ||
708 | auto-vacuum. This is zero for databases that do not support auto-vacuum. | ||
709 | |||
710 | Number of tables in the database | ||
711 | |||
712 | The number of tables in the database, including the SQLITE_MASTER table | ||
713 | used to store schema information. | ||
714 | |||
715 | Number of indices | ||
716 | |||
717 | The total number of indices in the database. | ||
718 | |||
719 | Number of named indices | ||
720 | |||
721 | The number of indices created using an explicit CREATE INDEX statement. | ||
722 | |||
723 | Automatically generated indices | ||
724 | |||
725 | The number of indices used to implement PRIMARY KEY or UNIQUE constraints | ||
726 | on tables. | ||
727 | |||
728 | Size of the file in bytes | ||
729 | |||
730 | The total amount of disk space used by the entire database files. | ||
731 | |||
732 | Bytes of user payload stored | ||
733 | |||
734 | The total number of bytes of user payload stored in the database. The | ||
735 | schema information in the SQLITE_MASTER table is not counted when | ||
736 | computing this number. The percentage at the right shows the payload | ||
737 | divided by the total file size. | ||
738 | |||
739 | Percentage of total database | ||
740 | |||
741 | The amount of the complete database file that is devoted to storing | ||
742 | information described by this category. | ||
743 | |||
744 | Number of entries | ||
745 | |||
746 | The total number of B-Tree key/value pairs stored under this category. | ||
747 | |||
748 | Bytes of storage consumed | ||
749 | |||
750 | The total amount of disk space required to store all B-Tree entries | ||
751 | under this category. The is the total number of pages used times | ||
752 | the pages size. | ||
753 | |||
754 | Bytes of payload | ||
755 | |||
756 | The amount of payload stored under this category. Payload is the data | ||
757 | part of table entries and the key part of index entries. The percentage | ||
758 | at the right is the bytes of payload divided by the bytes of storage | ||
759 | consumed. | ||
760 | |||
761 | Average payload per entry | ||
762 | |||
763 | The average amount of payload on each entry. This is just the bytes of | ||
764 | payload divided by the number of entries. | ||
765 | |||
766 | Average unused bytes per entry | ||
767 | |||
768 | The average amount of free space remaining on all pages under this | ||
769 | category on a per-entry basis. This is the number of unused bytes on | ||
770 | all pages divided by the number of entries. | ||
771 | |||
772 | Fragmentation | ||
773 | |||
774 | The percentage of pages in the table or index that are not | ||
775 | consecutive in the disk file. Many filesystems are optimized | ||
776 | for sequential file access so smaller fragmentation numbers | ||
777 | sometimes result in faster queries, especially for larger | ||
778 | database files that do not fit in the disk cache. | ||
779 | |||
780 | Maximum payload per entry | ||
781 | |||
782 | The largest payload size of any entry. | ||
783 | |||
784 | Entries that use overflow | ||
785 | |||
786 | The number of entries that user one or more overflow pages. | ||
787 | |||
788 | Total pages used | ||
789 | |||
790 | This is the number of pages used to hold all information in the current | ||
791 | category. This is the sum of index, primary, and overflow pages. | ||
792 | |||
793 | Index pages used | ||
794 | |||
795 | This is the number of pages in a table B-tree that hold only key (rowid) | ||
796 | information and no data. | ||
797 | |||
798 | Primary pages used | ||
799 | |||
800 | This is the number of B-tree pages that hold both key and data. | ||
801 | |||
802 | Overflow pages used | ||
803 | |||
804 | The total number of overflow pages used for this category. | ||
805 | |||
806 | Unused bytes on index pages | ||
807 | |||
808 | The total number of bytes of unused space on all index pages. The | ||
809 | percentage at the right is the number of unused bytes divided by the | ||
810 | total number of bytes on index pages. | ||
811 | |||
812 | Unused bytes on primary pages | ||
813 | |||
814 | The total number of bytes of unused space on all primary pages. The | ||
815 | percentage at the right is the number of unused bytes divided by the | ||
816 | total number of bytes on primary pages. | ||
817 | |||
818 | Unused bytes on overflow pages | ||
819 | |||
820 | The total number of bytes of unused space on all overflow pages. The | ||
821 | percentage at the right is the number of unused bytes divided by the | ||
822 | total number of bytes on overflow pages. | ||
823 | |||
824 | Unused bytes on all pages | ||
825 | |||
826 | The total number of bytes of unused space on all primary and overflow | ||
827 | pages. The percentage at the right is the number of unused bytes | ||
828 | divided by the total number of bytes. | ||
829 | } | ||
830 | |||
831 | # Output a dump of the in-memory database. This can be used for more | ||
832 | # complex offline analysis. | ||
833 | # | ||
834 | puts "**********************************************************************" | ||
835 | puts "The entire text of this report can be sourced into any SQL database" | ||
836 | puts "engine for further analysis. All of the text above is an SQL comment." | ||
837 | puts "The data used to generate this report follows:" | ||
838 | puts "*/" | ||
839 | puts "BEGIN;" | ||
840 | puts $tabledef | ||
841 | unset -nocomplain x | ||
842 | mem eval {SELECT * FROM space_used} x { | ||
843 | puts -nonewline "INSERT INTO space_used VALUES" | ||
844 | set sep ( | ||
845 | foreach col $x(*) { | ||
846 | set v $x($col) | ||
847 | if {$v=="" || ![string is double $v]} {set v [quote $v]} | ||
848 | puts -nonewline $sep$v | ||
849 | set sep , | ||
850 | } | ||
851 | puts ");" | ||
852 | } | ||
853 | puts "COMMIT;" | ||
854 | |||
855 | } err]} { | ||
856 | puts "ERROR: $err" | ||
857 | puts $errorInfo | ||
858 | exit 1 | ||
859 | } | ||