aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl')
-rw-r--r--libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl859
1 files changed, 859 insertions, 0 deletions
diff --git a/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl b/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl
new file mode 100644
index 0000000..d70b442
--- /dev/null
+++ b/libraries/sqlite/unix/sqlite-3.5.1/tool/spaceanal.tcl
@@ -0,0 +1,859 @@
1# Run this TCL script using "testfixture" in order get a report that shows
2# how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5
6if {[catch {
7
8# Get the name of the database to analyze
9#
10#set argv $argv0
11if {[llength $argv]!=1} {
12 puts stderr "Usage: $argv0 database-name"
13 exit 1
14}
15set file_to_analyze [lindex $argv 0]
16if {![file exists $file_to_analyze]} {
17 puts stderr "No such file: $file_to_analyze"
18 exit 1
19}
20if {![file readable $file_to_analyze]} {
21 puts stderr "File is not readable: $file_to_analyze"
22 exit 1
23}
24if {[file size $file_to_analyze]<512} {
25 puts stderr "Empty or malformed database: $file_to_analyze"
26 exit 1
27}
28
29# Maximum distance between pages before we consider it a "gap"
30#
31set MAXGAP 3
32
33# Open the database
34#
35sqlite3 db [lindex $argv 0]
36set DB [btree_open [lindex $argv 0] 1000 0]
37
38# In-memory database for collecting statistics. This script loops through
39# the tables and indices in the database being analyzed, adding a row for each
40# to an in-memory database (for which the schema is shown below). It then
41# queries the in-memory db to produce the space-analysis report.
42#
43sqlite3 mem :memory:
44set tabledef\
45{CREATE TABLE space_used(
46 name clob, -- Name of a table or index in the database file
47 tblname clob, -- Name of associated table
48 is_index boolean, -- TRUE if it is an index, false for a table
49 nentry int, -- Number of entries in the BTree
50 leaf_entries int, -- Number of leaf entries
51 payload int, -- Total amount of data stored in this table or index
52 ovfl_payload int, -- Total amount of data stored on overflow pages
53 ovfl_cnt int, -- Number of entries that use overflow
54 mx_payload int, -- Maximum payload size
55 int_pages int, -- Number of interior pages used
56 leaf_pages int, -- Number of leaf pages used
57 ovfl_pages int, -- Number of overflow pages used
58 int_unused int, -- Number of unused bytes on interior pages
59 leaf_unused int, -- Number of unused bytes on primary pages
60 ovfl_unused int, -- Number of unused bytes on overflow pages
61 gap_cnt int -- Number of gaps in the page layout
62);}
63mem eval $tabledef
64
65proc integerify {real} {
66 return [expr int($real)]
67}
68mem function int integerify
69
70# Quote a string for use in an SQL query. Examples:
71#
72# [quote {hello world}] == {'hello world'}
73# [quote {hello world's}] == {'hello world''s'}
74#
75proc quote {txt} {
76 regsub -all ' $txt '' q
77 return '$q'
78}
79
80# This proc is a wrapper around the btree_cursor_info command. The
81# second argument is an open btree cursor returned by [btree_cursor].
82# The first argument is the name of an array variable that exists in
83# the scope of the caller. If the third argument is non-zero, then
84# info is returned for the page that lies $up entries upwards in the
85# tree-structure. (i.e. $up==1 returns the parent page, $up==2 the
86# grandparent etc.)
87#
88# The following entries in that array are filled in with information retrieved
89# using [btree_cursor_info]:
90#
91# $arrayvar(page_no) = The page number
92# $arrayvar(entry_no) = The entry number
93# $arrayvar(page_entries) = Total number of entries on this page
94# $arrayvar(cell_size) = Cell size (local payload + header)
95# $arrayvar(page_freebytes) = Number of free bytes on this page
96# $arrayvar(page_freeblocks) = Number of free blocks on the page
97# $arrayvar(payload_bytes) = Total payload size (local + overflow)
98# $arrayvar(header_bytes) = Header size in bytes
99# $arrayvar(local_payload_bytes) = Local payload size
100# $arrayvar(parent) = Parent page number
101#
102proc cursor_info {arrayvar csr {up 0}} {
103 upvar $arrayvar a
104 foreach [list a(page_no) \
105 a(entry_no) \
106 a(page_entries) \
107 a(cell_size) \
108 a(page_freebytes) \
109 a(page_freeblocks) \
110 a(payload_bytes) \
111 a(header_bytes) \
112 a(local_payload_bytes) \
113 a(parent) \
114 a(first_ovfl) ] [btree_cursor_info $csr $up] break
115}
116
117# Determine the page-size of the database. This global variable is used
118# throughout the script.
119#
120set pageSize [db eval {PRAGMA page_size}]
121
122# Analyze every table in the database, one at a time.
123#
124# The following query returns the name and root-page of each table in the
125# database, including the sqlite_master table.
126#
127set sql {
128 SELECT name, rootpage FROM sqlite_master
129 WHERE type='table' AND rootpage>0
130 UNION ALL
131 SELECT 'sqlite_master', 1
132 ORDER BY 1
133}
134set wideZero [expr {10000000000 - 10000000000}]
135foreach {name rootpage} [db eval $sql] {
136 puts stderr "Analyzing table $name..."
137
138 # Code below traverses the table being analyzed (table name $name), using the
139 # btree cursor $cursor. Statistics related to table $name are accumulated in
140 # the following variables:
141 #
142 set total_payload $wideZero ;# Payload space used by all entries
143 set total_ovfl $wideZero ;# Payload space on overflow pages
144 set unused_int $wideZero ;# Unused space on interior nodes
145 set unused_leaf $wideZero ;# Unused space on leaf nodes
146 set unused_ovfl $wideZero ;# Unused space on overflow pages
147 set cnt_ovfl $wideZero ;# Number of entries that use overflows
148 set cnt_leaf_entry $wideZero ;# Number of leaf entries
149 set cnt_int_entry $wideZero ;# Number of interor entries
150 set mx_payload $wideZero ;# Maximum payload size
151 set ovfl_pages $wideZero ;# Number of overflow pages used
152 set leaf_pages $wideZero ;# Number of leaf pages
153 set int_pages $wideZero ;# Number of interior pages
154 set gap_cnt 0 ;# Number of holes in the page sequence
155 set prev_pgno 0 ;# Last page number seen
156
157 # As the btree is traversed, the array variable $seen($pgno) is set to 1
158 # the first time page $pgno is encountered.
159 #
160 catch {unset seen}
161
162 # The following loop runs once for each entry in table $name. The table
163 # is traversed using the btree cursor stored in variable $csr
164 #
165 set csr [btree_cursor $DB $rootpage 0]
166 for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} {
167 incr cnt_leaf_entry
168
169 # Retrieve information about the entry the btree-cursor points to into
170 # the array variable $ci (cursor info).
171 #
172 cursor_info ci $csr
173
174 # Check if the payload of this entry is greater than the current
175 # $mx_payload statistic for the table. Also increase the $total_payload
176 # statistic.
177 #
178 if {$ci(payload_bytes)>$mx_payload} {set mx_payload $ci(payload_bytes)}
179 incr total_payload $ci(payload_bytes)
180
181 # If this entry uses overflow pages, then update the $cnt_ovfl,
182 # $total_ovfl, $ovfl_pages and $unused_ovfl statistics.
183 #
184 set ovfl [expr {$ci(payload_bytes)-$ci(local_payload_bytes)}]
185 if {$ovfl} {
186 incr cnt_ovfl
187 incr total_ovfl $ovfl
188 set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
189 incr ovfl_pages $n
190 incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
191 set pglist [btree_ovfl_info $DB $csr]
192 } else {
193 set pglist {}
194 }
195
196 # If this is the first table entry analyzed for the page, then update
197 # the page-related statistics $leaf_pages and $unused_leaf. Also, if
198 # this page has a parent page that has not been analyzed, retrieve
199 # info for the parent and update statistics for it too.
200 #
201 if {![info exists seen($ci(page_no))]} {
202 set seen($ci(page_no)) 1
203 incr leaf_pages
204 incr unused_leaf $ci(page_freebytes)
205 set pglist "$ci(page_no) $pglist"
206
207 # Now check if the page has a parent that has not been analyzed. If
208 # so, update the $int_pages, $cnt_int_entry and $unused_int statistics
209 # accordingly. Then check if the parent page has a parent that has
210 # not yet been analyzed etc.
211 #
212 # set parent $ci(parent_page_no)
213 for {set up 1} \
214 {$ci(parent)!=0 && ![info exists seen($ci(parent))]} {incr up} \
215 {
216 # Mark the parent as seen.
217 #
218 set seen($ci(parent)) 1
219
220 # Retrieve info for the parent and update statistics.
221 cursor_info ci $csr $up
222 incr int_pages
223 incr cnt_int_entry $ci(page_entries)
224 incr unused_int $ci(page_freebytes)
225
226 # parent pages come before their first child
227 set pglist "$ci(page_no) $pglist"
228 }
229 }
230
231 # Check the page list for fragmentation
232 #
233 foreach pg $pglist {
234 if {$pg!=$prev_pgno+1 && $prev_pgno>0} {
235 incr gap_cnt
236 }
237 set prev_pgno $pg
238 }
239 }
240 btree_close_cursor $csr
241
242 # Handle the special case where a table contains no data. In this case
243 # all statistics are zero, except for the number of leaf pages (1) and
244 # the unused bytes on leaf pages ($pageSize - 8).
245 #
246 # An exception to the above is the sqlite_master table. If it is empty
247 # then all statistics are zero except for the number of leaf pages (1),
248 # and the number of unused bytes on leaf pages ($pageSize - 112).
249 #
250 if {[llength [array names seen]]==0} {
251 set leaf_pages 1
252 if {$rootpage==1} {
253 set unused_leaf [expr {$pageSize-112}]
254 } else {
255 set unused_leaf [expr {$pageSize-8}]
256 }
257 }
258
259 # Insert the statistics for the table analyzed into the in-memory database.
260 #
261 set sql "INSERT INTO space_used VALUES("
262 append sql [quote $name]
263 append sql ",[quote $name]"
264 append sql ",0"
265 append sql ",[expr {$cnt_leaf_entry+$cnt_int_entry}]"
266 append sql ",$cnt_leaf_entry"
267 append sql ",$total_payload"
268 append sql ",$total_ovfl"
269 append sql ",$cnt_ovfl"
270 append sql ",$mx_payload"
271 append sql ",$int_pages"
272 append sql ",$leaf_pages"
273 append sql ",$ovfl_pages"
274 append sql ",$unused_int"
275 append sql ",$unused_leaf"
276 append sql ",$unused_ovfl"
277 append sql ",$gap_cnt"
278 append sql );
279 mem eval $sql
280}
281
282# Analyze every index in the database, one at a time.
283#
284# The query below returns the name, associated table and root-page number
285# for every index in the database.
286#
287set sql {
288 SELECT name, tbl_name, rootpage FROM sqlite_master WHERE type='index'
289 ORDER BY 2, 1
290}
291foreach {name tbl_name rootpage} [db eval $sql] {
292 puts stderr "Analyzing index $name of table $tbl_name..."
293
294 # Code below traverses the index being analyzed (index name $name), using the
295 # btree cursor $cursor. Statistics related to index $name are accumulated in
296 # the following variables:
297 #
298 set total_payload $wideZero ;# Payload space used by all entries
299 set total_ovfl $wideZero ;# Payload space on overflow pages
300 set unused_leaf $wideZero ;# Unused space on leaf nodes
301 set unused_ovfl $wideZero ;# Unused space on overflow pages
302 set cnt_ovfl $wideZero ;# Number of entries that use overflows
303 set cnt_leaf_entry $wideZero ;# Number of leaf entries
304 set mx_payload $wideZero ;# Maximum payload size
305 set ovfl_pages $wideZero ;# Number of overflow pages used
306 set leaf_pages $wideZero ;# Number of leaf pages
307 set gap_cnt 0 ;# Number of holes in the page sequence
308 set prev_pgno 0 ;# Last page number seen
309
310 # As the btree is traversed, the array variable $seen($pgno) is set to 1
311 # the first time page $pgno is encountered.
312 #
313 catch {unset seen}
314
315 # The following loop runs once for each entry in index $name. The index
316 # is traversed using the btree cursor stored in variable $csr
317 #
318 set csr [btree_cursor $DB $rootpage 0]
319 for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} {
320 incr cnt_leaf_entry
321
322 # Retrieve information about the entry the btree-cursor points to into
323 # the array variable $ci (cursor info).
324 #
325 cursor_info ci $csr
326
327 # Check if the payload of this entry is greater than the current
328 # $mx_payload statistic for the table. Also increase the $total_payload
329 # statistic.
330 #
331 set payload [btree_keysize $csr]
332 if {$payload>$mx_payload} {set mx_payload $payload}
333 incr total_payload $payload
334
335 # If this entry uses overflow pages, then update the $cnt_ovfl,
336 # $total_ovfl, $ovfl_pages and $unused_ovfl statistics.
337 #
338 set ovfl [expr {$payload-$ci(local_payload_bytes)}]
339 if {$ovfl} {
340 incr cnt_ovfl
341 incr total_ovfl $ovfl
342 set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
343 incr ovfl_pages $n
344 incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
345 }
346
347 # If this is the first table entry analyzed for the page, then update
348 # the page-related statistics $leaf_pages and $unused_leaf.
349 #
350 if {![info exists seen($ci(page_no))]} {
351 set seen($ci(page_no)) 1
352 incr leaf_pages
353 incr unused_leaf $ci(page_freebytes)
354 set pg $ci(page_no)
355 if {$prev_pgno>0 && $pg!=$prev_pgno+1} {
356 incr gap_cnt
357 }
358 set prev_pgno $ci(page_no)
359 }
360 }
361 btree_close_cursor $csr
362
363 # Handle the special case where a index contains no data. In this case
364 # all statistics are zero, except for the number of leaf pages (1) and
365 # the unused bytes on leaf pages ($pageSize - 8).
366 #
367 if {[llength [array names seen]]==0} {
368 set leaf_pages 1
369 set unused_leaf [expr {$pageSize-8}]
370 }
371
372 # Insert the statistics for the index analyzed into the in-memory database.
373 #
374 set sql "INSERT INTO space_used VALUES("
375 append sql [quote $name]
376 append sql ",[quote $tbl_name]"
377 append sql ",1"
378 append sql ",$cnt_leaf_entry"
379 append sql ",$cnt_leaf_entry"
380 append sql ",$total_payload"
381 append sql ",$total_ovfl"
382 append sql ",$cnt_ovfl"
383 append sql ",$mx_payload"
384 append sql ",0"
385 append sql ",$leaf_pages"
386 append sql ",$ovfl_pages"
387 append sql ",0"
388 append sql ",$unused_leaf"
389 append sql ",$unused_ovfl"
390 append sql ",$gap_cnt"
391 append sql );
392 mem eval $sql
393}
394
395# Generate a single line of output in the statistics section of the
396# report.
397#
398proc statline {title value {extra {}}} {
399 set len [string length $title]
400 set dots [string range {......................................} $len end]
401 set len [string length $value]
402 set sp2 [string range { } $len end]
403 if {$extra ne ""} {
404 set extra " $extra"
405 }
406 puts "$title$dots $value$sp2$extra"
407}
408
409# Generate a formatted percentage value for $num/$denom
410#
411proc percent {num denom {of {}}} {
412 if {$denom==0.0} {return ""}
413 set v [expr {$num*100.0/$denom}]
414 set of {}
415 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
416 return [format {%5.1f%% %s} $v $of]
417 } elseif {$v<0.1 || $v>99.9} {
418 return [format {%7.3f%% %s} $v $of]
419 } else {
420 return [format {%6.2f%% %s} $v $of]
421 }
422}
423
424proc divide {num denom} {
425 if {$denom==0} {return 0.0}
426 return [format %.2f [expr double($num)/double($denom)]]
427}
428
429# Generate a subreport that covers some subset of the database.
430# the $where clause determines which subset to analyze.
431#
432proc subreport {title where} {
433 global pageSize file_pgcnt
434
435 # Query the in-memory database for the sum of various statistics
436 # for the subset of tables/indices identified by the WHERE clause in
437 # $where. Note that even if the WHERE clause matches no rows, the
438 # following query returns exactly one row (because it is an aggregate).
439 #
440 # The results of the query are stored directly by SQLite into local
441 # variables (i.e. $nentry, $nleaf etc.).
442 #
443 mem eval "
444 SELECT
445 int(sum(nentry)) AS nentry,
446 int(sum(leaf_entries)) AS nleaf,
447 int(sum(payload)) AS payload,
448 int(sum(ovfl_payload)) AS ovfl_payload,
449 max(mx_payload) AS mx_payload,
450 int(sum(ovfl_cnt)) as ovfl_cnt,
451 int(sum(leaf_pages)) AS leaf_pages,
452 int(sum(int_pages)) AS int_pages,
453 int(sum(ovfl_pages)) AS ovfl_pages,
454 int(sum(leaf_unused)) AS leaf_unused,
455 int(sum(int_unused)) AS int_unused,
456 int(sum(ovfl_unused)) AS ovfl_unused,
457 int(sum(gap_cnt)) AS gap_cnt
458 FROM space_used WHERE $where" {} {}
459
460 # Output the sub-report title, nicely decorated with * characters.
461 #
462 puts ""
463 set len [string length $title]
464 set stars [string repeat * [expr 65-$len]]
465 puts "*** $title $stars"
466 puts ""
467
468 # Calculate statistics and store the results in TCL variables, as follows:
469 #
470 # total_pages: Database pages consumed.
471 # total_pages_percent: Pages consumed as a percentage of the file.
472 # storage: Bytes consumed.
473 # payload_percent: Payload bytes used as a percentage of $storage.
474 # total_unused: Unused bytes on pages.
475 # avg_payload: Average payload per btree entry.
476 # avg_fanout: Average fanout for internal pages.
477 # avg_unused: Average unused bytes per btree entry.
478 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
479 #
480 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
481 set total_pages_percent [percent $total_pages $file_pgcnt]
482 set storage [expr {$total_pages*$pageSize}]
483 set payload_percent [percent $payload $storage {of storage consumed}]
484 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
485 set avg_payload [divide $payload $nleaf]
486 set avg_unused [divide $total_unused $nleaf]
487 if {$int_pages>0} {
488 # TODO: Is this formula correct?
489 set nTab [mem eval "
490 SELECT count(*) FROM (
491 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
492 )
493 "]
494 set avg_fanout [mem eval "
495 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
496 WHERE $where AND is_index = 0
497 "]
498 set avg_fanout [format %.2f $avg_fanout]
499 }
500 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}]
501
502 # Print out the sub-report statistics.
503 #
504 statline {Percentage of total database} $total_pages_percent
505 statline {Number of entries} $nleaf
506 statline {Bytes of storage consumed} $storage
507 statline {Bytes of payload} $payload $payload_percent
508 statline {Average payload per entry} $avg_payload
509 statline {Average unused bytes per entry} $avg_unused
510 if {[info exists avg_fanout]} {
511 statline {Average fanout} $avg_fanout
512 }
513 if {$total_pages>1} {
514 set fragmentation [percent $gap_cnt [expr {$total_pages-1}] {fragmentation}]
515 statline {Fragmentation} $fragmentation
516 }
517 statline {Maximum payload per entry} $mx_payload
518 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
519 if {$int_pages>0} {
520 statline {Index pages used} $int_pages
521 }
522 statline {Primary pages used} $leaf_pages
523 statline {Overflow pages used} $ovfl_pages
524 statline {Total pages used} $total_pages
525 if {$int_unused>0} {
526 set int_unused_percent \
527 [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
528 statline "Unused bytes on index pages" $int_unused $int_unused_percent
529 }
530 statline "Unused bytes on primary pages" $leaf_unused \
531 [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
532 statline "Unused bytes on overflow pages" $ovfl_unused \
533 [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
534 statline "Unused bytes on all pages" $total_unused \
535 [percent $total_unused $storage {of all space}]
536 return 1
537}
538
539# Calculate the overhead in pages caused by auto-vacuum.
540#
541# This procedure calculates and returns the number of pages used by the
542# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
543# then 0 is returned. The two arguments are the size of the database file in
544# pages and the page size used by the database (in bytes).
545proc autovacuum_overhead {filePages pageSize} {
546
547 # Read the value of meta 4. If non-zero, then the database supports
548 # auto-vacuum. It would be possible to use "PRAGMA auto_vacuum" instead,
549 # but that would not work if the SQLITE_OMIT_PRAGMA macro was defined
550 # when the library was built.
551 set meta4 [lindex [btree_get_meta $::DB] 4]
552
553 # If the database is not an auto-vacuum database or the file consists
554 # of one page only then there is no overhead for auto-vacuum. Return zero.
555 if {0==$meta4 || $filePages==1} {
556 return 0
557 }
558
559 # The number of entries on each pointer map page. The layout of the
560 # database file is one pointer-map page, followed by $ptrsPerPage other
561 # pages, followed by a pointer-map page etc. The first pointer-map page
562 # is the second page of the file overall.
563 set ptrsPerPage [expr double($pageSize/5)]
564
565 # Return the number of pointer map pages in the database.
566 return [expr int(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
567}
568
569
570# Calculate the summary statistics for the database and store the results
571# in TCL variables. They are output below. Variables are as follows:
572#
573# pageSize: Size of each page in bytes.
574# file_bytes: File size in bytes.
575# file_pgcnt: Number of pages in the file.
576# file_pgcnt2: Number of pages in the file (calculated).
577# av_pgcnt: Pages consumed by the auto-vacuum pointer-map.
578# av_percent: Percentage of the file consumed by auto-vacuum pointer-map.
579# inuse_pgcnt: Data pages in the file.
580# inuse_percent: Percentage of pages used to store data.
581# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>)
582# free_pgcnt2: Free pages in the file according to the file header.
583# free_percent: Percentage of file consumed by free pages (calculated).
584# free_percent2: Percentage of file consumed by free pages (header).
585# ntable: Number of tables in the db.
586# nindex: Number of indices in the db.
587# nautoindex: Number of indices created automatically.
588# nmanindex: Number of indices created manually.
589# user_payload: Number of bytes of payload in table btrees
590# (not including sqlite_master)
591# user_percent: $user_payload as a percentage of total file size.
592
593set file_bytes [file size $file_to_analyze]
594set file_pgcnt [expr {$file_bytes/$pageSize}]
595
596set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize]
597set av_percent [percent $av_pgcnt $file_pgcnt]
598
599set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
600set inuse_pgcnt [expr int([mem eval $sql])]
601set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
602
603set free_pgcnt [expr $file_pgcnt-$inuse_pgcnt-$av_pgcnt]
604set free_percent [percent $free_pgcnt $file_pgcnt]
605set free_pgcnt2 [lindex [btree_get_meta $DB] 0]
606set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
607
608set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
609
610set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
611set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
612set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
613set nautoindex [db eval $sql]
614set nmanindex [expr {$nindex-$nautoindex}]
615
616# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
617set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
618 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
619set user_percent [percent $user_payload $file_bytes]
620
621# Output the summary statistics calculated above.
622#
623puts "/** Disk-Space Utilization Report For $file_to_analyze"
624catch {
625 puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
626}
627puts ""
628statline {Page size in bytes} $pageSize
629statline {Pages in the whole file (measured)} $file_pgcnt
630statline {Pages in the whole file (calculated)} $file_pgcnt2
631statline {Pages that store data} $inuse_pgcnt $inuse_percent
632statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
633statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
634statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
635statline {Number of tables in the database} $ntable
636statline {Number of indices} $nindex
637statline {Number of named indices} $nmanindex
638statline {Automatically generated indices} $nautoindex
639statline {Size of the file in bytes} $file_bytes
640statline {Bytes of user payload stored} $user_payload $user_percent
641
642# Output table rankings
643#
644puts ""
645puts "*** Page counts for all tables with their indices ********************"
646puts ""
647mem eval {SELECT tblname, count(*) AS cnt,
648 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
649 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
650 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
651}
652
653# Output subreports
654#
655if {$nindex>0} {
656 subreport {All tables and indices} 1
657}
658subreport {All tables} {NOT is_index}
659if {$nindex>0} {
660 subreport {All indices} {is_index}
661}
662foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
663 ORDER BY name}] {
664 regsub ' $tbl '' qn
665 set name [string toupper $tbl]
666 set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
667 if {$n>1} {
668 subreport "Table $name and all its indices" "tblname='$qn'"
669 subreport "Table $name w/o any indices" "name='$qn'"
670 subreport "Indices of table $name" "tblname='$qn' AND is_index"
671 } else {
672 subreport "Table $name" "name='$qn'"
673 }
674}
675
676# Output instructions on what the numbers above mean.
677#
678puts {
679*** Definitions ******************************************************
680
681Page size in bytes
682
683 The number of bytes in a single page of the database file.
684 Usually 1024.
685
686Number of pages in the whole file
687}
688puts \
689" The number of $pageSize-byte pages that go into forming the complete
690 database"
691puts \
692{
693Pages that store data
694
695 The number of pages that store data, either as primary B*Tree pages or
696 as overflow pages. The number at the right is the data pages divided by
697 the total number of pages in the file.
698
699Pages on the freelist
700
701 The number of pages that are not currently in use but are reserved for
702 future use. The percentage at the right is the number of freelist pages
703 divided by the total number of pages in the file.
704
705Pages of auto-vacuum overhead
706
707 The number of pages that store data used by the database to facilitate
708 auto-vacuum. This is zero for databases that do not support auto-vacuum.
709
710Number of tables in the database
711
712 The number of tables in the database, including the SQLITE_MASTER table
713 used to store schema information.
714
715Number of indices
716
717 The total number of indices in the database.
718
719Number of named indices
720
721 The number of indices created using an explicit CREATE INDEX statement.
722
723Automatically generated indices
724
725 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
726 on tables.
727
728Size of the file in bytes
729
730 The total amount of disk space used by the entire database files.
731
732Bytes of user payload stored
733
734 The total number of bytes of user payload stored in the database. The
735 schema information in the SQLITE_MASTER table is not counted when
736 computing this number. The percentage at the right shows the payload
737 divided by the total file size.
738
739Percentage of total database
740
741 The amount of the complete database file that is devoted to storing
742 information described by this category.
743
744Number of entries
745
746 The total number of B-Tree key/value pairs stored under this category.
747
748Bytes of storage consumed
749
750 The total amount of disk space required to store all B-Tree entries
751 under this category. The is the total number of pages used times
752 the pages size.
753
754Bytes of payload
755
756 The amount of payload stored under this category. Payload is the data
757 part of table entries and the key part of index entries. The percentage
758 at the right is the bytes of payload divided by the bytes of storage
759 consumed.
760
761Average payload per entry
762
763 The average amount of payload on each entry. This is just the bytes of
764 payload divided by the number of entries.
765
766Average unused bytes per entry
767
768 The average amount of free space remaining on all pages under this
769 category on a per-entry basis. This is the number of unused bytes on
770 all pages divided by the number of entries.
771
772Fragmentation
773
774 The percentage of pages in the table or index that are not
775 consecutive in the disk file. Many filesystems are optimized
776 for sequential file access so smaller fragmentation numbers
777 sometimes result in faster queries, especially for larger
778 database files that do not fit in the disk cache.
779
780Maximum payload per entry
781
782 The largest payload size of any entry.
783
784Entries that use overflow
785
786 The number of entries that user one or more overflow pages.
787
788Total pages used
789
790 This is the number of pages used to hold all information in the current
791 category. This is the sum of index, primary, and overflow pages.
792
793Index pages used
794
795 This is the number of pages in a table B-tree that hold only key (rowid)
796 information and no data.
797
798Primary pages used
799
800 This is the number of B-tree pages that hold both key and data.
801
802Overflow pages used
803
804 The total number of overflow pages used for this category.
805
806Unused bytes on index pages
807
808 The total number of bytes of unused space on all index pages. The
809 percentage at the right is the number of unused bytes divided by the
810 total number of bytes on index pages.
811
812Unused bytes on primary pages
813
814 The total number of bytes of unused space on all primary pages. The
815 percentage at the right is the number of unused bytes divided by the
816 total number of bytes on primary pages.
817
818Unused bytes on overflow pages
819
820 The total number of bytes of unused space on all overflow pages. The
821 percentage at the right is the number of unused bytes divided by the
822 total number of bytes on overflow pages.
823
824Unused bytes on all pages
825
826 The total number of bytes of unused space on all primary and overflow
827 pages. The percentage at the right is the number of unused bytes
828 divided by the total number of bytes.
829}
830
831# Output a dump of the in-memory database. This can be used for more
832# complex offline analysis.
833#
834puts "**********************************************************************"
835puts "The entire text of this report can be sourced into any SQL database"
836puts "engine for further analysis. All of the text above is an SQL comment."
837puts "The data used to generate this report follows:"
838puts "*/"
839puts "BEGIN;"
840puts $tabledef
841unset -nocomplain x
842mem eval {SELECT * FROM space_used} x {
843 puts -nonewline "INSERT INTO space_used VALUES"
844 set sep (
845 foreach col $x(*) {
846 set v $x($col)
847 if {$v=="" || ![string is double $v]} {set v [quote $v]}
848 puts -nonewline $sep$v
849 set sep ,
850 }
851 puts ");"
852}
853puts "COMMIT;"
854
855} err]} {
856 puts "ERROR: $err"
857 puts $errorInfo
858 exit 1
859}