Rev 2: Get workable stats. in http://people.ubuntu.com/~robertc/baz2.0/plugins/repodetails/trunk
Robert Collins
robertc at robertcollins.net
Fri Oct 17 04:11:37 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/repodetails/trunk
------------------------------------------------------------
revno: 2
revision-id: robertc at robertcollins.net-20081017031136-ucaw6x6oc0axse22
parent: robertc at robertcollins.net-20081017015000-00m52hln7vqsye3d
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-10-17 14:11:36 +1100
message:
Get workable stats.
modified:
README readme-20081017014933-iriuw53viune2txe-1
__init__.py __init__.py-20081017014933-iriuw53viune2txe-2
tests/test_repositorydetails.py test_repositorydetai-20081017014933-iriuw53viune2txe-6
=== modified file 'README'
--- a/README 2008-10-17 01:50:00 +0000
+++ b/README 2008-10-17 03:11:36 +0000
@@ -2,3 +2,5 @@
Install as any normal bzr plugin, see bzr help repodetails for usage details.
+Note that the tests for this are likely very machine-specific; patches to them
+should pin down all the variables to make them more deterministic.
=== modified file '__init__.py'
--- a/__init__.py 2008-10-17 01:50:00 +0000
+++ b/__init__.py 2008-10-17 03:11:36 +0000
@@ -23,16 +23,80 @@
version_info = (1, 9, 0, 'dev', 0)
import bzrlib.commands
-from bzrlib import repository
+from bzrlib import chk_map, repository
+from bzrlib.inventory import CHKInventory
+from bzrlib.repofmt.pack_repo import (
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ RepositoryFormatKnitPack5,
+ RepositoryFormatKnitPack5RichRoot,
+ RepositoryFormatKnitPack5RichRootBroken,
+ RepositoryFormatPackDevelopment2,
+ RepositoryFormatPackDevelopment2Subtree,
+ RepositoryFormatPackDevelopment3,
+ RepositoryFormatPackDevelopment3Subtree,
+ )
+
+pack_rev_types = (
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ RepositoryFormatKnitPack5,
+ RepositoryFormatKnitPack5RichRoot,
+ RepositoryFormatKnitPack5RichRootBroken,
+ RepositoryFormatPackDevelopment2,
+ RepositoryFormatPackDevelopment2Subtree,
+ RepositoryFormatPackDevelopment3,
+ RepositoryFormatPackDevelopment3Subtree,
+ )
+pack_signature_types = (
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ RepositoryFormatKnitPack5,
+ RepositoryFormatKnitPack5RichRoot,
+ RepositoryFormatKnitPack5RichRootBroken,
+ RepositoryFormatPackDevelopment2,
+ RepositoryFormatPackDevelopment2Subtree,
+ RepositoryFormatPackDevelopment3,
+ RepositoryFormatPackDevelopment3Subtree,
+ )
+pack_text_types = (
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ RepositoryFormatKnitPack5,
+ RepositoryFormatKnitPack5RichRoot,
+ RepositoryFormatKnitPack5RichRootBroken,
+ RepositoryFormatPackDevelopment2,
+ RepositoryFormatPackDevelopment2Subtree,
+ RepositoryFormatPackDevelopment3,
+ RepositoryFormatPackDevelopment3Subtree,
+ )
+pack_xml_inv_types = (
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ RepositoryFormatKnitPack5,
+ RepositoryFormatKnitPack5RichRoot,
+ RepositoryFormatKnitPack5RichRootBroken,
+ RepositoryFormatPackDevelopment2,
+ RepositoryFormatPackDevelopment2Subtree,
+ )
+pack_chk_inv_types = (
+ RepositoryFormatPackDevelopment3,
+ RepositoryFormatPackDevelopment3Subtree,
+ )
class ObjectStats(object):
"""Statistics for a particular type of object."""
def __init__(self):
- self.objects = 1
- self.raw_size = 1
- self.compressed_size = 1
+ self.objects = 0
+ self.raw_size = 0
+ self.compressed_size = 0
class RepoStats(object):
@@ -52,37 +116,111 @@
repo.unlock()
+def _gather_object_vf_texts(objectstats, vf):
+ for text in _gather_and_iter_object_vf_texts(objectstats, vf):
+ pass
+
+
+def _gather_and_iter_object_vf_texts(objectstats, vf, keys=None):
+ if keys is None:
+ keys = vf.keys()
+ objectstats.objects += len(keys)
+ # XXX: Doesn't consider duplicate-in-separate-packs overhead.
+ details = vf._index.get_build_details(keys)
+ for detail in details.itervalues():
+ objectstats.compressed_size += detail[0][2]
+ for entry in vf.get_record_stream(keys, 'unordered', True):
+ bytes = entry.get_bytes_as('fulltext')
+ objectstats.raw_size += len(bytes)
+ yield bytes, entry.key
+
+
+def _gather_chk_inv(objectstats, repo):
+ # first pass: the inventory objects yield chk dicts:
+ pending = set()
+ done = set()
+ for bytes, key in _gather_and_iter_object_vf_texts(objectstats, repo.inventories):
+ inv = CHKInventory.deserialise(repo.chk_bytes, bytes, key)
+ pending.add(inv.id_to_entry._root_node)
+ while pending:
+ # Don't visit nodes twice
+ done.update(pending)
+ next = pending
+ pending = set()
+ for bytes, key in _gather_and_iter_object_vf_texts(objectstats, repo.chk_bytes, next):
+ node = chk_map._deserialise(bytes, key)
+ pending.update(node.refs())
+
+
def _gather_stats_locked(repo):
result = RepoStats()
- result.revision_count = 1
result.revisions = ObjectStats()
result.inventories = ObjectStats()
result.texts = ObjectStats()
result.signatures = ObjectStats()
- result.signatures.objects = len(repo.signatures.keys())
+ keys = repo.revisions.keys()
+ result.revision_count = len(keys)
+ if isinstance(repo._format, pack_rev_types):
+ _gather_object_vf_texts(result.revisions, repo.revisions)
+ else:
+ raise AssertionError("Don't know how to process %r" % repo)
+ if isinstance(repo._format, pack_xml_inv_types):
+ _gather_object_vf_texts(result.inventories, repo.inventories)
+ elif isinstance(repo._format, pack_chk_inv_types):
+ _gather_chk_inv(result.inventories, repo)
+ else:
+ raise AssertionError("Don't know how to process %r" % repo)
+ if isinstance(repo._format, pack_text_types):
+ _gather_object_vf_texts(result.texts, repo.texts)
+ else:
+ raise AssertionError("Don't know how to process %r" % repo)
+ if isinstance(repo._format, pack_signature_types):
+ _gather_object_vf_texts(result.signatures, repo.signatures)
+ else:
+ raise AssertionError("Don't know how to process %r" % repo)
+ result.total = ObjectStats()
+ result.total.objects = sum([result.revisions.objects,
+ result.inventories.objects, result.texts.objects,
+ result.signatures.objects])
+ result.total.raw_size = sum([result.revisions.raw_size,
+ result.inventories.raw_size, result.texts.raw_size,
+ result.signatures.raw_size])
+ result.total.compressed_size = sum([result.revisions.compressed_size,
+ result.inventories.compressed_size, result.texts.compressed_size,
+ result.signatures.compressed_size])
return result
class cmd_repository_details(bzrlib.commands.Command):
- """Get details for a bzr repository."""
+ """Get details for a bzr repository.
+
+ Stacked repositories have both the repo and what its stacked on analysed,
+ or may break... who knows.
+ """
- def _format_object(self, objectstats):
- return "%6d KiB %6d KiB %7d" % (objectstats.raw_size/1024,
- objectstats.compressed_size/1024, objectstats.objects)
+ def _format_object(self, objectstats, total):
+ # Mangle the percentages for very small repos to avoid divide by zero.
+ raw_percent = (objectstats.raw_size + 1)*100/(total.raw_size + 1)
+ compressed_percent = (objectstats.compressed_size + 1)*100/(total.compressed_size + 1)
+ return "%6d KiB %3d%% %6d KiB %3d%% %7d" % (objectstats.raw_size/1024,
+ raw_percent, objectstats.compressed_size/1024, compressed_percent,
+ objectstats.objects)
def run(self):
repo = repository.Repository.open(".")
stats = gather_stats(repo)
self.outf.write("Commits: %d\n" % stats.revision_count)
- self.outf.write(" Raw Compressed Objects\n")
+ self.outf.write(" Raw % Compressed % Objects\n")
self.outf.write("Revisions: %s\n" %
- self._format_object(stats.revisions))
+ self._format_object(stats.revisions, stats.total))
self.outf.write("Inventories: %s\n" %
- self._format_object(stats.inventories))
+ self._format_object(stats.inventories, stats.total))
self.outf.write("Texts: %s\n" %
- self._format_object(stats.texts))
+ self._format_object(stats.texts, stats.total))
self.outf.write("Signatures: %s\n" %
- self._format_object(stats.signatures))
+ self._format_object(stats.signatures, stats.total))
+ self.outf.write("Total: %s\n" %
+ self._format_object(stats.total, stats.total))
bzrlib.commands.register_command(cmd_repository_details)
=== modified file 'tests/test_repositorydetails.py'
--- a/tests/test_repositorydetails.py 2008-10-17 01:50:00 +0000
+++ b/tests/test_repositorydetails.py 2008-10-17 03:11:36 +0000
@@ -32,50 +32,75 @@
out, err = self.run_bzr(["repository-details"])
self.assertEqual(
"Commits: 1\n"
- " Raw Compressed Objects\n"
- "Revisions: 0 KiB 0 KiB 1\n"
- "Inventories: 0 KiB 0 KiB 1\n"
- "Texts: 0 KiB 0 KiB 1\n"
- "Signatures: 0 KiB 0 KiB 0\n"
+ " Raw % Compressed % Objects\n"
+ "Revisions: 0 KiB 53% 0 KiB 45% 1\n"
+ "Inventories: 0 KiB 44% 0 KiB 35% 1\n"
+ "Texts: 0 KiB 2% 0 KiB 19% 1\n"
+ "Signatures: 0 KiB 0% 0 KiB 0% 0\n"
+ "Total: 0 KiB 100% 0 KiB 100% 3\n"
, out)
self.assertEqual("", err)
def test_gather_stats_pack92(self):
tree = self.make_branch_and_tree('.', format="pack-0.92")
self.build_tree(["foo"])
- tree.add(["foo"])
- tree.commit("first post")
+ tree.add(["foo"], ["foo-id"])
+ tree.commit("first post", rev_id="foo", committer="foo at bar", timestamp=100000)
stats = repodetails.gather_stats(tree.branch.repository)
self.assertEqual(1, stats.revision_count)
self.assertEqual(1, stats.revisions.objects)
- self.assertEqual(1, stats.revisions.raw_size)
- self.assertEqual(1, stats.revisions.compressed_size)
+ self.assertEqual(271, stats.revisions.raw_size)
+ self.assertEqual(253, stats.revisions.compressed_size)
self.assertEqual(1, stats.inventories.objects)
- self.assertEqual(1, stats.inventories.raw_size)
- self.assertEqual(1, stats.inventories.compressed_size)
+ self.assertEqual(174, stats.inventories.raw_size)
+ self.assertEqual(192, stats.inventories.compressed_size)
self.assertEqual(1, stats.texts.objects)
- self.assertEqual(1, stats.texts.raw_size)
- self.assertEqual(1, stats.texts.compressed_size)
+ self.assertEqual(16, stats.texts.raw_size)
+ self.assertEqual(94, stats.texts.compressed_size)
self.assertEqual(0, stats.signatures.objects)
- self.assertEqual(1, stats.signatures.raw_size)
- self.assertEqual(1, stats.signatures.compressed_size)
+ self.assertEqual(0, stats.signatures.raw_size)
+ self.assertEqual(0, stats.signatures.compressed_size)
+ self.assertEqual(3, stats.total.objects)
+ self.assertEqual(461, stats.total.raw_size)
+ self.assertEqual(539, stats.total.compressed_size)
def test_gather_stats_chk(self):
tree = self.make_branch_and_tree('.', format="development3")
self.build_tree(["foo"])
- tree.add(["foo"])
- tree.commit("first post")
+ tree.add(["foo"], ["foo-id"])
+ tree.commit("first post", rev_id="foo", committer="foo at bar", timestamp=100000)
stats = repodetails.gather_stats(tree.branch.repository)
self.assertEqual(1, stats.revision_count)
self.assertEqual(1, stats.revisions.objects)
- self.assertEqual(1, stats.revisions.raw_size)
- self.assertEqual(1, stats.revisions.compressed_size)
- self.assertEqual(1, stats.inventories.objects)
- self.assertEqual(1, stats.inventories.raw_size)
- self.assertEqual(1, stats.inventories.compressed_size)
+ self.assertEqual(271, stats.revisions.raw_size)
+ self.assertEqual(250, stats.revisions.compressed_size)
+ # inv, root, tree-root-node, foo-id-node.
+ self.assertEqual(4, stats.inventories.objects)
+ self.assertEqual(343, stats.inventories.raw_size)
+ self.assertEqual(615, stats.inventories.compressed_size)
self.assertEqual(1, stats.texts.objects)
- self.assertEqual(1, stats.texts.raw_size)
- self.assertEqual(1, stats.texts.compressed_size)
- self.assertEqual(0, stats.signatures.objects)
- self.assertEqual(1, stats.signatures.raw_size)
- self.assertEqual(1, stats.signatures.compressed_size)
+ self.assertEqual(16, stats.texts.raw_size)
+ self.assertEqual(94, stats.texts.compressed_size)
+ self.assertEqual(0, stats.signatures.objects)
+ self.assertEqual(0, stats.signatures.raw_size)
+ self.assertEqual(0, stats.signatures.compressed_size)
+ self.assertEqual(6, stats.total.objects)
+ self.assertEqual(630, stats.total.raw_size)
+ self.assertEqual(959, stats.total.compressed_size)
+
+ def test_gather_stats_empty(self):
+ tree = self.make_branch_and_tree('.')
+ stats = repodetails.gather_stats(tree.branch.repository)
+ self.assertEqual(0, stats.revision_count)
+ self.assertEqual(0, stats.revisions.objects)
+ self.assertEqual(0, stats.revisions.raw_size)
+ self.assertEqual(0, stats.revisions.compressed_size)
+ self.assertEqual(0, stats.inventories.objects)
+ self.assertEqual(0, stats.inventories.raw_size)
+ self.assertEqual(0, stats.inventories.compressed_size)
+ self.assertEqual(0, stats.texts.objects)
+ self.assertEqual(0, stats.texts.raw_size)
+ self.assertEqual(0, stats.texts.compressed_size)
+ self.assertEqual(0, stats.signatures.objects)
+ self.assertEqual(0, stats.signatures.raw_size)
+ self.assertEqual(0, stats.signatures.compressed_size)
More information about the bazaar-commits
mailing list