Rev 2: Get workable stats. in http://people.ubuntu.com/~robertc/baz2.0/plugins/repodetails/trunk

Robert Collins robertc at robertcollins.net
Fri Oct 17 04:11:37 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/repodetails/trunk

------------------------------------------------------------
revno: 2
revision-id: robertc at robertcollins.net-20081017031136-ucaw6x6oc0axse22
parent: robertc at robertcollins.net-20081017015000-00m52hln7vqsye3d
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-10-17 14:11:36 +1100
message:
  Get workable stats.
modified:
  README                         readme-20081017014933-iriuw53viune2txe-1
  __init__.py                    __init__.py-20081017014933-iriuw53viune2txe-2
  tests/test_repositorydetails.py test_repositorydetai-20081017014933-iriuw53viune2txe-6
=== modified file 'README'
--- a/README	2008-10-17 01:50:00 +0000
+++ b/README	2008-10-17 03:11:36 +0000
@@ -2,3 +2,5 @@
 
 Install as any normal bzr plugin, see bzr help repodetails for usage details.
 
+Note that the tests for this are likely very machine-specific; patches to them
+should pin down all the variables to make them more deterministic.

=== modified file '__init__.py'
--- a/__init__.py	2008-10-17 01:50:00 +0000
+++ b/__init__.py	2008-10-17 03:11:36 +0000
@@ -23,16 +23,80 @@
 version_info = (1, 9, 0, 'dev', 0)
 
 import bzrlib.commands
-from bzrlib import repository
+from bzrlib import chk_map, repository
+from bzrlib.inventory import CHKInventory
+from bzrlib.repofmt.pack_repo import (
+    RepositoryFormatKnitPack1,
+    RepositoryFormatKnitPack3,
+    RepositoryFormatKnitPack4,
+    RepositoryFormatKnitPack5,
+    RepositoryFormatKnitPack5RichRoot,
+    RepositoryFormatKnitPack5RichRootBroken,
+    RepositoryFormatPackDevelopment2,
+    RepositoryFormatPackDevelopment2Subtree,
+    RepositoryFormatPackDevelopment3,
+    RepositoryFormatPackDevelopment3Subtree,
+    )
+
+pack_rev_types = (
+    RepositoryFormatKnitPack1,
+    RepositoryFormatKnitPack3,
+    RepositoryFormatKnitPack4,
+    RepositoryFormatKnitPack5,
+    RepositoryFormatKnitPack5RichRoot,
+    RepositoryFormatKnitPack5RichRootBroken,
+    RepositoryFormatPackDevelopment2,
+    RepositoryFormatPackDevelopment2Subtree,
+    RepositoryFormatPackDevelopment3,
+    RepositoryFormatPackDevelopment3Subtree,
+    )
+pack_signature_types = (
+    RepositoryFormatKnitPack1,
+    RepositoryFormatKnitPack3,
+    RepositoryFormatKnitPack4,
+    RepositoryFormatKnitPack5,
+    RepositoryFormatKnitPack5RichRoot,
+    RepositoryFormatKnitPack5RichRootBroken,
+    RepositoryFormatPackDevelopment2,
+    RepositoryFormatPackDevelopment2Subtree,
+    RepositoryFormatPackDevelopment3,
+    RepositoryFormatPackDevelopment3Subtree,
+    )
+pack_text_types = (
+    RepositoryFormatKnitPack1,
+    RepositoryFormatKnitPack3,
+    RepositoryFormatKnitPack4,
+    RepositoryFormatKnitPack5,
+    RepositoryFormatKnitPack5RichRoot,
+    RepositoryFormatKnitPack5RichRootBroken,
+    RepositoryFormatPackDevelopment2,
+    RepositoryFormatPackDevelopment2Subtree,
+    RepositoryFormatPackDevelopment3,
+    RepositoryFormatPackDevelopment3Subtree,
+    )
+pack_xml_inv_types = (
+    RepositoryFormatKnitPack1,
+    RepositoryFormatKnitPack3,
+    RepositoryFormatKnitPack4,
+    RepositoryFormatKnitPack5,
+    RepositoryFormatKnitPack5RichRoot,
+    RepositoryFormatKnitPack5RichRootBroken,
+    RepositoryFormatPackDevelopment2,
+    RepositoryFormatPackDevelopment2Subtree,
+    )
+pack_chk_inv_types = (
+    RepositoryFormatPackDevelopment3,
+    RepositoryFormatPackDevelopment3Subtree,
+    )
 
 
 class ObjectStats(object):
     """Statistics for a particular type of object."""
 
     def __init__(self):
-        self.objects = 1
-        self.raw_size = 1
-        self.compressed_size = 1
+        self.objects = 0
+        self.raw_size = 0
+        self.compressed_size = 0
 
 
 class RepoStats(object):
@@ -52,37 +116,111 @@
         repo.unlock()
 
 
+def _gather_object_vf_texts(objectstats, vf):
+    for text in _gather_and_iter_object_vf_texts(objectstats, vf):
+        pass
+
+
+def _gather_and_iter_object_vf_texts(objectstats, vf, keys=None):
+    if keys is None:
+        keys = vf.keys()
+    objectstats.objects += len(keys)
+    # XXX: Doesn't consider duplicate-in-separate-packs overhead.
+    details = vf._index.get_build_details(keys)
+    for detail in details.itervalues():
+        objectstats.compressed_size += detail[0][2]
+    for entry in vf.get_record_stream(keys, 'unordered', True):
+        bytes = entry.get_bytes_as('fulltext')
+        objectstats.raw_size += len(bytes)
+        yield bytes, entry.key
+
+
+def _gather_chk_inv(objectstats, repo):
+    # first pass: the inventory objects yield chk dicts:
+    pending = set()
+    done = set()
+    for bytes, key in _gather_and_iter_object_vf_texts(objectstats, repo.inventories):
+        inv = CHKInventory.deserialise(repo.chk_bytes, bytes, key)
+        pending.add(inv.id_to_entry._root_node)
+    while pending:
+        # Don't visit nodes twice
+        done.update(pending)
+        next = pending
+        pending = set()
+        for bytes, key in _gather_and_iter_object_vf_texts(objectstats, repo.chk_bytes, next):
+            node = chk_map._deserialise(bytes, key)
+            pending.update(node.refs())
+
+
 def _gather_stats_locked(repo):
     result = RepoStats()
-    result.revision_count = 1
     result.revisions = ObjectStats()
     result.inventories = ObjectStats()
     result.texts = ObjectStats()
     result.signatures = ObjectStats()
-    result.signatures.objects = len(repo.signatures.keys())
+    keys = repo.revisions.keys()
+    result.revision_count = len(keys)
+    if isinstance(repo._format, pack_rev_types):
+        _gather_object_vf_texts(result.revisions, repo.revisions)
+    else:
+        raise AssertionError("Don't know how to process %r" % repo)
+    if isinstance(repo._format, pack_xml_inv_types):
+        _gather_object_vf_texts(result.inventories, repo.inventories)
+    elif isinstance(repo._format, pack_chk_inv_types):
+        _gather_chk_inv(result.inventories, repo)
+    else:
+        raise AssertionError("Don't know how to process %r" % repo)
+    if isinstance(repo._format, pack_text_types):
+        _gather_object_vf_texts(result.texts, repo.texts)
+    else:
+        raise AssertionError("Don't know how to process %r" % repo)
+    if isinstance(repo._format, pack_signature_types):
+        _gather_object_vf_texts(result.signatures, repo.signatures)
+    else:
+        raise AssertionError("Don't know how to process %r" % repo)
+    result.total = ObjectStats()
+    result.total.objects = sum([result.revisions.objects,
+        result.inventories.objects, result.texts.objects,
+        result.signatures.objects])
+    result.total.raw_size = sum([result.revisions.raw_size,
+        result.inventories.raw_size, result.texts.raw_size,
+        result.signatures.raw_size])
+    result.total.compressed_size = sum([result.revisions.compressed_size,
+        result.inventories.compressed_size, result.texts.compressed_size,
+        result.signatures.compressed_size])
     return result
 
 
 class cmd_repository_details(bzrlib.commands.Command):
-    """Get details for a bzr repository."""
+    """Get details for a bzr repository.
+    
+    Stacked repositories have both the repo and what it's stacked on analysed,
+    or may break... who knows.
+    """
 
-    def _format_object(self, objectstats):
-        return "%6d KiB  %6d KiB %7d" % (objectstats.raw_size/1024,
-            objectstats.compressed_size/1024, objectstats.objects)
+    def _format_object(self, objectstats, total):
+        # Mangle the percentages for very small repos to avoid divide by zero.
+        raw_percent = (objectstats.raw_size + 1)*100/(total.raw_size + 1)
+        compressed_percent = (objectstats.compressed_size + 1)*100/(total.compressed_size + 1)
+        return "%6d KiB %3d%%  %6d KiB %3d%%  %7d" % (objectstats.raw_size/1024,
+            raw_percent, objectstats.compressed_size/1024, compressed_percent,
+            objectstats.objects)
 
     def run(self):
         repo = repository.Repository.open(".")
         stats = gather_stats(repo)
         self.outf.write("Commits: %d\n" % stats.revision_count)
-        self.outf.write("                    Raw  Compressed Objects\n")
+        self.outf.write("                    Raw    %  Compressed    %  Objects\n")
         self.outf.write("Revisions:   %s\n" %
-            self._format_object(stats.revisions))
+            self._format_object(stats.revisions, stats.total))
         self.outf.write("Inventories: %s\n" %
-            self._format_object(stats.inventories))
+            self._format_object(stats.inventories, stats.total))
         self.outf.write("Texts:       %s\n" %
-            self._format_object(stats.texts))
+            self._format_object(stats.texts, stats.total))
         self.outf.write("Signatures:  %s\n" %
-            self._format_object(stats.signatures))
+            self._format_object(stats.signatures, stats.total))
+        self.outf.write("Total:       %s\n" %
+            self._format_object(stats.total, stats.total))
 
 
 bzrlib.commands.register_command(cmd_repository_details)

=== modified file 'tests/test_repositorydetails.py'
--- a/tests/test_repositorydetails.py	2008-10-17 01:50:00 +0000
+++ b/tests/test_repositorydetails.py	2008-10-17 03:11:36 +0000
@@ -32,50 +32,75 @@
         out, err = self.run_bzr(["repository-details"])
         self.assertEqual(
             "Commits: 1\n"
-            "                    Raw  Compressed Objects\n"
-            "Revisions:        0 KiB       0 KiB       1\n"
-            "Inventories:      0 KiB       0 KiB       1\n"
-            "Texts:            0 KiB       0 KiB       1\n"
-            "Signatures:       0 KiB       0 KiB       0\n"
+            "                    Raw    %  Compressed    %  Objects\n"
+            "Revisions:        0 KiB  53%       0 KiB  45%        1\n"
+            "Inventories:      0 KiB  44%       0 KiB  35%        1\n"
+            "Texts:            0 KiB   2%       0 KiB  19%        1\n"
+            "Signatures:       0 KiB   0%       0 KiB   0%        0\n"
+            "Total:            0 KiB 100%       0 KiB 100%        3\n"
             , out)
         self.assertEqual("", err)
 
     def test_gather_stats_pack92(self):
         tree = self.make_branch_and_tree('.', format="pack-0.92")
         self.build_tree(["foo"])
-        tree.add(["foo"])
-        tree.commit("first post")
+        tree.add(["foo"], ["foo-id"])
+        tree.commit("first post", rev_id="foo", committer="foo@bar", timestamp=100000)
         stats = repodetails.gather_stats(tree.branch.repository)
         self.assertEqual(1, stats.revision_count)
         self.assertEqual(1, stats.revisions.objects)
-        self.assertEqual(1, stats.revisions.raw_size)
-        self.assertEqual(1, stats.revisions.compressed_size)
+        self.assertEqual(271, stats.revisions.raw_size)
+        self.assertEqual(253, stats.revisions.compressed_size)
         self.assertEqual(1, stats.inventories.objects)
-        self.assertEqual(1, stats.inventories.raw_size)
-        self.assertEqual(1, stats.inventories.compressed_size)
+        self.assertEqual(174, stats.inventories.raw_size)
+        self.assertEqual(192, stats.inventories.compressed_size)
         self.assertEqual(1, stats.texts.objects)
-        self.assertEqual(1, stats.texts.raw_size)
-        self.assertEqual(1, stats.texts.compressed_size)
+        self.assertEqual(16, stats.texts.raw_size)
+        self.assertEqual(94, stats.texts.compressed_size)
         self.assertEqual(0, stats.signatures.objects)
-        self.assertEqual(1, stats.signatures.raw_size)
-        self.assertEqual(1, stats.signatures.compressed_size)
+        self.assertEqual(0, stats.signatures.raw_size)
+        self.assertEqual(0, stats.signatures.compressed_size)
+        self.assertEqual(3, stats.total.objects)
+        self.assertEqual(461, stats.total.raw_size)
+        self.assertEqual(539, stats.total.compressed_size)
 
     def test_gather_stats_chk(self):
         tree = self.make_branch_and_tree('.', format="development3")
         self.build_tree(["foo"])
-        tree.add(["foo"])
-        tree.commit("first post")
+        tree.add(["foo"], ["foo-id"])
+        tree.commit("first post", rev_id="foo", committer="foo@bar", timestamp=100000)
         stats = repodetails.gather_stats(tree.branch.repository)
         self.assertEqual(1, stats.revision_count)
         self.assertEqual(1, stats.revisions.objects)
-        self.assertEqual(1, stats.revisions.raw_size)
-        self.assertEqual(1, stats.revisions.compressed_size)
-        self.assertEqual(1, stats.inventories.objects)
-        self.assertEqual(1, stats.inventories.raw_size)
-        self.assertEqual(1, stats.inventories.compressed_size)
+        self.assertEqual(271, stats.revisions.raw_size)
+        self.assertEqual(250, stats.revisions.compressed_size)
+        # inv, root, tree-root-node, foo-id-node.
+        self.assertEqual(4, stats.inventories.objects)
+        self.assertEqual(343, stats.inventories.raw_size)
+        self.assertEqual(615, stats.inventories.compressed_size)
         self.assertEqual(1, stats.texts.objects)
-        self.assertEqual(1, stats.texts.raw_size)
-        self.assertEqual(1, stats.texts.compressed_size)
-        self.assertEqual(0, stats.signatures.objects)
-        self.assertEqual(1, stats.signatures.raw_size)
-        self.assertEqual(1, stats.signatures.compressed_size)
+        self.assertEqual(16, stats.texts.raw_size)
+        self.assertEqual(94, stats.texts.compressed_size)
+        self.assertEqual(0, stats.signatures.objects)
+        self.assertEqual(0, stats.signatures.raw_size)
+        self.assertEqual(0, stats.signatures.compressed_size)
+        self.assertEqual(6, stats.total.objects)
+        self.assertEqual(630, stats.total.raw_size)
+        self.assertEqual(959, stats.total.compressed_size)
+
+    def test_gather_stats_empty(self):
+        tree = self.make_branch_and_tree('.')
+        stats = repodetails.gather_stats(tree.branch.repository)
+        self.assertEqual(0, stats.revision_count)
+        self.assertEqual(0, stats.revisions.objects)
+        self.assertEqual(0, stats.revisions.raw_size)
+        self.assertEqual(0, stats.revisions.compressed_size)
+        self.assertEqual(0, stats.inventories.objects)
+        self.assertEqual(0, stats.inventories.raw_size)
+        self.assertEqual(0, stats.inventories.compressed_size)
+        self.assertEqual(0, stats.texts.objects)
+        self.assertEqual(0, stats.texts.raw_size)
+        self.assertEqual(0, stats.texts.compressed_size)
+        self.assertEqual(0, stats.signatures.objects)
+        self.assertEqual(0, stats.signatures.raw_size)
+        self.assertEqual(0, stats.signatures.compressed_size)




More information about the bazaar-commits mailing list