Rev 2260: New version of gather_stats which gathers aggregate data too. in http://bazaar.launchpad.net/~bzr/bzr/bzr.dev.hpss.api.changes

Robert Collins robertc at robertcollins.net
Sat Feb 3 10:56:06 GMT 2007


------------------------------------------------------------
revno: 2260
revision-id: robertc at robertcollins.net-20070203105604-z5stj4qy1r038m7x
parent: robertc at robertcollins.net-20070203100211-bf9qwy73gh40nszi
committer: Robert Collins <robertc at robertcollins.net>
branch nick: hpss.api.changes
timestamp: Sat 2007-02-03 21:56:04 +1100
message:
  New version of gather_stats which gathers aggregate data too.
modified:
  bzrlib/info.py                 info.py-20050323235939-6bbfe7d9700b0b9b
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_implementations/test_statistics.py test_statistics.py-20070203082432-6738e8fl0mm7ikre-1
=== modified file 'bzrlib/info.py'
--- a/bzrlib/info.py	2007-02-03 10:02:11 +0000
+++ b/bzrlib/info.py	2007-02-03 10:56:04 +0000
@@ -242,6 +242,7 @@
         timestamp, timezone = stats['latestrev']
         print '  latest revision: %s' % osutils.format_date(timestamp,
             timezone)
+    return stats
 
 
 def _show_repository_info(repository):
@@ -251,14 +252,16 @@
         print 'Create working tree for new branches inside the repository.'
 
 
-def _show_repository_stats(repository):
+def _show_repository_stats(stats):
     """Show statistics about a repository."""
-    if repository.bzrdir.root_transport.listable():
+    if 'revisions' in stats or 'size' in stats:
         print
         print 'Revision store:'
-        c, t = repository._revision_store.total_size(repository.get_transaction())
-        print '  %8d revision%s' % (c, plural(c))
-        print '  %8d KiB' % (t/1024)
+    if 'revisions' in stats:
+        revisions = stats['revisions']
+        print '  %8d revision%s' % (revisions, plural(revisions))
+    if 'size' in stats:
+        print '  %8d KiB' % (stats['size']/1024)
 
 
 @deprecated_function(zero_eight)
@@ -319,8 +322,8 @@
     _show_missing_revisions_branch(branch)
     _show_missing_revisions_working(working)
     _show_working_stats(working)
-    _show_branch_stats(branch, verbose)
-    _show_repository_stats(repository)
+    stats = _show_branch_stats(branch, verbose)
+    _show_repository_stats(stats)
 
 
 def show_branch_info(branch, verbose):
@@ -333,8 +336,8 @@
     _show_format_info(control, repository, branch)
     _show_locking_info(repository, branch)
     _show_missing_revisions_branch(branch)
-    _show_branch_stats(branch, verbose)
-    _show_repository_stats(repository)
+    stats = _show_branch_stats(branch, verbose)
+    _show_repository_stats(stats)
 
 
 def show_repository_info(repository, verbose):
@@ -345,4 +348,5 @@
     _show_format_info(control, repository)
     _show_locking_info(repository)
     _show_repository_info(repository)
-    _show_repository_stats(repository)
+    stats = repository.gather_stats()
+    _show_repository_stats(stats)

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-02-03 10:02:11 +0000
+++ b/bzrlib/repository.py	2007-02-03 10:56:04 +0000
@@ -243,42 +243,53 @@
         return self.control_files.get_physical_lock_status()
 
     @needs_read_lock
-    def gather_stats(self, revid, committers=None):
+    def gather_stats(self, revid=None, committers=None):
         """Gather statistics from a revision id.
 
-        :param revid: The revision id to gather statistics from.
+        :param revid: The revision id to gather statistics from. If None,
+            no revision-specific statistics are gathered.
         :param committers: Optional parameter controlling whether to grab
-            a count of committers.
+            a count of committers from the revision specific statistics.
         :return: A dictionary of statistics. Currently this contains:
             committers: The number of committers if requested.
             firstrev: A tuple with timestamp, timezone for the penultimate left
                 most ancestor of revid, if revid is not the NULL_REVISION.
             latestrev: A tuple with timestamp, timezone for revid, if revid is
                 not the NULL_REVISION.
+            revisions: The total revision count in the repository.
+            size: An estimated disk size of the repository in bytes.
         """
         result = {}
-        if committers:
+        if revid and committers:
             result['committers'] = 0
-        if revid == _mod_revision.NULL_REVISION:
-            return result
-        all_committers = set()
-        revisions = self.get_ancestry(revid)
-        # pop the leading None
-        revisions.pop(0)
-        first_revision = None
-        if not committers:
-            # ignore the revisions in the middle - just grab first and last
-            revisions = revisions[0], revisions[-1]
-        for revision in self.get_revisions(revisions):
-            if not first_revision:
-                first_revision = revision
-            if committers:
-                all_committers.add(revision.committer)
-        last_revision = revision
-        if committers:
-            result['committers'] = len(all_committers)
-        result['firstrev'] = first_revision.timestamp, first_revision.timezone
-        result['latestrev'] = last_revision.timestamp, last_revision.timezone
+        if revid and revid != _mod_revision.NULL_REVISION:
+            if committers:
+                all_committers = set()
+            revisions = self.get_ancestry(revid)
+            # pop the leading None
+            revisions.pop(0)
+            first_revision = None
+            if not committers:
+                # ignore the revisions in the middle - just grab first and last
+                revisions = revisions[0], revisions[-1]
+            for revision in self.get_revisions(revisions):
+                if not first_revision:
+                    first_revision = revision
+                if committers:
+                    all_committers.add(revision.committer)
+            last_revision = revision
+            if committers:
+                result['committers'] = len(all_committers)
+            result['firstrev'] = (first_revision.timestamp,
+                first_revision.timezone)
+            result['latestrev'] = (last_revision.timestamp,
+                last_revision.timezone)
+
+        # now gather global repository information
+        if self.bzrdir.root_transport.listable():
+            c, t = self._revision_store.total_size(self.get_transaction())
+            result['revisions'] = c
+            result['size'] = t
         return result
 
     @needs_read_lock

=== modified file 'bzrlib/tests/repository_implementations/test_statistics.py'
--- a/bzrlib/tests/repository_implementations/test_statistics.py	2007-02-03 10:02:11 +0000
+++ b/bzrlib/tests/repository_implementations/test_statistics.py	2007-02-03 10:56:04 +0000
@@ -21,6 +21,16 @@
 
 class TestGatherStats(TestCaseWithRepository):
 
+    def check_stats_has_size(self, stats):
+        """Check that stats has a reasonable size entry."""
+        # actual disk size varies from implementation to implementation,
+        # but they should all provide it on their native transport.
+        self.assertTrue('size' in stats)
+        # and it should be a number
+        self.assertIsInstance(stats['size'], (int, long))
+        # and now remove it to make other assertions work without variation.
+        del stats['size']
+
     def test_gather_stats(self):
         """First smoke test covering the refactoring into the Repository api."""
         tree = self.make_branch_and_memory_tree('.')
@@ -36,15 +46,48 @@
             timestamp=1172491381, timezone=0)
         tree.unlock()
         # now, in the same repository, asking for stats with/without the 
-        # committers flag generates the same date information
+        # committers flag generates the same date information.
         stats = tree.branch.repository.gather_stats(rev2, committers=False)
+        self.check_stats_has_size(stats)
         self.assertEqual({
             'firstrev': (1170491381.0, 0),
-            'latestrev': (1171491381.0, 0)},
+            'latestrev': (1171491381.0, 0),
+            'revisions': 3,
+            },
             stats)
         stats = tree.branch.repository.gather_stats(rev2, committers=True)
+        self.check_stats_has_size(stats)
         self.assertEqual({
             'committers': 2,
             'firstrev': (1170491381.0, 0),
-            'latestrev': (1171491381.0, 0)},
+            'latestrev': (1171491381.0, 0),
+            'revisions': 3,
+            },
+            stats)
+
+    def test_gather_stats_norevid_gets_size(self):
+        """Without a revid, repository size is still gathered."""
+        tree = self.make_branch_and_memory_tree('.')
+        tree.lock_write()
+        tree.add('')
+        # put something in the repository, because zero-size is borink.
+        rev1 = tree.commit('first post')
+        tree.unlock()
+        # now ask for global repository stats.
+        stats = tree.branch.repository.gather_stats()
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'revisions': 1
+            },
+            stats)
+
+    def test_gather_stats_empty_repo(self):
+        """An empty repository still has size and revisions."""
+        tree = self.make_branch_and_memory_tree('.')
+        # now ask for global repository stats.
+        stats = tree.branch.repository.gather_stats()
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'revisions': 0
+            },
             stats)



More information about the bazaar-commits mailing list