Rev 2261: (robertc) Move info stats gathering into Repository.gather_stats allowing server optimisation. in http://people.ubuntu.com/~robertc/baz2.0/integration

Robert Collins robertc at robertcollins.net
Sun Feb 4 12:35:53 GMT 2007


------------------------------------------------------------
revno: 2261
revision-id: robertc at robertcollins.net-20070204123551-yw876d2dmdhyb3nw
parent: pqm at pqm.ubuntu.com-20070203195358-c2d9c5b94445119b
parent: robertc at robertcollins.net-20070203105604-z5stj4qy1r038m7x
committer: Robert Collins <robertc at robertcollins.net>
branch nick: integration
timestamp: Sun 2007-02-04 23:35:51 +1100
message:
  (robertc) Move info stats gathering into Repository.gather_stats allowing server optimisation.
added:
  bzrlib/tests/repository_implementations/test_statistics.py test_statistics.py-20070203082432-6738e8fl0mm7ikre-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/branch.py               branch.py-20050309040759-e4baf4e0d046576e
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/info.py                 info.py-20050323235939-6bbfe7d9700b0b9b
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_implementations/__init__.py __init__.py-20060131092037-9564957a7d4a841b
    ------------------------------------------------------------
    revno: 2258.1.2
    merged: robertc at robertcollins.net-20070203105604-z5stj4qy1r038m7x
    parent: robertc at robertcollins.net-20070203100211-bf9qwy73gh40nszi
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: hpss.api.changes
    timestamp: Sat 2007-02-03 21:56:04 +1100
    message:
      New version of gather_stats which gathers aggregate data too.
    ------------------------------------------------------------
    revno: 2258.1.1
    merged: robertc at robertcollins.net-20070203100211-bf9qwy73gh40nszi
    parent: pqm at pqm.ubuntu.com-20070202204950-910381483d737306
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: hpss.api.changes
    timestamp: Sat 2007-02-03 21:02:11 +1100
    message:
      Move info branch statistics gathering into the repository to allow smart server optimisation (Robert Collins).
=== added file 'bzrlib/tests/repository_implementations/test_statistics.py'
--- a/bzrlib/tests/repository_implementations/test_statistics.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/repository_implementations/test_statistics.py	2007-02-03 10:56:04 +0000
@@ -0,0 +1,93 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for repository statistic-gathering apis."""
+
+from bzrlib.tests.repository_implementations.test_repository import TestCaseWithRepository
+
+
+class TestGatherStats(TestCaseWithRepository):
+
+    def check_stats_has_size(self, stats):
+        """Check that stats has a reasonable size entry."""
+        # actual disk size varies from implementation to implementation,
+        # but they should all provide it on their native transport.
+        self.assertTrue('size' in stats)
+        # and it should be a number
+        self.assertIsInstance(stats['size'], (int, long))
+        # and now remove it to make other assertions work without variation.
+        del stats['size']
+
+    def test_gather_stats(self):
+        """First smoke test covering the refactoring into the Repository api."""
+        tree = self.make_branch_and_memory_tree('.')
+        tree.lock_write()
+        tree.add('')
+        # three commits: one to be included by reference, one to be 
+        # requested, and one to be in the repository but [mostly] ignored.
+        rev1 = tree.commit('first post', committer='person 1',
+            timestamp=1170491381, timezone=0)
+        rev2 = tree.commit('second post', committer='person 2',
+            timestamp=1171491381, timezone=0)
+        rev3 = tree.commit('third post', committer='person 3',
+            timestamp=1172491381, timezone=0)
+        tree.unlock()
+        # now, in the same repository, asking for stats with/without the 
+        # committers flag generates the same date information.
+        stats = tree.branch.repository.gather_stats(rev2, committers=False)
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'firstrev': (1170491381.0, 0),
+            'latestrev': (1171491381.0, 0),
+            'revisions': 3,
+            },
+            stats)
+        stats = tree.branch.repository.gather_stats(rev2, committers=True)
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'committers': 2,
+            'firstrev': (1170491381.0, 0),
+            'latestrev': (1171491381.0, 0),
+            'revisions': 3,
+            },
+            stats)
+
+    def test_gather_stats_norevid_gets_size(self):
+        """Without a revid, repository size is still gathered."""
+        tree = self.make_branch_and_memory_tree('.')
+        tree.lock_write()
+        tree.add('')
+        # put something in the repository, because zero-size is boring.
+        rev1 = tree.commit('first post')
+        tree.unlock()
+        # now ask for global repository stats.
+        stats = tree.branch.repository.gather_stats()
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'revisions': 1
+            },
+            stats)
+
+    def test_gather_stats_empty_repo(self):
+        """An empty repository still has size and revisions."""
+        tree = self.make_branch_and_memory_tree('.')
+        # now ask for global repository stats.
+        stats = tree.branch.repository.gather_stats()
+        self.check_stats_has_size(stats)
+        self.assertEqual({
+            'revisions': 0
+            },
+            stats)

=== modified file 'NEWS'
--- a/NEWS	2007-02-03 16:20:12 +0000
+++ b/NEWS	2007-02-04 12:35:51 +0000
@@ -47,6 +47,10 @@
       other for code reuse without requiring users to perform file-renaming
       gymnastics. (Robert Collins)
 
+    * New Repository method 'gather_stats' for statistic data collection.
+      This is expected to grow to cover a number of uses, mainly those
+      related to bzr info. (Robert Collins)
+
   BUGFIXES:
 
     * ``bzr annotate`` now uses dotted revnos from the viewpoint of the

=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py	2007-02-02 09:17:23 +0000
+++ b/bzrlib/branch.py	2007-02-03 10:02:11 +0000
@@ -696,7 +696,7 @@
 
     def get_format_description(self):
         """Return the short format description for this format."""
-        raise NotImplementedError(self.get_format_string)
+        raise NotImplementedError(self.get_format_description)
 
     def initialize(self, a_bzrdir):
         """Create a branch of this format in a_bzrdir."""

=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2007-01-30 11:52:30 +0000
+++ b/bzrlib/errors.py	2007-02-03 10:02:11 +0000
@@ -1677,6 +1677,7 @@
 class NoSmartMedium(BzrError):
 
     _fmt = "The transport '%(transport)s' cannot tunnel the smart protocol."
+    internal_error = True
 
     def __init__(self, transport):
         self.transport = transport

=== modified file 'bzrlib/info.py'
--- a/bzrlib/info.py	2007-02-01 14:47:47 +0000
+++ b/bzrlib/info.py	2007-02-03 10:56:04 +0000
@@ -27,7 +27,7 @@
 from bzrlib.errors import (NoWorkingTree, NotBranchError,
                            NoRepositoryPresent, NotLocalUrl)
 from bzrlib.missing import find_unmerged
-from bzrlib.symbol_versioning import (deprecated_function, 
+from bzrlib.symbol_versioning import (deprecated_function,
         zero_eight)
 
 
@@ -225,40 +225,24 @@
 
 def _show_branch_stats(branch, verbose):
     """Show statistics about a branch."""
-    repository = branch.repository
-    history = branch.revision_history()
-
+    revno, head = branch.last_revision_info()
     print
     print 'Branch history:'
-    revno = len(history)
     print '  %8d revision%s' % (revno, plural(revno))
+    stats = branch.repository.gather_stats(head, committers=verbose)
     if verbose:
-        committers = {}
-        for rev in history:
-            committers[repository.get_revision(rev).committer] = True
-        print '  %8d committer%s' % (len(committers), plural(len(committers)))
-    if revno > 0:
-        firstrev = repository.get_revision(history[0])
-        age = int((time.time() - firstrev.timestamp) / 3600 / 24)
+        committers = stats['committers']
+        print '  %8d committer%s' % (committers, plural(committers))
+    if revno:
+        timestamp, timezone = stats['firstrev']
+        age = int((time.time() - timestamp) / 3600 / 24)
         print '  %8d day%s old' % (age, plural(age))
-        print '   first revision: %s' % osutils.format_date(firstrev.timestamp,
-                                                            firstrev.timezone)
-
-        lastrev = repository.get_revision(history[-1])
-        print '  latest revision: %s' % osutils.format_date(lastrev.timestamp,
-                                                            lastrev.timezone)
-
-#     print
-#     print 'Text store:'
-#     c, t = branch.text_store.total_size()
-#     print '  %8d file texts' % c
-#     print '  %8d KiB' % (t/1024)
-
-#     print
-#     print 'Inventory store:'
-#     c, t = branch.inventory_store.total_size()
-#     print '  %8d inventories' % c
-#     print '  %8d KiB' % (t/1024)
+        print '   first revision: %s' % osutils.format_date(timestamp,
+            timezone)
+        timestamp, timezone = stats['latestrev']
+        print '  latest revision: %s' % osutils.format_date(timestamp,
+            timezone)
+    return stats
 
 
 def _show_repository_info(repository):
@@ -268,14 +252,16 @@
         print 'Create working tree for new branches inside the repository.'
 
 
-def _show_repository_stats(repository):
+def _show_repository_stats(stats):
     """Show statistics about a repository."""
-    if repository.bzrdir.root_transport.listable():
+    if 'revisions' in stats or 'size' in stats:
         print
         print 'Revision store:'
-        c, t = repository._revision_store.total_size(repository.get_transaction())
-        print '  %8d revision%s' % (c, plural(c))
-        print '  %8d KiB' % (t/1024)
+    if 'revisions' in stats:
+        revisions = stats['revisions']
+        print '  %8d revision%s' % (revisions, plural(revisions))
+    if 'size' in stats:
+        print '  %8d KiB' % (stats['size']/1024)
 
 
 @deprecated_function(zero_eight)
@@ -336,8 +322,8 @@
     _show_missing_revisions_branch(branch)
     _show_missing_revisions_working(working)
     _show_working_stats(working)
-    _show_branch_stats(branch, verbose)
-    _show_repository_stats(repository)
+    stats = _show_branch_stats(branch, verbose)
+    _show_repository_stats(stats)
 
 
 def show_branch_info(branch, verbose):
@@ -350,8 +336,8 @@
     _show_format_info(control, repository, branch)
     _show_locking_info(repository, branch)
     _show_missing_revisions_branch(branch)
-    _show_branch_stats(branch, verbose)
-    _show_repository_stats(repository)
+    stats = _show_branch_stats(branch, verbose)
+    _show_repository_stats(stats)
 
 
 def show_repository_info(repository, verbose):
@@ -362,4 +348,5 @@
     _show_format_info(control, repository)
     _show_locking_info(repository)
     _show_repository_info(repository)
-    _show_repository_stats(repository)
+    stats = repository.gather_stats()
+    _show_repository_stats(stats)

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-01-26 21:56:16 +0000
+++ b/bzrlib/repository.py	2007-02-03 10:56:04 +0000
@@ -243,6 +243,56 @@
         return self.control_files.get_physical_lock_status()
 
     @needs_read_lock
+    def gather_stats(self, revid=None, committers=None):
+        """Gather statistics from a revision id.
+
+        :param revid: The revision id to gather statistics from, if None, then
+            no revision specific statistics are gathered.
+        :param committers: Optional parameter controlling whether to grab
+            a count of committers from the revision specific statistics.
+        :return: A dictionary of statistics. Currently this contains:
+            committers: The number of committers if requested.
+            firstrev: A tuple with timestamp, timezone for the first revision
+                in the ancestry of revid, if revid is not the NULL_REVISION.
+            latestrev: A tuple with timestamp, timezone for revid, if revid is
+                not the NULL_REVISION.
+            revisions: The total revision count in the repository.
+            size: An estimated disk size of the repository in bytes.
+        """
+        result = {}
+        if revid and committers:
+            result['committers'] = 0
+        if revid and revid != _mod_revision.NULL_REVISION:
+            if committers:
+                all_committers = set()
+            revisions = self.get_ancestry(revid)
+            # pop the leading None
+            revisions.pop(0)
+            first_revision = None
+            if not committers:
+                # ignore the revisions in the middle - just grab first and last
+                revisions = revisions[0], revisions[-1]
+            for revision in self.get_revisions(revisions):
+                if not first_revision:
+                    first_revision = revision
+                if committers:
+                    all_committers.add(revision.committer)
+            last_revision = revision
+            if committers:
+                result['committers'] = len(all_committers)
+            result['firstrev'] = (first_revision.timestamp,
+                first_revision.timezone)
+            result['latestrev'] = (last_revision.timestamp,
+                last_revision.timezone)
+
+        # now gather global repository information
+        if self.bzrdir.root_transport.listable():
+            c, t = self._revision_store.total_size(self.get_transaction())
+            result['revisions'] = c
+            result['size'] = t
+        return result
+
+    @needs_read_lock
     def missing_revision_ids(self, other, revision_id=None):
         """Return the revision ids that other has that this does not.
         

=== modified file 'bzrlib/tests/repository_implementations/__init__.py'
--- a/bzrlib/tests/repository_implementations/__init__.py	2007-01-25 00:35:22 +0000
+++ b/bzrlib/tests/repository_implementations/__init__.py	2007-02-03 10:02:11 +0000
@@ -48,6 +48,7 @@
         'bzrlib.tests.repository_implementations.test_reconcile',
         'bzrlib.tests.repository_implementations.test_repository',
         'bzrlib.tests.repository_implementations.test_revision',
+        'bzrlib.tests.repository_implementations.test_statistics',
         ]
     all_formats = [v for (k, v) in repository.format_registry.iteritems()]
     all_formats.extend(_legacy_formats)



More information about the bazaar-commits mailing list