Rev 2261: (robertc) Move info stats gathering into Repository.gather_stats allowing server optimisation. in http://people.ubuntu.com/~robertc/baz2.0/integration
Robert Collins
robertc at robertcollins.net
Sun Feb 4 12:35:53 GMT 2007
------------------------------------------------------------
revno: 2261
revision-id: robertc at robertcollins.net-20070204123551-yw876d2dmdhyb3nw
parent: pqm at pqm.ubuntu.com-20070203195358-c2d9c5b94445119b
parent: robertc at robertcollins.net-20070203105604-z5stj4qy1r038m7x
committer: Robert Collins <robertc at robertcollins.net>
branch nick: integration
timestamp: Sun 2007-02-04 23:35:51 +1100
message:
(robertc) Move info stats gathering into Repository.gather_stats allowing server optimisation.
added:
bzrlib/tests/repository_implementations/test_statistics.py test_statistics.py-20070203082432-6738e8fl0mm7ikre-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/info.py info.py-20050323235939-6bbfe7d9700b0b9b
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/repository_implementations/__init__.py __init__.py-20060131092037-9564957a7d4a841b
------------------------------------------------------------
revno: 2258.1.2
merged: robertc at robertcollins.net-20070203105604-z5stj4qy1r038m7x
parent: robertc at robertcollins.net-20070203100211-bf9qwy73gh40nszi
committer: Robert Collins <robertc at robertcollins.net>
branch nick: hpss.api.changes
timestamp: Sat 2007-02-03 21:56:04 +1100
message:
New version of gather_stats which gathers aggregate data too.
------------------------------------------------------------
revno: 2258.1.1
merged: robertc at robertcollins.net-20070203100211-bf9qwy73gh40nszi
parent: pqm at pqm.ubuntu.com-20070202204950-910381483d737306
committer: Robert Collins <robertc at robertcollins.net>
branch nick: hpss.api.changes
timestamp: Sat 2007-02-03 21:02:11 +1100
message:
Move info branch statistics gathering into the repository to allow smart server optimisation (Robert Collins).
=== added file 'bzrlib/tests/repository_implementations/test_statistics.py'
--- a/bzrlib/tests/repository_implementations/test_statistics.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/repository_implementations/test_statistics.py 2007-02-03 10:56:04 +0000
@@ -0,0 +1,93 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Tests for repository statistic-gathering apis."""
+
+from bzrlib.tests.repository_implementations.test_repository import TestCaseWithRepository
+
+
+class TestGatherStats(TestCaseWithRepository):
+
+ def check_stats_has_size(self, stats):
+ """Check that stats has a reasonable size entry."""
+ # actual disk size varies from implementation to implementation,
+ # but they should all provide it on their native transport.
+ self.assertTrue('size' in stats)
+ # and it should be a number
+ self.assertIsInstance(stats['size'], (int, long))
+ # and now remove it to make other assertions work without variation.
+ del stats['size']
+
+ def test_gather_stats(self):
+ """First smoke test covering the refactoring into the Repository api."""
+ tree = self.make_branch_and_memory_tree('.')
+ tree.lock_write()
+ tree.add('')
+ # three commits: one to be included by reference, one to be
+ # requested, and one to be in the repository but [mostly] ignored.
+ rev1 = tree.commit('first post', committer='person 1',
+ timestamp=1170491381, timezone=0)
+ rev2 = tree.commit('second post', committer='person 2',
+ timestamp=1171491381, timezone=0)
+ rev3 = tree.commit('third post', committer='person 3',
+ timestamp=1172491381, timezone=0)
+ tree.unlock()
+ # now, in the same repository, asking for stats with/without the
+ # committers flag generates the same date information.
+ stats = tree.branch.repository.gather_stats(rev2, committers=False)
+ self.check_stats_has_size(stats)
+ self.assertEqual({
+ 'firstrev': (1170491381.0, 0),
+ 'latestrev': (1171491381.0, 0),
+ 'revisions': 3,
+ },
+ stats)
+ stats = tree.branch.repository.gather_stats(rev2, committers=True)
+ self.check_stats_has_size(stats)
+ self.assertEqual({
+ 'committers': 2,
+ 'firstrev': (1170491381.0, 0),
+ 'latestrev': (1171491381.0, 0),
+ 'revisions': 3,
+ },
+ stats)
+
+ def test_gather_stats_norevid_gets_size(self):
+ """Without a revid, repository size is still gathered."""
+ tree = self.make_branch_and_memory_tree('.')
+ tree.lock_write()
+ tree.add('')
+ # put something in the repository, because zero-size is borink.
+ rev1 = tree.commit('first post')
+ tree.unlock()
+ # now ask for global repository stats.
+ stats = tree.branch.repository.gather_stats()
+ self.check_stats_has_size(stats)
+ self.assertEqual({
+ 'revisions': 1
+ },
+ stats)
+
+ def test_gather_stats_empty_repo(self):
+ """An empty repository still has size and revisions."""
+ tree = self.make_branch_and_memory_tree('.')
+ # now ask for global repository stats.
+ stats = tree.branch.repository.gather_stats()
+ self.check_stats_has_size(stats)
+ self.assertEqual({
+ 'revisions': 0
+ },
+ stats)
=== modified file 'NEWS'
--- a/NEWS 2007-02-03 16:20:12 +0000
+++ b/NEWS 2007-02-04 12:35:51 +0000
@@ -47,6 +47,10 @@
other for code reuse without requiring users to perform file-renaming
gymnastics. (Robert Collins)
+ * New Repository method 'gather_stats' for statistic data collection.
+ This is expected to grow to cover a number of uses, mainly
+ related to bzr info. (Robert Collins)
+
BUGFIXES:
* ``bzr annotate`` now uses dotted revnos from the viewpoint of the
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py 2007-02-02 09:17:23 +0000
+++ b/bzrlib/branch.py 2007-02-03 10:02:11 +0000
@@ -696,7 +696,7 @@
def get_format_description(self):
"""Return the short format description for this format."""
- raise NotImplementedError(self.get_format_string)
+ raise NotImplementedError(self.get_format_description)
def initialize(self, a_bzrdir):
"""Create a branch of this format in a_bzrdir."""
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2007-01-30 11:52:30 +0000
+++ b/bzrlib/errors.py 2007-02-03 10:02:11 +0000
@@ -1677,6 +1677,7 @@
class NoSmartMedium(BzrError):
_fmt = "The transport '%(transport)s' cannot tunnel the smart protocol."
+ internal_error = True
def __init__(self, transport):
self.transport = transport
=== modified file 'bzrlib/info.py'
--- a/bzrlib/info.py 2007-02-01 14:47:47 +0000
+++ b/bzrlib/info.py 2007-02-03 10:56:04 +0000
@@ -27,7 +27,7 @@
from bzrlib.errors import (NoWorkingTree, NotBranchError,
NoRepositoryPresent, NotLocalUrl)
from bzrlib.missing import find_unmerged
-from bzrlib.symbol_versioning import (deprecated_function,
+from bzrlib.symbol_versioning import (deprecated_function,
zero_eight)
@@ -225,40 +225,24 @@
def _show_branch_stats(branch, verbose):
"""Show statistics about a branch."""
- repository = branch.repository
- history = branch.revision_history()
-
+ revno, head = branch.last_revision_info()
print
print 'Branch history:'
- revno = len(history)
print ' %8d revision%s' % (revno, plural(revno))
+ stats = branch.repository.gather_stats(head, committers=verbose)
if verbose:
- committers = {}
- for rev in history:
- committers[repository.get_revision(rev).committer] = True
- print ' %8d committer%s' % (len(committers), plural(len(committers)))
- if revno > 0:
- firstrev = repository.get_revision(history[0])
- age = int((time.time() - firstrev.timestamp) / 3600 / 24)
+ committers = stats['committers']
+ print ' %8d committer%s' % (committers, plural(committers))
+ if revno:
+ timestamp, timezone = stats['firstrev']
+ age = int((time.time() - timestamp) / 3600 / 24)
print ' %8d day%s old' % (age, plural(age))
- print ' first revision: %s' % osutils.format_date(firstrev.timestamp,
- firstrev.timezone)
-
- lastrev = repository.get_revision(history[-1])
- print ' latest revision: %s' % osutils.format_date(lastrev.timestamp,
- lastrev.timezone)
-
-# print
-# print 'Text store:'
-# c, t = branch.text_store.total_size()
-# print ' %8d file texts' % c
-# print ' %8d KiB' % (t/1024)
-
-# print
-# print 'Inventory store:'
-# c, t = branch.inventory_store.total_size()
-# print ' %8d inventories' % c
-# print ' %8d KiB' % (t/1024)
+ print ' first revision: %s' % osutils.format_date(timestamp,
+ timezone)
+ timestamp, timezone = stats['latestrev']
+ print ' latest revision: %s' % osutils.format_date(timestamp,
+ timezone)
+ return stats
def _show_repository_info(repository):
@@ -268,14 +252,16 @@
print 'Create working tree for new branches inside the repository.'
-def _show_repository_stats(repository):
+def _show_repository_stats(stats):
"""Show statistics about a repository."""
- if repository.bzrdir.root_transport.listable():
+ if 'revisions' in stats or 'size' in stats:
print
print 'Revision store:'
- c, t = repository._revision_store.total_size(repository.get_transaction())
- print ' %8d revision%s' % (c, plural(c))
- print ' %8d KiB' % (t/1024)
+ if 'revisions' in stats:
+ revisions = stats['revisions']
+ print ' %8d revision%s' % (revisions, plural(revisions))
+ if 'size' in stats:
+ print ' %8d KiB' % (stats['size']/1024)
@deprecated_function(zero_eight)
@@ -336,8 +322,8 @@
_show_missing_revisions_branch(branch)
_show_missing_revisions_working(working)
_show_working_stats(working)
- _show_branch_stats(branch, verbose)
- _show_repository_stats(repository)
+ stats = _show_branch_stats(branch, verbose)
+ _show_repository_stats(stats)
def show_branch_info(branch, verbose):
@@ -350,8 +336,8 @@
_show_format_info(control, repository, branch)
_show_locking_info(repository, branch)
_show_missing_revisions_branch(branch)
- _show_branch_stats(branch, verbose)
- _show_repository_stats(repository)
+ stats = _show_branch_stats(branch, verbose)
+ _show_repository_stats(stats)
def show_repository_info(repository, verbose):
@@ -362,4 +348,5 @@
_show_format_info(control, repository)
_show_locking_info(repository)
_show_repository_info(repository)
- _show_repository_stats(repository)
+ stats = repository.gather_stats()
+ _show_repository_stats(stats)
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-01-26 21:56:16 +0000
+++ b/bzrlib/repository.py 2007-02-03 10:56:04 +0000
@@ -243,6 +243,56 @@
return self.control_files.get_physical_lock_status()
@needs_read_lock
+ def gather_stats(self, revid=None, committers=None):
+ """Gather statistics from a revision id.
+
+ :param revid: The revision id to gather statistics from, if None, then
+ no revision specific statistics are gathered.
+ :param committers: Optional parameter controlling whether to grab
+ a count of committers from the revision specific statistics.
+ :return: A dictionary of statistics. Currently this contains:
+ committers: The number of committers if requested.
+ firstrev: A tuple with timestamp, timezone for the penultimate left
+ most ancestor of revid, if revid is not the NULL_REVISION.
+ latestrev: A tuple with timestamp, timezone for revid, if revid is
+ not the NULL_REVISION.
+ revisions: The total revision count in the repository.
+ size: An estimated disk size of the repository in bytes.
+ """
+ result = {}
+ if revid and committers:
+ result['committers'] = 0
+ if revid and revid != _mod_revision.NULL_REVISION:
+ if committers:
+ all_committers = set()
+ revisions = self.get_ancestry(revid)
+ # pop the leading None
+ revisions.pop(0)
+ first_revision = None
+ if not committers:
+ # ignore the revisions in the middle - just grab first and last
+ revisions = revisions[0], revisions[-1]
+ for revision in self.get_revisions(revisions):
+ if not first_revision:
+ first_revision = revision
+ if committers:
+ all_committers.add(revision.committer)
+ last_revision = revision
+ if committers:
+ result['committers'] = len(all_committers)
+ result['firstrev'] = (first_revision.timestamp,
+ first_revision.timezone)
+ result['latestrev'] = (last_revision.timestamp,
+ last_revision.timezone)
+
+ # now gather global repository information
+ if self.bzrdir.root_transport.listable():
+ c, t = self._revision_store.total_size(self.get_transaction())
+ result['revisions'] = c
+ result['size'] = t
+ return result
+
+ @needs_read_lock
def missing_revision_ids(self, other, revision_id=None):
"""Return the revision ids that other has that this does not.
=== modified file 'bzrlib/tests/repository_implementations/__init__.py'
--- a/bzrlib/tests/repository_implementations/__init__.py 2007-01-25 00:35:22 +0000
+++ b/bzrlib/tests/repository_implementations/__init__.py 2007-02-03 10:02:11 +0000
@@ -48,6 +48,7 @@
'bzrlib.tests.repository_implementations.test_reconcile',
'bzrlib.tests.repository_implementations.test_repository',
'bzrlib.tests.repository_implementations.test_revision',
+ 'bzrlib.tests.repository_implementations.test_statistics',
]
all_formats = [v for (k, v) in repository.format_registry.iteritems()]
all_formats.extend(_legacy_formats)
More information about the bazaar-commits mailing list