Rev 4147: Add a refresh_data method on Repository allowing cleaner handling of insertions into RemoteRepository objects with _real_repository instances. in http://people.ubuntu.com/~robertc/baz2.0/pending/repository.refresh_data

Robert Collins robertc at robertcollins.net
Mon Mar 16 05:05:57 GMT 2009


At http://people.ubuntu.com/~robertc/baz2.0/pending/repository.refresh_data

------------------------------------------------------------
revno: 4147
revision-id: robertc at robertcollins.net-20090316050552-hqcgx49ugew0facc
parent: robertc at robertcollins.net-20090316033405-wpq3do6shludp1bh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository.refresh_data
timestamp: Mon 2009-03-16 16:05:52 +1100
message:
  Add a refresh_data method on Repository allowing cleaner handling of insertions into RemoteRepository objects with _real_repository instances.
=== modified file 'NEWS'
--- a/NEWS	2009-03-16 03:34:05 +0000
+++ b/NEWS	2009-03-16 05:05:52 +0000
@@ -77,6 +77,10 @@
     * New ``assertLength`` method based on one Martin has squirreled away
       somewhere. (Robert Collins, Martin Pool)
 
+    * New repository method ``refresh_data`` to cause any repository to
+      make visible data inserted into the repository by a smart server
+      fetch operation. (Robert Collins, Andrew Bennetts)
+
 
 bzr 1.13rc1 "paraskavedekatriaphobia" 2009-03-10
 ------------------------------------------------

=== modified file 'bzrlib/lockable_files.py'
--- a/bzrlib/lockable_files.py	2009-03-11 21:44:21 +0000
+++ b/bzrlib/lockable_files.py	2009-03-16 05:05:52 +0000
@@ -271,7 +271,7 @@
             #traceback.print_stack()
             self._lock_mode = 'w'
             self._lock_warner.lock_count = 1
-            self._set_transaction(transactions.WriteTransaction())
+            self._set_write_transaction()
             self._token_from_lock = token_from_lock
             return token_from_lock
 
@@ -285,9 +285,17 @@
             #traceback.print_stack()
             self._lock_mode = 'r'
             self._lock_warner.lock_count = 1
-            self._set_transaction(transactions.ReadOnlyTransaction())
-            # 5K may be excessive, but hey, its a knob.
-            self.get_transaction().set_cache_size(5000)
+            self._set_read_transaction()
+
+    def _set_read_transaction(self):
+        """Setup a read transaction."""
+        self._set_transaction(transactions.ReadOnlyTransaction())
+        # 5K may be excessive, but hey, it's a knob.
+        self.get_transaction().set_cache_size(5000)
+
+    def _set_write_transaction(self):
+        """Setup a write transaction."""
+        self._set_transaction(transactions.WriteTransaction())
 
     def unlock(self):
         if not self._lock_mode:

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2009-03-16 03:34:05 +0000
+++ b/bzrlib/remote.py	2009-03-16 05:05:52 +0000
@@ -1090,6 +1090,20 @@
         self._ensure_real()
         return self._real_repository.make_working_trees()
 
+    def refresh_data(self):
+        """Re-read any data needed to synchronise with disk.
+
+        This method is intended to be called after another repository instance
+        (such as one used by a smart server) has inserted data into the
+        repository. It may not be called during a write group, but may be
+        called at any other time.
+        """
+        if self.is_in_write_group():
+            raise errors.InternalBzrError(
+                "May not refresh_data while in a write group.")
+        if self._real_repository is not None:
+            self._real_repository.refresh_data()
+
     def revision_ids_to_search_result(self, result_set):
         """Convert a set of revision ids to a graph SearchResult."""
         result_parents = set()
@@ -1134,7 +1148,7 @@
         # if there is no specific appropriate InterRepository, this will get
         # the InterRepository base class, which raises an
         # IncompatibleRepositories when asked to fetch.
-        inter = InterRepository.get(source, self)
+        inter = repository.InterRepository.get(source, self)
         return inter.fetch(revision_id=revision_id, pb=pb,
             find_ghosts=find_ghosts, fetch_spec=fetch_spec)
 
@@ -1512,12 +1526,7 @@
             self._ensure_real()
             self._real_repository._pack_collection.autopack()
             return
-        if self._real_repository is not None:
-            # Reset the real repository's cache of pack names.
-            # XXX: At some point we may be able to skip this and just rely on
-            # the automatic retry logic to do the right thing, but for now we
-            # err on the side of being correct rather than being optimal.
-            self._real_repository._pack_collection.reload_pack_names()
+        self.refresh_data()
         if response[0] != 'ok':
             raise errors.UnexpectedSmartServerResponse(response)
 
@@ -1568,11 +1577,7 @@
             resume_tokens = tokens
             return resume_tokens, missing_keys
         else:
-            if self.target_repo._real_repository is not None:
-                collection = getattr(self.target_repo._real_repository,
-                    '_pack_collection', None)
-                if collection is not None:
-                    collection.reload_pack_names()
+            self.target_repo.refresh_data()
             return [], set()
 
 

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2009-02-27 01:02:40 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2009-03-16 05:05:52 +0000
@@ -213,6 +213,17 @@
         revision_id = osutils.safe_revision_id(revision_id)
         return self.get_revision_reconcile(revision_id)
 
+    def _refresh_data(self):
+        if not self.is_locked():
+            return
+        # Create a new transaction to force all knits to see the scope change.
+        # This is safe because we're outside a write group.
+        self.control_files._finish_transaction()
+        if self.is_write_locked():
+            self.control_files._set_write_transaction()
+        else:
+            self.control_files._set_read_transaction()
+
     @needs_write_lock
     def reconcile(self, other=None, thorough=False):
         """Reconcile this repository."""

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-03-12 02:45:17 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-03-16 05:05:52 +0000
@@ -1297,6 +1297,7 @@
         :param index_builder_class: The index builder class to use.
         :param index_class: The index class to use.
         """
+        # XXX: This should call self.reset()
         self.repo = repo
         self.transport = transport
         self._index_transport = index_transport
@@ -1307,6 +1308,7 @@
         self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}
         self.packs = []
         # name:Pack mapping
+        self._names = None
         self._packs_by_name = {}
         # the previous pack-names content
         self._packs_at_load = None
@@ -1839,6 +1841,12 @@
         present is now missing. This happens when another process re-packs the
         repository, etc.
         """
+        # The ensure_loaded call is to handle the case where the first call
+        # made involving the collection was to reload_pack_names, where we 
+        # don't have a view of disk contents. It's a bit of a bandaid, and
+        # causes two reads of pack-names, but it's a rare corner case not struck
+        # with regular push/pull etc.
+        self.ensure_loaded()
         # This is functionally similar to _save_pack_names, but we don't write
         # out the new value.
         disk_nodes, _, _ = self._diff_pack_names()
@@ -2115,14 +2123,9 @@
         return graph.CachingParentsProvider(self)
 
     def _refresh_data(self):
-        if self._write_lock_count == 1 or (
-            self.control_files._lock_count == 1 and
-            self.control_files._lock_mode == 'r'):
-            # forget what names there are
-            self._pack_collection.reset()
-            # XXX: Better to do an in-memory merge when acquiring a new lock -
-            # factor out code from _save_pack_names.
-            self._pack_collection.ensure_loaded()
+        if not self.is_locked():
+            return
+        self._pack_collection.reload_pack_names()
 
     def _start_write_group(self):
         self._pack_collection._start_write_group()
@@ -2153,7 +2156,8 @@
         return self._write_lock_count
 
     def lock_write(self, token=None):
-        if not self._write_lock_count and self.is_locked():
+        locked = self.is_locked()
+        if not self._write_lock_count and locked:
             raise errors.ReadOnlyError(self)
         self._write_lock_count += 1
         if self._write_lock_count == 1:
@@ -2161,9 +2165,11 @@
             for repo in self._fallback_repositories:
                 # Writes don't affect fallback repos
                 repo.lock_read()
-        self._refresh_data()
+        if not locked:
+            self._refresh_data()
 
     def lock_read(self):
+        locked = self.is_locked()
         if self._write_lock_count:
             self._write_lock_count += 1
         else:
@@ -2171,7 +2177,8 @@
             for repo in self._fallback_repositories:
                 # Writes don't affect fallback repos
                 repo.lock_read()
-        self._refresh_data()
+        if not locked:
+            self._refresh_data()
 
     def leave_lock_in_place(self):
         # not supported - raise an error

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-03-16 03:34:05 +0000
+++ b/bzrlib/repository.py	2009-03-16 05:05:52 +0000
@@ -884,18 +884,22 @@
 
         XXX: this docstring is duplicated in many places, e.g. lockable_files.py
         """
+        locked = self.is_locked()
         result = self.control_files.lock_write(token=token)
         for repo in self._fallback_repositories:
             # Writes don't affect fallback repos
             repo.lock_read()
-        self._refresh_data()
+        if not locked:
+            self._refresh_data()
         return result
 
     def lock_read(self):
+        locked = self.is_locked()
         self.control_files.lock_read()
         for repo in self._fallback_repositories:
             repo.lock_read()
-        self._refresh_data()
+        if not locked:
+            self._refresh_data()
 
     def get_physical_lock_status(self):
         return self.control_files.get_physical_lock_status()
@@ -1084,6 +1088,19 @@
     def suspend_write_group(self):
         raise errors.UnsuspendableWriteGroup(self)
 
+    def refresh_data(self):
+        """Re-read any data needed to synchronise with disk.
+
+        This method is intended to be called after another repository instance
+        (such as one used by a smart server) has inserted data into the
+        repository. It may not be called during a write group, but may be
+        called at any other time.
+        """
+        if self.is_in_write_group():
+            raise errors.InternalBzrError(
+                "May not refresh_data while in a write group.")
+        self._refresh_data()
+
     def resume_write_group(self, tokens):
         if not self.is_write_locked():
             raise errors.NotWriteLocked(self)
@@ -1844,7 +1861,11 @@
         for repositories to maintain loaded indices across multiple locks
         by checking inside their implementation of this method to see
         whether their indices are still valid. This depends of course on
-        the disk format being validatable in this manner.
+        the disk format being validatable in this manner. This method is
+        also called by the refresh_data() public interface to cause a refresh
+        to occur while in a write lock so that data inserted by a smart server
+        push operation is visible on the client's instance of the physical
+        repository.
         """
 
     @needs_read_lock

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2009-03-16 01:56:18 +0000
+++ b/bzrlib/tests/__init__.py	2009-03-16 05:05:52 +0000
@@ -2113,6 +2113,13 @@
         made_control = self.make_bzrdir(relpath, format=format)
         return made_control.create_repository(shared=shared)
 
+    def make_smart_server(self, path):
+        smart_server = server.SmartTCPServer_for_testing()
+        smart_server.setUp(self.get_server())
+        remote_transport = get_transport(smart_server.get_url()).clone(path)
+        self.addCleanup(smart_server.tearDown)
+        return remote_transport
+
     def make_branch_and_memory_tree(self, relpath, format=None):
         """Create a branch on the default transport and a MemoryTree for it."""
         b = self.make_branch(relpath, format=format)

=== modified file 'bzrlib/tests/branch_implementations/test_stacking.py'
--- a/bzrlib/tests/branch_implementations/test_stacking.py	2009-03-11 06:50:16 +0000
+++ b/bzrlib/tests/branch_implementations/test_stacking.py	2009-03-16 05:05:52 +0000
@@ -126,14 +126,6 @@
         self.assertRevisionNotInRepository('mainline', new_branch_revid)
         self.assertRevisionInRepository('newbranch', new_branch_revid)
 
-    # XXX: this helper probably belongs on TestCaseWithTransport
-    def make_smart_server(self, path):
-        smart_server = server.SmartTCPServer_for_testing()
-        smart_server.setUp(self.get_server())
-        remote_transport = get_transport(smart_server.get_url()).clone(path)
-        self.addCleanup(smart_server.tearDown)
-        return remote_transport
-
     def test_sprout_stacked_from_smart_server(self):
         if isinstance(self.branch_format, branch.BzrBranchFormat4):
             raise TestNotApplicable('Branch format 4 is not usable via HPSS.')

=== modified file 'bzrlib/tests/per_repository/__init__.py'
--- a/bzrlib/tests/per_repository/__init__.py	2009-03-07 06:58:17 +0000
+++ b/bzrlib/tests/per_repository/__init__.py	2009-03-16 05:05:52 +0000
@@ -869,6 +869,7 @@
         'test_iter_reverse_revision_history',
         'test_pack',
         'test_reconcile',
+        'test_refresh_data',
         'test_repository',
         'test_revision',
         'test_statistics',

=== added file 'bzrlib/tests/per_repository/test_refresh_data.py'
--- a/bzrlib/tests/per_repository/test_refresh_data.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/per_repository/test_refresh_data.py	2009-03-16 05:05:52 +0000
@@ -0,0 +1,79 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for Repository.refresh_data."""
+
+from bzrlib import (
+    errors,
+    remote,
+    )
+from bzrlib.tests import TestSkipped
+from bzrlib.tests.per_repository import TestCaseWithRepository
+
+
+class TestRefreshData(TestCaseWithRepository):
+
+    def test_refresh_data_unlocked(self):
+        # While not interesting, it should not error.
+        repo = self.make_repository('.')
+        repo.refresh_data()
+
+    def test_refresh_data_read_locked(self):
+        # While not interesting, it should not error.
+        repo = self.make_repository('.')
+        repo.lock_read()
+        self.addCleanup(repo.unlock)
+        repo.refresh_data()
+
+    def test_refresh_data_write_locked(self):
+        # While not interesting, it should not error.
+        repo = self.make_repository('.')
+        repo.lock_write()
+        self.addCleanup(repo.unlock)
+        repo.refresh_data()
+
+    def test_refresh_data_in_write_group_errors(self):
+        repo = self.make_repository('.')
+        repo.lock_write()
+        self.addCleanup(repo.unlock)
+        repo.start_write_group()
+        self.addCleanup(repo.abort_write_group)
+        # No flow control anticipated, BzrError is enough
+        self.assertRaises(errors.BzrError, repo.refresh_data)
+
+    def test_refresh_data_after_fetch_new_data_visible(self):
+        source = self.make_branch_and_tree('source')
+        revid = source.commit('foo')
+        repo = self.make_repository('target')
+        token = repo.lock_write()
+        self.addCleanup(repo.unlock)
+        # Force data reading on weaves/knits
+        repo.revisions.keys()
+        repo.inventories.keys()
+        # server repo is the instance a smart server might hold for this
+        # repository.
+        server_repo = repo.bzrdir.open_repository()
+        try:
+            server_repo.lock_write(token)
+        except errors.TokenLockingNotSupported:
+            raise TestSkipped('Cannot concurrently insert into repo format %r'
+                % self.repository_format)
+        try:
+            server_repo.fetch(source.branch.repository, revid)
+        finally:
+            server_repo.unlock()
+        repo.refresh_data()
+        self.assertNotEqual({}, repo.get_graph().get_parent_map([revid]))




More information about the bazaar-commits mailing list