Rev 3799: _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened. in http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786

John Arbash Meinel john at arbash-meinel.com
Fri Oct 24 21:13:21 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786

------------------------------------------------------------
revno: 3799
revision-id: john at arbash-meinel.com-20081024201311-4kteabxxs3istdan
parent: john at arbash-meinel.com-20081023210643-6pxsgdybl89n0tz9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pack_retry_153786
timestamp: Fri 2008-10-24 15:13:11 -0500
message:
  _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.
-------------- next part --------------
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2008-10-21 03:47:13 +0000
+++ b/bzrlib/errors.py	2008-10-24 20:13:11 +0000
@@ -1483,6 +1483,37 @@
         self.options = options
 
 
+class RetryWithNewPacks(BzrError):
+    """Raised when we realize that the packs on disk have changed.
+
+    This is meant as more of a signaling exception, to trap between where a
+    local error occurred and the code that can actually handle the error and
+    code that can retry appropriately.
+    """
+
+    internal_error = True
+
+    _fmt = ("Pack files have changed, reload and retry.")
+
+    def __init__(self, reload_occurred, exc_info):
+        """create a new RestartWithNewPacks error.
+
+        :param reload_occurred: Set to True if we know that the packs have
+            already been reloaded, and we are failing because of an in-memory
+            cache miss. If set to True then we will ignore if a reload says
+            nothing has changed, because we assume it has already reloaded. If
+            False, then a reload with nothing changed will force an error.
+        :param exc_info: The original exception traceback, so if there is a
+            problem we can raise the original error (value from sys.exc_info())
+        """
+        BzrError.__init__(self)
+        self.reload_occurred = reload_occurred
+        self.exc_info = exc_info
+        # TODO: The global error handler should probably treat this by
+        #       raising/printing the original exception with a bit about
+        #       RetryWithNewPacks also not being caught
+
+
 class NoSuchExportFormat(BzrError):
     
     _fmt = "Export format %(format)r not supported"

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-10-21 03:36:14 +0000
+++ b/bzrlib/knit.py	2008-10-24 20:13:11 +0000
@@ -64,6 +64,7 @@
 from itertools import izip, chain
 import operator
 import os
+import sys
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
@@ -707,7 +708,7 @@
     """
 
     def __init__(self, index, data_access, max_delta_chain=200,
-        annotated=False):
+                 annotated=False, reload_func=None):
         """Create a KnitVersionedFiles with index and data_access.
 
         :param index: The index for the knit data.
@@ -717,6 +718,9 @@
             insertion. Set to 0 to prohibit the use of deltas.
         :param annotated: Set to True to cause annotations to be calculated and
             stored during insertion.
+        :param reload_func: An function that can be called if we think we need
+            to reload the pack listing and try again. See
+            'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
         """
         self._index = index
         self._access = data_access
@@ -726,6 +730,7 @@
         else:
             self._factory = KnitPlainFactory()
         self._fallback_vfs = []
+        self._reload_func = reload_func
 
     def __repr__(self):
         return "%s(%r, %r)" % (
@@ -1159,6 +1164,22 @@
         if not self._index.has_graph:
             # Cannot topological order when no graph has been stored.
             ordering = 'unordered'
+
+        remaining_keys = keys
+        while True:
+            try:
+                keys = set(remaining_keys)
+                for content_factory in self._get_remaining_record_stream(keys,
+                                            ordering, include_delta_closure):
+                    remaining_keys.discard(content_factory.key)
+                    yield content_factory
+                return
+            except errors.RetryWithNewPacks, e:
+                # reload_func
+                raise
+
+    def _get_remaining_record_stream(self, keys, ordering,
+                                     include_delta_closure):
         if include_delta_closure:
             positions = self._get_components_positions(keys, allow_missing=True)
         else:
@@ -2433,10 +2454,23 @@
         if current_index is not None:
             request_lists.append((current_index, current_list))
         for index, offsets in request_lists:
-            transport, path = self._indices[index]
-            reader = pack.make_readv_reader(transport, path, offsets)
-            for names, read_func in reader.iter_records():
-                yield read_func(None)
+            try:
+                transport, path = self._indices[index]
+            except KeyError:
+                # A KeyError here indicates that someone has triggered an index
+                # reload, and this index has gone missing, we need to start
+                # over.
+                raise errors.RetryWithNewPacks(reload_occurred=True,
+                                               exc_info=sys.exc_info())
+            try:
+                reader = pack.make_readv_reader(transport, path, offsets)
+                for names, read_func in reader.iter_records():
+                    yield read_func(None)
+            except errors.NoSuchFile:
+                # A NoSuchFile error indicates that a pack file has gone
+                # missing on disk, we need to trigger a reload, and start over.
+                raise errors.RetryWithNewPacks(reload_occurred=False,
+                                               exc_info=sys.exc_info())
 
     def set_writer(self, writer, index, transport_packname):
         """Set a writer to use for adding data."""

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2008-10-21 03:47:13 +0000
+++ b/bzrlib/tests/test_knit.py	2008-10-24 20:13:11 +0000
@@ -363,6 +363,40 @@
         writer.end()
         self.assertEqual(['1234567890'], list(access.get_raw_records(memos)))
 
+    def test_missing_index_raises_retry(self):
+        access, writer = self._get_access(packname='packname', index='foo')
+        memos = []
+        memos.extend(access.add_raw_records([('key', 10)], '1234567890'))
+        writer.end()
+        transport = self.get_transport()
+        # Note that the 'index' key has changed
+        access = _DirectPackAccess({'bar':(transport, 'packname')})
+        e = self.assertListRaises(errors.RetryWithNewPacks,
+                                  access.get_raw_records, memos)
+        # Because a key was passed in which does not match our index list, we
+        # assume that the listing was already reloaded
+        self.assertTrue(e.reload_occurred)
+        self.assertIsInstance(e.exc_info, tuple)
+        self.assertIs(e.exc_info[0], KeyError)
+        self.assertIsInstance(e.exc_info[1], KeyError)
+
+    def test_missing_file_raises_retry(self):
+        access, writer = self._get_access(packname='packname', index='foo')
+        memos = []
+        memos.extend(access.add_raw_records([('key', 10)], '1234567890'))
+        writer.end()
+        transport = self.get_transport()
+        # Note that the 'filename' has been changed
+        access = _DirectPackAccess({'foo':(transport, 'different-packname')})
+        e = self.assertListRaises(errors.RetryWithNewPacks,
+                                  access.get_raw_records, memos)
+        # The file has gone missing, so we assume we need to reload
+        self.assertFalse(e.reload_occurred)
+        self.assertIsInstance(e.exc_info, tuple)
+        self.assertIs(e.exc_info[0], errors.NoSuchFile)
+        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
+        self.assertEqual('different-packname', e.exc_info[1].path)
+
 
 class LowLevelKnitDataTests(TestCase):
 



More information about the bazaar-commits mailing list