Rev 3367: Prevent corrupt knits being created when a stream is interrupted with basis parents not present. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files

Robert Collins robertc at robertcollins.net
Wed Apr 30 03:50:36 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/versioned_files

------------------------------------------------------------
revno: 3367
revision-id: robertc at robertcollins.net-20080430025031-3e4hnirlk03cmq4h
parent: robertc at robertcollins.net-20080430014801-td3qtpnf46amdsy5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: data_stream_revamp
timestamp: Wed 2008-04-30 12:50:31 +1000
message:
  Prevent corrupt knits being created when a stream is interrupted with basis parents not present.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-04-30 01:48:01 +0000
+++ b/bzrlib/knit.py	2008-04-30 02:50:31 +0000
@@ -758,12 +758,10 @@
         # write all the data
         raw_record_sizes = [record[3] for record in records]
         positions = self._data.add_raw_records(raw_record_sizes, data)
-        offset = 0
         index_entries = []
-        for (version_id, options, parents, size), access_memo in zip(
+        for (version_id, options, parents, _), access_memo in zip(
             records, positions):
             index_entries.append((version_id, options, access_memo, parents))
-            offset += size
         self._index.add_versions(index_entries)
 
     def copy_to(self, name, transport):
@@ -1108,6 +1106,14 @@
         native_types.add("knit-%sft-gz" % annotated)
         knit_types = native_types.union(convertibles)
         adapters = {}
+        # Buffered index entries that we can't add immediately because their
+        # basis parent is missing. We don't buffer all because generating
+        # annotations may require access to some of the new records. However we
+        # can't generate annotations from new deltas until their basis parent
+        # is present anyway, so we get away with not needing an index that
+        # reports on the new keys.
+        # key = basis_parent, value = list of index entries to add
+        buffered_index_entries = {}
         for record in stream:
             # Raise an error when a record is missing.
             if record.storage_kind == 'absent':
@@ -1137,9 +1143,18 @@
                 # deprecated format this is tolerable. It can be fixed if
                 # needed by in the kndx index support raising on a duplicate
                 # add with identical parents and options.
-                self._add_raw_records(
-                    [(record.key[0], options, parents, len(bytes))],
-                    bytes)
+                access_memo = self._data.add_raw_records([len(bytes)], bytes)[0]
+                index_entry = (record.key[0], options, access_memo, parents)
+                buffered = False
+                if 'fulltext' not in options:
+                    basis_parent = parents[0]
+                    if not self.has_version(basis_parent):
+                        pending = buffered_index_entries.setdefault(
+                            basis_parent, [])
+                        pending.append(index_entry)
+                        buffered = True
+                if not buffered:
+                    self._index.add_versions([index_entry])
             elif record.storage_kind == 'fulltext':
                 self.add_lines(record.key[0], parents,
                     split_lines(record.get_bytes_as('fulltext')))
@@ -1152,6 +1167,20 @@
                     self.add_lines(record.key[0], parents, lines)
                 except errors.RevisionAlreadyPresent:
                     pass
+            # Add any records whose basis parent is now available.
+            added_keys = [record.key[0]]
+            while added_keys:
+                key = added_keys.pop(0)
+                if key in buffered_index_entries:
+                    index_entries = buffered_index_entries[key]
+                    self._index.add_versions(index_entries)
+                    added_keys.extend(
+                        [index_entry[0] for index_entry in index_entries])
+                    del buffered_index_entries[key]
+        # If there were any deltas which had a missing basis parent, error.
+        if buffered_index_entries:
+            raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
+                self)
 
     def versions(self):
         """See VersionedFile.versions."""
@@ -1376,7 +1405,7 @@
         versions = self.versions()
         parent_map = self.get_parent_map(versions)
         for version in versions:
-            if self.get_method(version) != 'fulltext':
+            if self._index.get_method(version) != 'fulltext':
                 compression_parent = parent_map[version][0]
                 if compression_parent not in parent_map:
                     raise errors.KnitCorrupt(self,

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-04-30 01:48:01 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-04-30 02:50:31 +0000
@@ -359,6 +359,18 @@
         else:
             self.assertIdenticalVersionedFile(f, target)
 
+    def test_insert_record_stream_delta_missing_basis_no_corruption(self):
+        """Insertion where a needed basis is not included aborts safely."""
+        # Annotated source - deltas can be used in any knit.
+        source = make_file_knit('source', get_transport(self.get_url('.')),
+            create=True)
+        get_diamond_vf(source)
+        entries = source.get_record_stream(['origin', 'merged'], 'unordered', False)
+        f = self.get_file()
+        self.assertRaises(RevisionNotPresent, f.insert_record_stream, entries)
+        f.check()
+        self.assertFalse(f.has_version('merged'))
+
     def test_adds_with_parent_texts(self):
         f = self.get_file()
         parent_texts = {}




More information about the bazaar-commits mailing list