Rev 3367: Prevent corrupt knits being created when a stream is interrupted with basis parents not present, in http://people.ubuntu.com/~robertc/baz2.0/versioned_files
Robert Collins
robertc at robertcollins.net
Wed Apr 30 03:50:36 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/versioned_files
------------------------------------------------------------
revno: 3367
revision-id: robertc at robertcollins.net-20080430025031-3e4hnirlk03cmq4h
parent: robertc at robertcollins.net-20080430014801-td3qtpnf46amdsy5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: data_stream_revamp
timestamp: Wed 2008-04-30 12:50:31 +1000
message:
Prevent corrupt knits being created when a stream is interrupted with basis parents not present.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-04-30 01:48:01 +0000
+++ b/bzrlib/knit.py 2008-04-30 02:50:31 +0000
@@ -758,12 +758,10 @@
# write all the data
raw_record_sizes = [record[3] for record in records]
positions = self._data.add_raw_records(raw_record_sizes, data)
- offset = 0
index_entries = []
- for (version_id, options, parents, size), access_memo in zip(
+ for (version_id, options, parents, _), access_memo in zip(
records, positions):
index_entries.append((version_id, options, access_memo, parents))
- offset += size
self._index.add_versions(index_entries)
def copy_to(self, name, transport):
@@ -1108,6 +1106,14 @@
native_types.add("knit-%sft-gz" % annotated)
knit_types = native_types.union(convertibles)
adapters = {}
+ # Buffered index entries that we can't add immediately because their
+ # basis parent is missing. We don't buffer all because generating
+ # annotations may require access to some of the new records. However we
+ # can't generate annotations from new deltas until their basis parent
+ # is present anyway, so we get away with not needing an index that
+ # reports on the new keys.
+ # key = basis_parent, value = index entry to add
+ buffered_index_entries = {}
for record in stream:
# Raise an error when a record is missing.
if record.storage_kind == 'absent':
@@ -1137,9 +1143,18 @@
# deprecated format this is tolerable. It can be fixed if
# needed by in the kndx index support raising on a duplicate
# add with identical parents and options.
- self._add_raw_records(
- [(record.key[0], options, parents, len(bytes))],
- bytes)
+ access_memo = self._data.add_raw_records([len(bytes)], bytes)[0]
+ index_entry = (record.key[0], options, access_memo, parents)
+ buffered = False
+ if 'fulltext' not in options:
+ basis_parent = parents[0]
+ if not self.has_version(basis_parent):
+ pending = buffered_index_entries.setdefault(
+ basis_parent, [])
+ pending.append(index_entry)
+ buffered = True
+ if not buffered:
+ self._index.add_versions([index_entry])
elif record.storage_kind == 'fulltext':
self.add_lines(record.key[0], parents,
split_lines(record.get_bytes_as('fulltext')))
@@ -1152,6 +1167,20 @@
self.add_lines(record.key[0], parents, lines)
except errors.RevisionAlreadyPresent:
pass
+ # Add any records whose basis parent is now available.
+ added_keys = [record.key[0]]
+ while added_keys:
+ key = added_keys.pop(0)
+ if key in buffered_index_entries:
+ index_entries = buffered_index_entries[key]
+ self._index.add_versions(index_entries)
+ added_keys.extend(
+ [index_entry[0] for index_entry in index_entries])
+ del buffered_index_entries[key]
+ # If there were any deltas which had a missing basis parent, error.
+ if buffered_index_entries:
+ raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
+ self)
def versions(self):
"""See VersionedFile.versions."""
@@ -1376,7 +1405,7 @@
versions = self.versions()
parent_map = self.get_parent_map(versions)
for version in versions:
- if self.get_method(version) != 'fulltext':
+ if self._index.get_method(version) != 'fulltext':
compression_parent = parent_map[version][0]
if compression_parent not in parent_map:
raise errors.KnitCorrupt(self,
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2008-04-30 01:48:01 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2008-04-30 02:50:31 +0000
@@ -359,6 +359,18 @@
else:
self.assertIdenticalVersionedFile(f, target)
+ def test_insert_record_stream_delta_missing_basis_no_corruption(self):
+ """Insertion where a needed basis is not included aborts safely."""
+ # Annotated source - deltas can be used in any knit.
+ source = make_file_knit('source', get_transport(self.get_url('.')),
+ create=True)
+ get_diamond_vf(source)
+ entries = source.get_record_stream(['origin', 'merged'], 'unordered', False)
+ f = self.get_file()
+ self.assertRaises(RevisionNotPresent, f.insert_record_stream, entries)
+ f.check()
+ self.assertFalse(f.has_version('merged'))
+
def test_adds_with_parent_texts(self):
f = self.get_file()
parent_texts = {}
More information about the bazaar-commits mailing list