Rev 4021: Polish on inserting record streams with missing compression parents. in http://people.ubuntu.com/~robertc/baz2.0/VersionedFiles.insert-record-stream.partial
Robert Collins
robertc at robertcollins.net
Fri Feb 20 02:38:31 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/VersionedFiles.insert-record-stream.partial
------------------------------------------------------------
revno: 4021
revision-id: robertc at robertcollins.net-20090220023828-rcqd2pyw22zqjkwl
parent: robertc at robertcollins.net-20090220015545-qloec8tf9jnqi2d8
committer: Robert Collins <robertc at robertcollins.net>
branch nick: VersionedFiles.insert-record-stream.partial
timestamp: Fri 2009-02-20 13:38:28 +1100
message:
Polish on inserting record streams with missing compression parents.
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2009-02-20 01:55:45 +0000
+++ b/bzrlib/knit.py 2009-02-20 02:38:28 +0000
@@ -1439,18 +1439,10 @@
# They're required to be physically in this
# KnitVersionedFiles, not in a fallback.
if not self._index.has_key(compression_parent):
+ pending = buffered_index_entries.setdefault(
+ compression_parent, [])
+ pending.append(index_entry)
buffered = True
- if buffered:
- pending = buffered_index_entries.setdefault(
- compression_parent, [])
- if self._index._insert_unsatisfied_external_refs:
- # This index allows us to add immediately, rather than
- # buffer.
- # XXX maybe put _insert_unsatisfied_external_refs on
- # add_records?
- self._index.add_records([index_entry])
- else:
- pending.append(index_entry)
if not buffered:
self._index.add_records([index_entry])
elif record.storage_kind == 'chunked':
@@ -1484,15 +1476,23 @@
[index_entry[0] for index_entry in index_entries])
del buffered_index_entries[key]
if buffered_index_entries:
- self._index._add_missing_compression_parents(
- buffered_index_entries.keys())
+ # There were index entries buffered at the end of the stream,
+ # so these need to be added (if the index supports holding such
+ # entries for later insertion).
+ for key in buffered_index_entries:
+ index_entries = buffered_index_entries[key]
+ self._index.add_records(index_entries,
+ missing_compression_parents=True)
def get_missing_compression_parent_keys(self):
"""Return an iterable of keys of missing compression parents.
Check this after calling insert_record_stream to find out if there are
any missing compression parents. If there are, the records that
- depend on them are *not* yet inserted.
+ depend on them are not able to be inserted safely. For atomic
+ KnitVersionedFiles built on packs, the transaction should be aborted or
+ suspended - commit will fail at this point. Nonatomic knits will error
+ earlier because they have no staging area to put pending entries into.
"""
return self._index.get_missing_compression_parents()
@@ -1841,8 +1841,6 @@
HEADER = "# bzr knit index 8\n"
- _insert_unsatisfied_external_refs = False
-
def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
"""Create a _KndxIndex on transport using mapper."""
self._transport = transport
@@ -1853,20 +1851,23 @@
self._reset_cache()
self.has_graph = True
- def _add_missing_compression_parents(self, keys):
- raise errors.RevisionNotPresent(keys, '??')
-
- def get_missing_compression_parents(self, keys):
- return []
-
- def add_records(self, records, random_id=False):
+ def add_records(self, records, random_id=False, missing_compression_parents=False):
"""Add multiple records to the index.
:param records: a list of tuples:
(key, options, access_memo, parents).
:param random_id: If True the ids being added were randomly generated
and no check for existence will be performed.
+ :param missing_compression_parents: If True the records being added
+ refer to compression parents that are not yet available (missing
+ from the index). If False the records are all compressed only
+ against texts already in the index (or inside records).
"""
+ if missing_compression_parents:
+ # It might be nice to get the edge of the records. But keys isn't
+ # _wrong_.
+ keys = sorted(record[0] for record in records)
+ raise errors.RevisionNotPresent(keys, self)
paths = {}
for record in records:
key = record[0]
@@ -2203,8 +2204,6 @@
class _KnitGraphIndex(object):
"""A KnitVersionedFiles index layered on GraphIndex."""
- _insert_unsatisfied_external_refs = True
-
def __init__(self, graph_index, is_locked, deltas=False, parents=True,
add_callback=None):
"""Construct a KnitGraphIndex on a graph_index.
@@ -2237,7 +2236,8 @@
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._graph_index)
- def add_records(self, records, random_id=False):
+ def add_records(self, records, random_id=False,
+ missing_compression_parents=False):
"""Add multiple records to the index.
This function does not insert data into the Immutable GraphIndex
@@ -2249,6 +2249,10 @@
(key, options, access_memo, parents).
:param random_id: If True the ids being added were randomly generated
and no check for existence will be performed.
+ :param missing_compression_parents: If True the records being added
+ refer to compression parents that are not yet available (missing
+ from the index). If False the records are all compressed only
+ against texts already in the index (or inside records).
"""
if not self._add_callback:
raise errors.ReadOnlyError(self)
@@ -2256,6 +2260,7 @@
# anymore.
keys = {}
+ compression_parents = set()
for (key, options, access_memo, parents) in records:
if self._parents:
parents = tuple(parents)
@@ -2272,6 +2277,8 @@
if self._deltas:
if 'line-delta' in options:
node_refs = (parents, (parents[0],))
+ if missing_compression_parents:
+ compression_parents.add(parents[0])
else:
node_refs = (parents, ())
else:
@@ -2298,8 +2305,18 @@
else:
for key, (value, node_refs) in keys.iteritems():
result.append((key, value))
+ self._add_callback(result)
+ if missing_compression_parents:
+ # This may appear to be incorrect (it does not check for
+ # compression parents that are in the existing graph index),
+ # but such records won't have been buffered, so this is
+ # actually correct: every entry when
+ # missing_compression_parents==True either has a missing parent, or
+ # a parent that is one of the keys in records.
+ compression_parents.difference_update(keys)
+ self._missing_compression_parents.update(compression_parents)
+ # Adding records may have satisfied missing compression parents.
self._missing_compression_parents.difference_update(keys)
- self._add_callback(result)
def scan_unvalidated_index(self, graph_index):
"""Inform this _KnitGraphIndex that there is an unvalidated index.
@@ -2315,12 +2332,11 @@
new_missing.difference_update(self.get_parent_map(new_missing))
self._missing_compression_parents.update(new_missing)
- def _add_missing_compression_parents(self, keys):
- self._missing_compression_parents.update(keys)
-
def get_missing_compression_parents(self):
- """Return the keys of compression parents missing from unvalidated
- indices.
+ """Return the keys of missing compression parents.
+
+ Missing compression parents occur when a record stream was missing
+ basis texts, or an index was scanned that had missing basis texts.
"""
return frozenset(self._missing_compression_parents)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-02-20 00:43:38 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-02-20 02:38:28 +0000
@@ -1812,8 +1812,7 @@
('texts', self.repo.texts),
('signatures', self.repo.signatures),
):
- # We use KnitVersionedFiles exclusively so can rely on _index.
- missing = versioned_file._index.get_missing_compression_parents()
+ missing = versioned_file.get_missing_compression_parents()
all_missing.update([(prefix,) + key for key in missing])
if all_missing:
raise errors.BzrCheckError(
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2009-02-20 00:52:10 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2009-02-20 02:38:28 +0000
@@ -1999,8 +1999,9 @@
keys = [self.get_simple_key('origin'), self.get_simple_key('merged')]
entries = source.get_record_stream(keys, 'unordered', False)
files = self.get_versionedfiles()
- self.assertEqual([], list(files.get_missing_compression_parent_keys()))
if self.support_partial_insertion:
+ self.assertEqual([],
+ list(files.get_missing_compression_parent_keys()))
files.insert_record_stream(entries)
missing_bases = files.get_missing_compression_parent_keys()
self.assertEqual(set([self.get_simple_key('left')]),
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2009-02-17 01:25:34 +0000
+++ b/bzrlib/versionedfile.py 2009-02-20 02:38:28 +0000
@@ -930,7 +930,10 @@
Check this after calling insert_record_stream to find out if there are
any missing compression parents. If there are, the records that
- depend on them are *not* yet inserted.
+ depend on them are not able to be inserted safely. The precise
+ behaviour depends on the concrete VersionedFiles class in use.
+
+ Classes that do not support this will raise NotImplementedError.
"""
raise NotImplementedError(self.get_missing_compression_parent_keys)
@@ -1168,11 +1171,6 @@
sha1s[prefix + (suffix,)] = sha1
return sha1s
- def get_missing_compression_parent_keys(self):
- """See VersionedFile.get_missing_compression_parent_keys."""
- # weaves don't support partial inserts
- return []
-
def insert_record_stream(self, stream):
"""Insert a record stream into this container.
More information about the bazaar-commits
mailing list