Rev 4021: Polish on inserting record streams with missing compression parents. in http://people.ubuntu.com/~robertc/baz2.0/VersionedFiles.insert-record-stream.partial

Robert Collins robertc at robertcollins.net
Fri Feb 20 02:38:31 GMT 2009


At http://people.ubuntu.com/~robertc/baz2.0/VersionedFiles.insert-record-stream.partial

------------------------------------------------------------
revno: 4021
revision-id: robertc at robertcollins.net-20090220023828-rcqd2pyw22zqjkwl
parent: robertc at robertcollins.net-20090220015545-qloec8tf9jnqi2d8
committer: Robert Collins <robertc at robertcollins.net>
branch nick: VersionedFiles.insert-record-stream.partial
timestamp: Fri 2009-02-20 13:38:28 +1100
message:
  Polish on inserting record streams with missing compression parents.
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2009-02-20 01:55:45 +0000
+++ b/bzrlib/knit.py	2009-02-20 02:38:28 +0000
@@ -1439,18 +1439,10 @@
                     # They're required to be physically in this
                     # KnitVersionedFiles, not in a fallback.
                     if not self._index.has_key(compression_parent):
+                        pending = buffered_index_entries.setdefault(
+                            compression_parent, [])
+                        pending.append(index_entry)
                         buffered = True
-                if buffered:
-                    pending = buffered_index_entries.setdefault(
-                        compression_parent, [])
-                    if self._index._insert_unsatisfied_external_refs: 
-                        # This index allows us to add immediately, rather than
-                        # buffer.
-                        # XXX maybe put _insert_unsatisfied_external_refs on
-                        # add_records?
-                        self._index.add_records([index_entry])
-                    else:
-                        pending.append(index_entry)
                 if not buffered:
                     self._index.add_records([index_entry])
             elif record.storage_kind == 'chunked':
@@ -1484,15 +1476,23 @@
                         [index_entry[0] for index_entry in index_entries])
                     del buffered_index_entries[key]
         if buffered_index_entries:
-            self._index._add_missing_compression_parents(
-                buffered_index_entries.keys())
+            # There were index entries buffered at the end of the stream,
+            # so these need to be added (if the index supports holding such
+            # entries for later insertion).
+            for key in buffered_index_entries:
+                index_entries = buffered_index_entries[key]
+                self._index.add_records(index_entries,
+                    missing_compression_parents=True)
 
     def get_missing_compression_parent_keys(self):
         """Return an iterable of keys of missing compression parents.
 
         Check this after calling insert_record_stream to find out if there are
         any missing compression parents.  If there are, the records that
-        depend on them are *not* yet inserted.
+        depend on them cannot be safely inserted. For atomic
+        KnitVersionedFiles built on packs, the transaction should be aborted or
+        suspended - commit will fail at this point. Nonatomic knits will error
+        earlier because they have no staging area to put pending entries into.
         """
         return self._index.get_missing_compression_parents()
 
@@ -1841,8 +1841,6 @@
 
     HEADER = "# bzr knit index 8\n"
 
-    _insert_unsatisfied_external_refs = False
-
     def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
         """Create a _KndxIndex on transport using mapper."""
         self._transport = transport
@@ -1853,20 +1851,23 @@
         self._reset_cache()
         self.has_graph = True
 
-    def _add_missing_compression_parents(self, keys):
-        raise errors.RevisionNotPresent(keys, '??')
-
-    def get_missing_compression_parents(self, keys):
-        return []
-        
-    def add_records(self, records, random_id=False):
+    def add_records(self, records, random_id=False, missing_compression_parents=False):
         """Add multiple records to the index.
         
         :param records: a list of tuples:
                          (key, options, access_memo, parents).
         :param random_id: If True the ids being added were randomly generated
             and no check for existence will be performed.
+        :param missing_compression_parents: If True the records being added
+            refer to unavailable texts (or texts inside records) as
+            compression parents. If False the records are all compressed only
+            against texts already in the index (or inside records).
         """
+        if missing_compression_parents:
+            # It might be nice to get the edge of the records. But keys isn't
+            # _wrong_.
+            keys = sorted(record[0] for record in records)
+            raise errors.RevisionNotPresent(keys, self)
         paths = {}
         for record in records:
             key = record[0]
@@ -2203,8 +2204,6 @@
 class _KnitGraphIndex(object):
     """A KnitVersionedFiles index layered on GraphIndex."""
 
-    _insert_unsatisfied_external_refs = True
-
     def __init__(self, graph_index, is_locked, deltas=False, parents=True,
         add_callback=None):
         """Construct a KnitGraphIndex on a graph_index.
@@ -2237,7 +2236,8 @@
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self._graph_index)
 
-    def add_records(self, records, random_id=False):
+    def add_records(self, records, random_id=False,
+        missing_compression_parents=False):
         """Add multiple records to the index.
         
         This function does not insert data into the Immutable GraphIndex
@@ -2249,6 +2249,10 @@
                          (key, options, access_memo, parents).
         :param random_id: If True the ids being added were randomly generated
             and no check for existence will be performed.
+        :param missing_compression_parents: If True the records being added
+            refer to unavailable texts (or texts inside records) as
+            compression parents. If False the records are all compressed only
+            against texts already in the index (or inside records).
         """
         if not self._add_callback:
             raise errors.ReadOnlyError(self)
@@ -2256,6 +2260,7 @@
         # anymore.
 
         keys = {}
+        compression_parents = set()
         for (key, options, access_memo, parents) in records:
             if self._parents:
                 parents = tuple(parents)
@@ -2272,6 +2277,8 @@
                 if self._deltas:
                     if 'line-delta' in options:
                         node_refs = (parents, (parents[0],))
+                        if missing_compression_parents:
+                            compression_parents.add(parents[0])
                     else:
                         node_refs = (parents, ())
                 else:
@@ -2298,8 +2305,18 @@
         else:
             for key, (value, node_refs) in keys.iteritems():
                 result.append((key, value))
+        self._add_callback(result)
+        if missing_compression_parents:
+            # This may appear to be incorrect (it does not check for
+            # compression parents that are in the existing graph index),
+            # but such records won't have been buffered, so this is
+            # actually correct: every entry when
+            # missing_compression_parents==True either has a missing parent, or
+            # a parent that is one of the keys in records.
+            compression_parents.difference_update(keys)
+            self._missing_compression_parents.update(compression_parents)
+        # Adding records may have satisfied missing compression parents.
         self._missing_compression_parents.difference_update(keys)
-        self._add_callback(result)
         
     def scan_unvalidated_index(self, graph_index):
         """Inform this _KnitGraphIndex that there is an unvalidated index.
@@ -2315,12 +2332,11 @@
             new_missing.difference_update(self.get_parent_map(new_missing))
             self._missing_compression_parents.update(new_missing)
 
-    def _add_missing_compression_parents(self, keys):
-        self._missing_compression_parents.update(keys)
-        
     def get_missing_compression_parents(self):
-        """Return the keys of compression parents missing from unvalidated
-        indices.
+        """Return the keys of missing compression parents.
+
+        Missing compression parents occur when a record stream was missing
+        basis texts, or an index was scanned that had missing basis texts.
         """
         return frozenset(self._missing_compression_parents)
 

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-02-20 00:43:38 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-02-20 02:38:28 +0000
@@ -1812,8 +1812,7 @@
                 ('texts', self.repo.texts),
                 ('signatures', self.repo.signatures),
                 ):
-            # We use KnitVersionedFiles exclusively so can rely on _index.
-            missing = versioned_file._index.get_missing_compression_parents()
+            missing = versioned_file.get_missing_compression_parents()
             all_missing.update([(prefix,) + key for key in missing])
         if all_missing:
             raise errors.BzrCheckError(

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2009-02-20 00:52:10 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2009-02-20 02:38:28 +0000
@@ -1999,8 +1999,9 @@
         keys = [self.get_simple_key('origin'), self.get_simple_key('merged')]
         entries = source.get_record_stream(keys, 'unordered', False)
         files = self.get_versionedfiles()
-        self.assertEqual([], list(files.get_missing_compression_parent_keys()))
         if self.support_partial_insertion:
+            self.assertEqual([],
+                list(files.get_missing_compression_parent_keys()))
             files.insert_record_stream(entries)
             missing_bases = files.get_missing_compression_parent_keys()
             self.assertEqual(set([self.get_simple_key('left')]),

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2009-02-17 01:25:34 +0000
+++ b/bzrlib/versionedfile.py	2009-02-20 02:38:28 +0000
@@ -930,7 +930,10 @@
 
         Check this after calling insert_record_stream to find out if there are
         any missing compression parents.  If there are, the records that
-        depend on them are *not* yet inserted.
+        depend on them are not able to be inserted safely. The precise
+        behaviour depends on the concrete VersionedFiles class in use.
+
+        Classes that do not support this will raise NotImplementedError.
         """
         raise NotImplementedError(self.get_missing_compression_parent_keys)
 
@@ -1168,11 +1171,6 @@
                 sha1s[prefix + (suffix,)] = sha1
         return sha1s
 
-    def get_missing_compression_parent_keys(self):
-        """See VersionedFile.get_missing_compression_parent_keys."""
-        # weaves don't support partial inserts
-        return []
-    
     def insert_record_stream(self, stream):
         """Insert a record stream into this container.
 




More information about the bazaar-commits mailing list