Rev 3362: Generate streams with absent records. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files

Robert Collins robertc at robertcollins.net
Thu Apr 24 08:28:23 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/versioned_files

------------------------------------------------------------
revno: 3362
revision-id: robertc at robertcollins.net-20080424072818-l03s8dhk100cfjqf
parent: robertc at robertcollins.net-20080424042745-zouzpwd67ftfk0wn
committer: Robert Collins <robertc at robertcollins.net>
branch nick: data_stream_revamp
timestamp: Thu 2008-04-24 17:28:18 +1000
message:
  Generate streams with absent records.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-04-24 04:27:45 +0000
+++ b/bzrlib/knit.py	2008-04-24 07:28:18 +0000
@@ -119,6 +119,7 @@
 from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
 import bzrlib.ui
 from bzrlib.versionedfile import (
+    AbsentContentFactory,
     adapter_registry,
     ContentFactory,
     InterVersionedFile,
@@ -886,12 +887,22 @@
             knit = None
         # Double index lookups here : need a unified api ?
         parent_map = self.get_parent_map(versions)
-        position_map = self._get_components_positions(versions)
+        absent_versions = set(versions) - set(parent_map)
         if ordering == 'topological':
-            versions = topo_sort(parent_map)
+            present_versions = topo_sort(parent_map)
+        else:
+            # List comprehension to keep the requested order (as that seems
+            # marginally useful, at least until we start doing IO optimising
+            # here).
+            present_versions = [version for version in versions if version in
+                parent_map]
+        position_map = self._get_components_positions(present_versions)
         # c = component_id, r = record_details, i_m = index_memo, n = next
-        records = [(version, position_map[version][1]) for version in versions]
+        records = [(version, position_map[version][1]) for version in
+            present_versions]
         record_map = {}
+        for version in absent_versions:
+            yield AbsentContentFactory((version,))
         for version, raw_data, sha1 in \
                 self._data.read_records_iter_raw(records):
             (record_details, index_memo, _) = position_map[version]

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-04-24 04:27:45 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-04-24 07:28:18 +0000
@@ -205,6 +205,28 @@
         self.assertEqual(set([('base',), ('left',), ('right',), ('merged',)]),
             seen)
 
+    def test_get_record_stream_missing_records_are_absent(self):
+        f, parents = get_diamond_vf(self.get_file())
+        entries = f.get_record_stream(['merged', 'left', 'right', 'or', 'base'],
+            'unordered', False)
+        seen = set()
+        for factory in entries:
+            seen.add(factory.key)
+            if factory.key == ('or',):
+                self.assertEqual('absent', factory.storage_kind)
+                self.assertEqual(None, factory.sha1)
+                self.assertEqual(None, factory.parents)
+            else:
+                self.assertValidStorageKind(factory.storage_kind)
+                self.assertEqual(f.get_sha1s([factory.key[0]])[0], factory.sha1)
+                self.assertEqual(parents[factory.key[0]], factory.parents)
+                self.assertIsInstance(factory.get_bytes_as(factory.storage_kind),
+                    str)
+        self.assertEqual(
+            set([('base',), ('left',), ('right',), ('merged',), ('or',)]),
+            seen)
+
+
     def test_insert_record_stream_empty(self):
         """Inserting an empty record stream should work."""
         f = self.get_file()

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-04-23 05:17:05 +0000
+++ b/bzrlib/versionedfile.py	2008-04-24 07:28:18 +0000
@@ -80,6 +80,24 @@
         self.parents = None
 
 
+class AbsentContentFactory(object):
+    """A placeholder content factory for unavailable texts.
+    
+    :ivar sha1: None.
+    :ivar storage_kind: 'absent'.
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: None.
+    """
+
+    def __init__(self, key):
+        """Create an AbsentContentFactory."""
+        self.sha1 = None
+        self.storage_kind = 'absent'
+        self.key = key
+        self.parents = None
+
+
 class VersionedFile(object):
     """Versioned text file storage.
     

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-04-24 04:27:45 +0000
+++ b/bzrlib/weave.py	2008-04-24 07:28:18 +0000
@@ -91,6 +91,7 @@
 from bzrlib.trace import mutter
 from bzrlib.tsort import topo_sort
 from bzrlib.versionedfile import (
+    AbsentContentFactory,
     adapter_registry,
     ContentFactory,
     InterVersionedFile,
@@ -309,7 +310,10 @@
             parents = self.get_parent_map(versions)
             versions = topo_sort(parents)
         for version in versions:
-            yield WeaveContentFactory(version, self)
+            if version in self:
+                yield WeaveContentFactory(version, self)
+            else:
+                yield AbsentContentFactory((version,))
 
     def get_parent_map(self, version_ids):
         """See VersionedFile.get_parent_map."""




More information about the bazaar-commits mailing list