Rev 3891: Start working on a ChunkedContentFactory. in http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

John Arbash Meinel john at arbash-meinel.com
Thu Dec 11 00:55:01 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

------------------------------------------------------------
revno: 3891
revision-id: john at arbash-meinel.com-20081211005436-a8bn72zw43b1vd9r
parent: pqm at pqm.ubuntu.com-20081210082822-li6ku9s3k63kjrpr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_record_stream_chunked
timestamp: Wed 2008-12-10 18:54:36 -0600
message:
  Start working on a ChunkedContentFactory.
  
  This allows get_bytes_as('chunked') for both FulltextContentFactory
  and ChunkedContentFactory, as it is a trivial conversion to
  go between the two styles.
  We will also want to special-case converting 'chunked' into
  'lines', but that is for future work.
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-12-05 15:34:02 +0000
+++ b/bzrlib/knit.py	2008-12-11 00:54:36 +0000
@@ -110,7 +110,7 @@
     adapter_registry,
     ConstantMapper,
     ContentFactory,
-    FulltextContentFactory,
+    ChunkedContentFactory,
     VersionedFile,
     VersionedFiles,
     )
@@ -276,11 +276,13 @@
     def get_bytes_as(self, storage_kind):
         if storage_kind == self.storage_kind:
             return self._raw_record
-        if storage_kind == 'fulltext' and self._knit is not None:
-            return self._knit.get_text(self.key[0])
-        else:
-            raise errors.UnavailableRepresentation(self.key, storage_kind,
-                self.storage_kind)
+        if self._knit is not None:
+            if storage_kind == 'chunked':
+                return self._knit.get_lines(self.key[0])
+            elif storage_kind == 'fulltext':
+                return self._knit.get_text(self.key[0])
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+            self.storage_kind)
 
 
 class KnitContent(object):
@@ -1288,9 +1290,8 @@
                 text_map, _ = self._get_content_maps(keys, non_local)
                 for key in keys:
                     lines = text_map.pop(key)
-                    text = ''.join(lines)
-                    yield FulltextContentFactory(key, global_map[key], None,
-                                                 text)
+                    yield ChunkedContentFactory(key, global_map[key], None,
+                                                lines)
         else:
             for source, keys in source_keys:
                 if source is parent_maps[0]:
@@ -1443,7 +1444,8 @@
                         buffered = True
                 if not buffered:
                     self._index.add_records([index_entry])
-            elif record.storage_kind == 'fulltext':
+            elif (record.storage_kind == 'fulltext'
+                  or record.storage_kind == 'chunked'):
                 self.add_lines(record.key, parents,
                     split_lines(record.get_bytes_as('fulltext')))
             else:

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-12-03 21:05:01 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-12-11 00:54:36 +0000
@@ -1558,8 +1558,9 @@
         """Assert that storage_kind is a valid storage_kind."""
         self.assertSubset([storage_kind],
             ['mpdiff', 'knit-annotated-ft', 'knit-annotated-delta',
-             'knit-ft', 'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
-             'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'])
+             'knit-ft', 'knit-delta', 'chunked', 'fulltext',
+             'knit-annotated-ft-gz', 'knit-annotated-delta-gz', 'knit-ft-gz',
+             'knit-delta-gz'])
 
     def capture_stream(self, f, entries, on_seen, parents):
         """Capture a stream for testing."""
@@ -1636,9 +1637,11 @@
                 [None, files.get_sha1s([factory.key])[factory.key]])
             self.assertEqual(parent_map[factory.key], factory.parents)
             # self.assertEqual(files.get_text(factory.key),
-            self.assertIsInstance(factory.get_bytes_as('fulltext'), str)
-            self.assertIsInstance(factory.get_bytes_as(factory.storage_kind),
-                str)
+            ft_bytes = factory.get_bytes_as('fulltext')
+            self.assertIsInstance(ft_bytes, str)
+            chunked_bytes = factory.get_bytes_as('chunked')
+            self.assertEqualDiff(ft_bytes, ''.join(chunked_bytes))
+
         self.assertStreamOrder(sort_order, seen, keys)
 
     def assertStreamOrder(self, sort_order, seen, keys):

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-12-03 21:05:01 +0000
+++ b/bzrlib/versionedfile.py	2008-12-11 00:54:36 +0000
@@ -59,6 +59,8 @@
     'bzrlib.knit', 'FTAnnotatedToUnannotated')
 adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
     'bzrlib.knit', 'FTAnnotatedToFullText')
+# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
+#     'bzrlib.knit', 'FTAnnotatedToChunked')
 
 
 class ContentFactory(object):
@@ -84,12 +86,46 @@
         self.parents = None
 
 
+class ChunkedContentFactory(ContentFactory):
+    """Static data content factory.
+
+    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
+    that ''.join(chunks) becomes a valid fulltext. A tuple of a single string
+    satisfies this, as does a list of lines.
+
+    :ivar sha1: None, or the sha1 of the content fulltext.
+    :ivar storage_kind: The native storage kind of this factory. Always
+        'fulltext' (for compatibility with clients that don't know 'chunked')
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: A tuple of parent keys for self.key. If the object has
+        no parent information, None (as opposed to () for an empty list of
+        parents).
+     """
+
+    def __init__(self, key, parents, sha1, chunks):
+        """Create a ContentFactory."""
+        self.sha1 = sha1
+        self.storage_kind = 'fulltext' #XXX: This should really be 'chunked'
+        self.key = key
+        self.parents = parents
+        self._chunks = chunks
+
+    def get_bytes_as(self, storage_kind):
+        if storage_kind == 'chunked':
+            return self._chunks
+        elif storage_kind == 'fulltext':
+            return ''.join(self._chunks)
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+            self.storage_kind)
+
+
 class FulltextContentFactory(ContentFactory):
     """Static data content factory.
 
     This takes a fulltext when created and just returns that during
     get_bytes_as('fulltext').
-    
+
     :ivar sha1: None, or the sha1 of the content fulltext.
     :ivar storage_kind: The native storage kind of this factory. Always
         'fulltext'.
@@ -111,6 +147,8 @@
     def get_bytes_as(self, storage_kind):
         if storage_kind == self.storage_kind:
             return self._text
+        elif storage_kind == 'chunked':
+            return (self._text,)
         raise errors.UnavailableRepresentation(self.key, storage_kind,
             self.storage_kind)
 
@@ -1251,8 +1289,7 @@
                 lines = self._lines[key]
                 parents = self._parents[key]
                 pending.remove(key)
-                yield FulltextContentFactory(key, parents, None,
-                    ''.join(lines))
+                yield ChunkedContentFactory(key, parents, None, lines)
         for versionedfile in self.fallback_versionedfiles:
             for record in versionedfile.get_record_stream(
                 pending, 'unordered', True):
@@ -1422,9 +1459,9 @@
             if lines is not None:
                 if not isinstance(lines, list):
                     raise AssertionError
-                yield FulltextContentFactory((k,), None, 
+                yield ChunkedContentFactory((k,), None,
                         sha1=osutils.sha_strings(lines),
-                        text=''.join(lines))
+                        chunks=lines)
             else:
                 yield AbsentContentFactory((k,))
 

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-10-01 05:40:45 +0000
+++ b/bzrlib/weave.py	2008-12-11 00:54:36 +0000
@@ -122,6 +122,8 @@
     def get_bytes_as(self, storage_kind):
         if storage_kind == 'fulltext':
             return self._weave.get_text(self.key[-1])
+        elif storage_kind == 'chunked':
+            return self._weave.get_lines(self.key[-1])
         else:
             raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
 
@@ -357,7 +359,8 @@
                 raise RevisionNotPresent([record.key[0]], self)
             # adapt to non-tuple interface
             parents = [parent[0] for parent in record.parents]
-            if record.storage_kind == 'fulltext':
+            if (record.storage_kind == 'fulltext'
+                or record.storage_kind == 'chunked'):
                 self.add_lines(record.key[0], parents,
                     split_lines(record.get_bytes_as('fulltext')))
             else:



More information about the bazaar-commits mailing list