Rev 3891: Start working on a ChunkedContentFactory. in http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 11 00:55:01 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked
------------------------------------------------------------
revno: 3891
revision-id: john at arbash-meinel.com-20081211005436-a8bn72zw43b1vd9r
parent: pqm at pqm.ubuntu.com-20081210082822-li6ku9s3k63kjrpr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_record_stream_chunked
timestamp: Wed 2008-12-10 18:54:36 -0600
message:
Start working on a ChunkedContentFactory.
This allows get_bytes_as('chunked') on both FulltextContentFactory
and ChunkedContentFactory, since converting between the two styles
is trivial.
We will also want to special-case the conversion from 'chunked' to
'lines', but that is left for future work.
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-12-05 15:34:02 +0000
+++ b/bzrlib/knit.py 2008-12-11 00:54:36 +0000
@@ -110,7 +110,7 @@
adapter_registry,
ConstantMapper,
ContentFactory,
- FulltextContentFactory,
+ ChunkedContentFactory,
VersionedFile,
VersionedFiles,
)
@@ -276,11 +276,13 @@
def get_bytes_as(self, storage_kind):
if storage_kind == self.storage_kind:
return self._raw_record
- if storage_kind == 'fulltext' and self._knit is not None:
- return self._knit.get_text(self.key[0])
- else:
- raise errors.UnavailableRepresentation(self.key, storage_kind,
- self.storage_kind)
+ if self._knit is not None:
+ if storage_kind == 'chunked':
+ return self._knit.get_lines(self.key[0])
+ elif storage_kind == 'fulltext':
+ return self._knit.get_text(self.key[0])
+ raise errors.UnavailableRepresentation(self.key, storage_kind,
+ self.storage_kind)
class KnitContent(object):
@@ -1288,9 +1290,8 @@
text_map, _ = self._get_content_maps(keys, non_local)
for key in keys:
lines = text_map.pop(key)
- text = ''.join(lines)
- yield FulltextContentFactory(key, global_map[key], None,
- text)
+ yield ChunkedContentFactory(key, global_map[key], None,
+ lines)
else:
for source, keys in source_keys:
if source is parent_maps[0]:
@@ -1443,7 +1444,8 @@
buffered = True
if not buffered:
self._index.add_records([index_entry])
- elif record.storage_kind == 'fulltext':
+ elif (record.storage_kind == 'fulltext'
+ or record.storage_kind == 'chunked'):
self.add_lines(record.key, parents,
split_lines(record.get_bytes_as('fulltext')))
else:
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2008-12-03 21:05:01 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2008-12-11 00:54:36 +0000
@@ -1558,8 +1558,9 @@
"""Assert that storage_kind is a valid storage_kind."""
self.assertSubset([storage_kind],
['mpdiff', 'knit-annotated-ft', 'knit-annotated-delta',
- 'knit-ft', 'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
- 'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'])
+ 'knit-ft', 'knit-delta', 'chunked', 'fulltext',
+ 'knit-annotated-ft-gz', 'knit-annotated-delta-gz', 'knit-ft-gz',
+ 'knit-delta-gz'])
def capture_stream(self, f, entries, on_seen, parents):
"""Capture a stream for testing."""
@@ -1636,9 +1637,11 @@
[None, files.get_sha1s([factory.key])[factory.key]])
self.assertEqual(parent_map[factory.key], factory.parents)
# self.assertEqual(files.get_text(factory.key),
- self.assertIsInstance(factory.get_bytes_as('fulltext'), str)
- self.assertIsInstance(factory.get_bytes_as(factory.storage_kind),
- str)
+ ft_bytes = factory.get_bytes_as('fulltext')
+ self.assertIsInstance(ft_bytes, str)
+ chunked_bytes = factory.get_bytes_as('chunked')
+ self.assertEqualDiff(ft_bytes, ''.join(chunked_bytes))
+
self.assertStreamOrder(sort_order, seen, keys)
def assertStreamOrder(self, sort_order, seen, keys):
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-12-03 21:05:01 +0000
+++ b/bzrlib/versionedfile.py 2008-12-11 00:54:36 +0000
@@ -59,6 +59,8 @@
'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
'bzrlib.knit', 'FTAnnotatedToFullText')
+# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
+# 'bzrlib.knit', 'FTAnnotatedToChunked')
class ContentFactory(object):
@@ -84,12 +86,46 @@
self.parents = None
+class ChunkedContentFactory(ContentFactory):
+ """Static data content factory.
+
+ This takes a 'chunked' list of strings. The only requirement on 'chunked' is
+ that ''.join(lines) becomes a valid fulltext. A tuple of a single string
+ satisfies this, as does a list of lines.
+
+ :ivar sha1: None, or the sha1 of the content fulltext.
+ :ivar storage_kind: The native storage kind of this factory. Always
+ 'fulltext' (for compatibility with clients that don't know 'chunked')
+ :ivar key: The key of this content. Each key is a tuple with a single
+ string in it.
+ :ivar parents: A tuple of parent keys for self.key. If the object has
+ no parent information, None (as opposed to () for an empty list of
+ parents).
+ """
+
+ def __init__(self, key, parents, sha1, chunks):
+ """Create a ContentFactory."""
+ self.sha1 = sha1
+ self.storage_kind = 'fulltext' #XXX: This should really be 'chunked'
+ self.key = key
+ self.parents = parents
+ self._chunks = chunks
+
+ def get_bytes_as(self, storage_kind):
+ if storage_kind == 'chunked':
+ return self._chunks
+ elif storage_kind == 'fulltext':
+ return ''.join(self._chunks)
+ raise errors.UnavailableRepresentation(self.key, storage_kind,
+ self.storage_kind)
+
+
class FulltextContentFactory(ContentFactory):
"""Static data content factory.
This takes a fulltext when created and just returns that during
get_bytes_as('fulltext').
-
+
:ivar sha1: None, or the sha1 of the content fulltext.
:ivar storage_kind: The native storage kind of this factory. Always
'fulltext'.
@@ -111,6 +147,8 @@
def get_bytes_as(self, storage_kind):
if storage_kind == self.storage_kind:
return self._text
+ elif storage_kind == 'chunked':
+ return (self._text,)
raise errors.UnavailableRepresentation(self.key, storage_kind,
self.storage_kind)
@@ -1251,8 +1289,7 @@
lines = self._lines[key]
parents = self._parents[key]
pending.remove(key)
- yield FulltextContentFactory(key, parents, None,
- ''.join(lines))
+ yield ChunkedContentFactory(key, parents, None, lines)
for versionedfile in self.fallback_versionedfiles:
for record in versionedfile.get_record_stream(
pending, 'unordered', True):
@@ -1422,9 +1459,9 @@
if lines is not None:
if not isinstance(lines, list):
raise AssertionError
- yield FulltextContentFactory((k,), None,
+ yield ChunkedContentFactory((k,), None,
sha1=osutils.sha_strings(lines),
- text=''.join(lines))
+ chunks=lines)
else:
yield AbsentContentFactory((k,))
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2008-10-01 05:40:45 +0000
+++ b/bzrlib/weave.py 2008-12-11 00:54:36 +0000
@@ -122,6 +122,8 @@
def get_bytes_as(self, storage_kind):
if storage_kind == 'fulltext':
return self._weave.get_text(self.key[-1])
+ elif storage_kind == 'chunked':
+ return self._weave.get_lines(self.key[-1])
else:
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
@@ -357,7 +359,8 @@
raise RevisionNotPresent([record.key[0]], self)
# adapt to non-tuple interface
parents = [parent[0] for parent in record.parents]
- if record.storage_kind == 'fulltext':
+ if (record.storage_kind == 'fulltext'
+ or record.storage_kind == 'chunked'):
self.add_lines(record.key[0], parents,
split_lines(record.get_bytes_as('fulltext')))
else:
More information about the bazaar-commits mailing list