Rev 3384: get_record_stream for fulltexts working (but note extreme memory use!). in http://people.ubuntu.com/~robertc/baz2.0/shallow-branch
Robert Collins
robertc at robertcollins.net
Mon Jun 23 02:13:25 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/shallow-branch
------------------------------------------------------------
revno: 3384
revision-id: robertc at robertcollins.net-20080623011320-c5952pwuvi9m6hpp
parent: robertc at robertcollins.net-20080623001926-8vos9qr0xgbm74wu
committer: Robert Collins <robertc at robertcollins.net>
branch nick: stacking-knits
timestamp: Mon 2008-06-23 11:13:20 +1000
message:
get_record_stream for fulltexts working (but note extreme memory use!).
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-06-23 00:19:26 +0000
+++ b/bzrlib/knit.py 2008-06-23 01:13:20 +0000
@@ -964,9 +964,13 @@
text_map, contents_map = self._get_content_maps([key])
return contents_map[key]
- def _get_content_maps(self, keys):
+ def _get_content_maps(self, keys, nonlocal_keys=None):
"""Produce maps of text and KnitContents
+ :param keys: The keys to produce content maps for.
+ :param nonlocal_keys: An iterable of keys (possibly intersecting keys)
+ which are known to not be in this knit, but rather in one of the
+ fallback knits.
:return: (text_map, content_map) where text_map contains the texts for
the requested versions and content_map contains the KnitContents.
"""
@@ -976,12 +980,32 @@
# final output.
keys = list(keys)
multiple_versions = len(keys) != 1
- record_map = self._get_record_map(keys)
+ record_map = self._get_record_map(keys, allow_missing=True)
text_map = {}
content_map = {}
final_content = {}
+ if nonlocal_keys is None:
+ nonlocal_keys = set()
+ else:
+ nonlocal_keys = frozenset(nonlocal_keys)
+ missing_keys = set(nonlocal_keys)
+ for source in self._fallback_vfs:
+ if not missing_keys:
+ break
+ for record in source.get_record_stream(missing_keys,
+ 'unordered', True):
+ if record.storage_kind == 'absent':
+ continue
+ missing_keys.remove(record.key)
+ bytes = record.get_bytes_as('fulltext')
+ lines = split_lines(record.get_bytes_as('fulltext'))
+ text_map[record.key] = lines
+ final_content[record.key] = PlainKnitContent(lines, record.key)
for key in keys:
+ if key in nonlocal_keys:
+ # already handled
+ continue
components = []
cursor = key
while cursor is not None:
@@ -1173,7 +1197,8 @@
if include_delta_closure:
# XXX: get_content_maps performs its own index queries; allow state
# to be passed in.
- text_map, _ = self._get_content_maps(present_keys)
+ text_map, _ = self._get_content_maps(present_keys,
+ needed_from_fallback - absent_keys)
for key in present_keys:
yield FulltextContentFactory(key, global_map[key], None,
''.join(text_map[key]))
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2008-06-23 00:19:26 +0000
+++ b/bzrlib/tests/test_knit.py 2008-06-23 01:13:20 +0000
@@ -1424,6 +1424,92 @@
self.assertEqual([("get_parent_map", set([key_basis, key_missing]))],
basis.calls)
+ def test_get_record_stream_unordered_fulltexts(self):
+ # records from the test knit are answered without asking the basis:
+ basis, test = self.get_basis_and_test_knit()
+ key = ('foo',)
+ key_basis = ('bar',)
+ key_missing = ('missing',)
+ test.add_lines(key, (), ['foo\n'])
+ records = list(test.get_record_stream([key], 'unordered', True))
+ self.assertEqual(1, len(records))
+ self.assertEqual([], basis.calls)
+ # Missing (from test knit) objects are retrieved from the basis:
+ basis.add_lines(key_basis, (), ['foo\n', 'bar\n'])
+ basis.calls = []
+ records = list(test.get_record_stream([key_basis, key_missing],
+ 'unordered', True))
+ self.assertEqual(2, len(records))
+ calls = list(basis.calls)
+ for record in records:
+ self.assertSubset([record.key], (key_basis, key_missing))
+ if record.key == key_missing:
+ self.assertIsInstance(record, AbsentContentFactory)
+ else:
+ reference = list(basis.get_record_stream([key_basis],
+ 'unordered', True))[0]
+ self.assertEqual(reference.key, record.key)
+ self.assertEqual(reference.sha1, record.sha1)
+ self.assertEqual(reference.storage_kind, record.storage_kind)
+ self.assertEqual(reference.get_bytes_as(reference.storage_kind),
+ record.get_bytes_as(record.storage_kind))
+ self.assertEqual(reference.get_bytes_as('fulltext'),
+ record.get_bytes_as('fulltext'))
+ # It's not strictly minimal, but it seems reasonable for now for it to
+ # ask which fallbacks have which parents.
+ self.assertEqual([
+ ("get_parent_map", set([key_basis, key_missing])),
+ ("get_record_stream", [key_basis], 'unordered', True)],
+ calls)
+
+ def test_get_record_stream_ordered_fulltexts(self):
+ # ordering is preserved down into the fallback store.
+ basis, test = self.get_basis_and_test_knit()
+ key = ('foo',)
+ key_basis = ('bar',)
+ key_basis_2 = ('quux',)
+ key_missing = ('missing',)
+ test.add_lines(key, (key_basis,), ['foo\n'])
+ # Missing (from test knit) objects are retrieved from the basis:
+ basis.add_lines(key_basis, (key_basis_2,), ['foo\n', 'bar\n'])
+ basis.add_lines(key_basis_2, (), ['quux\n'])
+ basis.calls = []
+ # ask for in non-topological order
+ records = list(test.get_record_stream(
+ [key, key_basis, key_missing, key_basis_2], 'topological', True))
+ self.assertEqual(4, len(records))
+ results = []
+ for record in records:
+ self.assertSubset([record.key],
+ (key_basis, key_missing, key_basis_2, key))
+ if record.key == key_missing:
+ self.assertIsInstance(record, AbsentContentFactory)
+ else:
+ results.append((record.key, record.sha1, record.storage_kind,
+ record.get_bytes_as('fulltext')))
+ calls = list(basis.calls)
+ order = [record[0] for record in results]
+ self.assertEqual([key_basis_2, key_basis, key], order)
+ for result in results:
+ if result[0] == key:
+ source = test
+ else:
+ source = basis
+ record = source.get_record_stream([result[0]], 'unordered',
+ True).next()
+ self.assertEqual(record.key, result[0])
+ self.assertEqual(record.sha1, result[1])
+ self.assertEqual(record.storage_kind, result[2])
+ self.assertEqual(record.get_bytes_as('fulltext'), result[3])
+ # It's not strictly minimal, but it seems reasonable for now for it to
+ # ask which fallbacks have which parents.
+ self.assertEqual([
+ ("get_parent_map", set([key_basis, key_basis_2, key_missing])),
+ # unordered is asked for by the underlying worker as it still
+ # buffers everything while answering - which is a problem!
+ ("get_record_stream", [key_basis_2, key_basis], 'unordered', True)],
+ calls)
+
def test_get_record_stream_unordered_deltas(self):
# records from the test knit are answered without asking the basis:
basis, test = self.get_basis_and_test_knit()
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-06-19 21:54:06 +0000
+++ b/bzrlib/versionedfile.py 2008-06-23 01:13:20 +0000
@@ -540,7 +540,7 @@
return self._backing_vf.get_parent_map(keys)
def get_record_stream(self, keys, sort_order, include_delta_closure):
- self.calls.append(("get_record_stream", keys, sort_order,
+ self.calls.append(("get_record_stream", list(keys), sort_order,
include_delta_closure))
return self._backing_vf.get_record_stream(keys, sort_order,
include_delta_closure)
More information about the bazaar-commits
mailing list