Rev 3879: KVF.get_record_stream('unordered') now returns the records based on I/O ordering. in http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 4 20:41:57 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream
------------------------------------------------------------
revno: 3879
revision-id: john at arbash-meinel.com-20081204204153-xtyxecjor7itx91r
parent: pqm at pqm.ubuntu.com-20081202015700-3mc9dola31w7h5h4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: sorted_get_record_stream
timestamp: Thu 2008-12-04 14:41:53 -0600
message:
KVF.get_record_stream('unordered') now returns the records based on I/O ordering.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2008-12-01 23:20:22 +0000
+++ b/NEWS 2008-12-04 20:41:53 +0000
@@ -36,6 +36,12 @@
INTERNALS:
+ * ``KnitVersionedFiles.get_record_stream()`` will now chose a
+ more optimal ordering when the keys are requested 'unordered'.
+ Previously the order was fully random, now the records should be
+ returned from each pack in turn, in forward I/O order.
+ (John Arbash Meinel)
+
bzr 1.10rc1 2008-11-28
----------------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-11-27 08:26:50 +0000
+++ b/bzrlib/knit.py 2008-12-04 20:41:53 +0000
@@ -1262,6 +1262,21 @@
for key in parent_map:
present_keys.append(key)
source_keys[-1][1].append(key)
+ # We have been requested to return these records in an order that
+ # suits us. So we sort by the index_memo. Generally each entry is
+ # (index, start, length). We want to group by index so that we read
+ # from one pack at a time, and then we group by start so that we
+ # read from the beginning of the file to the end.
+ # This is a little bit iffy, because index_memo is technically
+ # "opaque", but it works with current formats and would otherwise
+ # require passing our custom dict 'positions' back to self._index
+ # for it to figure out that we could sort based on index_memo.
+ def get_index_memo(key):
+ return positions[key][1]
+ for source, sub_keys in source_keys:
+ if source is parent_maps[0]:
+ # We don't need to sort the keys for fallbacks
+ sub_keys.sort(key=get_index_memo)
absent_keys = keys - set(global_map)
for key in absent_keys:
yield AbsentContentFactory(key)
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2008-11-27 07:25:52 +0000
+++ b/bzrlib/tests/test_knit.py 2008-12-04 20:41:53 +0000
@@ -659,6 +659,44 @@
vf.iter_lines_added_or_present_in_keys, keys)
self.assertEqual([2, 1, 1], reload_counter)
+ def test_get_record_stream_yields_disk_sorted_order(self):
+ # if we get 'unordered' pick a semi-optimal order for reading. The
+ # order should be grouped by pack file, and then by position in file
+ repo = self.make_repository('test', format='pack-0.92')
+ repo.lock_write()
+ self.addCleanup(repo.unlock)
+ repo.start_write_group()
+ vf = repo.texts
+ vf.add_lines(('f-id', 'rev-5'), [('f-id', 'rev-4')], ['lines\n'])
+ vf.add_lines(('f-id', 'rev-1'), [], ['lines\n'])
+ vf.add_lines(('f-id', 'rev-2'), [('f-id', 'rev-1')], ['lines\n'])
+ repo.commit_write_group()
+ # We inserted them as rev-5, rev-1, rev-2, we should get them back in
+ # the same order
+ stream = vf.get_record_stream([('f-id', 'rev-1'), ('f-id', 'rev-5'),
+ ('f-id', 'rev-2')], 'unordered', False)
+ keys = [r.key for r in stream]
+ self.assertEqual([('f-id', 'rev-5'), ('f-id', 'rev-1'),
+ ('f-id', 'rev-2')], keys)
+ repo.start_write_group()
+ vf.add_lines(('f-id', 'rev-4'), [('f-id', 'rev-3')], ['lines\n'])
+ vf.add_lines(('f-id', 'rev-3'), [('f-id', 'rev-2')], ['lines\n'])
+ vf.add_lines(('f-id', 'rev-6'), [('f-id', 'rev-5')], ['lines\n'])
+ repo.commit_write_group()
+ # Request in random order, to make sure the output order isn't based on
+ # the request
+ request_keys = set(('f-id', 'rev-%d' % i) for i in range(1, 7))
+ stream = vf.get_record_stream(request_keys, 'unordered', False)
+ keys = [r.key for r in stream]
+ # We want to get the keys back in disk order, but it doesn't matter
+ # which pack we read from first. So this can come back in 2 orders
+ alt1 = [('f-id', 'rev-%d' % i) for i in [4, 3, 6, 5, 1, 2]]
+ alt2 = [('f-id', 'rev-%d' % i) for i in [5, 1, 2, 4, 3, 6]]
+ if keys != alt1 and keys != alt2:
+ self.fail('Returned key order did not match either expected order.'
+ ' expected %s or %s, not %s'
+ % (alt1, alt2, keys))
+
class LowLevelKnitDataTests(TestCase):
More information about the bazaar-commits
mailing list