Rev 3880: Move the sorting into each index, and customize it for Kndx access. in http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 4 20:59:16 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream
------------------------------------------------------------
revno: 3880
revision-id: john at arbash-meinel.com-20081204205915-0ubzh41iryx4f0t1
parent: john at arbash-meinel.com-20081204204153-xtyxecjor7itx91r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: sorted_get_record_stream
timestamp: Thu 2008-12-04 14:59:15 -0600
message:
Move the sorting into each index, and customize it for Kndx access.
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-12-04 20:41:53 +0000
+++ b/bzrlib/knit.py 2008-12-04 20:59:15 +0000
@@ -1263,20 +1263,12 @@
present_keys.append(key)
source_keys[-1][1].append(key)
# We have been requested to return these records in an order that
- # suits us. So we sort by the index_memo. Generally each entry is
- # (index, start, length). We want to group by index so that we read
- # from one pack at a time, and then we group by start so that we
- # read from the beginning of the file to the end.
- # This is a little bit iffy, because index_memo is technically
- # "opaque", but it works with current formats and would otherwise
- # require passing our custom dict 'positions' back to self._index
- # for it to figure out that we could sort based on index_memo.
- def get_index_memo(key):
- return positions[key][1]
+ # suits us. So we ask the index to give us an optimally sorted
+ # order.
for source, sub_keys in source_keys:
if source is parent_maps[0]:
- # We don't need to sort the keys for fallbacks
- sub_keys.sort(key=get_index_memo)
+ # Only sort the keys for this VF
+ self._index._sort_keys_by_io(sub_keys, positions)
absent_keys = keys - set(global_map)
for key in absent_keys:
yield AbsentContentFactory(key)
@@ -2136,6 +2128,26 @@
else:
self._mode = 'r'
+ def _sort_keys_by_io(self, keys, positions):
+ """Figure out an optimal order to read the records for the given keys.
+
+ Sort keys, grouped by index and sorted by position.
+
+ :param keys: A list of keys whose records we want to read. This will be
+ sorted 'in-place'.
+ :param positions: A dict, such as the one returned by
+ _get_components_positions()
+ :return: None
+ """
+ def get_index_memo(key):
+ index_memo = positions[key][1]
+ # Group by prefix and position. index_memo[0] is the key, so it is
+ # (file_id, revision_id) and we don't want to sort on revision_id,
+ # index_memo[1] is the position, and index_memo[2] is the size,
+ # which doesn't matter for the sort
+ return index_memo[0][:-1], index_memo[1]
+ return keys.sort(key=get_index_memo)
+
def _split_key(self, key):
"""Split key into a prefix and suffix."""
return key[:-1], key[-1]
@@ -2395,6 +2407,21 @@
bits = node[2][1:].split(' ')
return node[0], int(bits[0]), int(bits[1])
+ def _sort_keys_by_io(self, keys, positions):
+ """Figure out an optimal order to read the records for the given keys.
+
+ Sort keys, grouped by index and sorted by position.
+
+ :param keys: A list of keys whose records we want to read. This will be
+ sorted 'in-place'.
+ :param positions: A dict, such as the one returned by
+ _get_components_positions()
+ :return: None
+ """
+ def get_index_memo(key):
+ return positions[key][1]
+ return keys.sort(key=get_index_memo)
+
class _KnitKeyAccess(object):
"""Access to records in .knit files."""
More information about the bazaar-commits
mailing list