Rev 3880: Move the sorting into each index, and customize it for Kndx access. in http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream

John Arbash Meinel john at arbash-meinel.com
Thu Dec 4 20:59:16 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/sorted_get_record_stream

------------------------------------------------------------
revno: 3880
revision-id: john at arbash-meinel.com-20081204205915-0ubzh41iryx4f0t1
parent: john at arbash-meinel.com-20081204204153-xtyxecjor7itx91r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: sorted_get_record_stream
timestamp: Thu 2008-12-04 14:59:15 -0600
message:
  Move the sorting into each index, and customize it for Kndx access.
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-12-04 20:41:53 +0000
+++ b/bzrlib/knit.py	2008-12-04 20:59:15 +0000
@@ -1263,20 +1263,12 @@
                     present_keys.append(key)
                     source_keys[-1][1].append(key)
             # We have been requested to return these records in an order that
-            # suits us. So we sort by the index_memo. Generally each entry is
-            # (index, start, length). We want to group by index so that we read
-            # from one pack at a time, and then we group by start so that we
-            # read from the beginning of the file to the end.
-            # This is a little bit iffy, because index_memo is technically
-            # "opaque", but it works with current formats and would otherwise
-            # require passing our custom dict 'positions' back to self._index
-            # for it to figure out that we could sort based on index_memo.
-            def get_index_memo(key):
-                return positions[key][1]
+            # suits us. So we ask the index to give us an optimally sorted
+            # order.
             for source, sub_keys in source_keys:
                 if source is parent_maps[0]:
-                    # We don't need to sort the keys for fallbacks
-                    sub_keys.sort(key=get_index_memo)
+                    # Only sort the keys for this VF
+                    self._index._sort_keys_by_io(sub_keys, positions)
         absent_keys = keys - set(global_map)
         for key in absent_keys:
             yield AbsentContentFactory(key)
@@ -2136,6 +2128,26 @@
         else:
             self._mode = 'r'
 
+    def _sort_keys_by_io(self, keys, positions):
+        """Figure out an optimal order to read the records for the given keys.
+
+        Sort keys, grouped by index and sorted by position.
+
+        :param keys: A list of keys whose records we want to read. This will be
+            sorted 'in-place'.
+        :param positions: A dict, such as the one returned by
+            _get_components_positions()
+        :return: None
+        """
+        def get_index_memo(key):
+            index_memo = positions[key][1]
+            # Group by prefix and position. index_memo[0] is the key, so it is
+            # (file_id, revision_id) and we don't want to sort on revision_id,
+            # index_memo[1] is the position, and index_memo[2] is the size,
+            # which doesn't matter for the sort
+            return index_memo[0][:-1], index_memo[1]
+        return keys.sort(key=get_index_memo)
+
     def _split_key(self, key):
         """Split key into a prefix and suffix."""
         return key[:-1], key[-1]
@@ -2395,6 +2407,21 @@
         bits = node[2][1:].split(' ')
         return node[0], int(bits[0]), int(bits[1])
 
+    def _sort_keys_by_io(self, keys, positions):
+        """Figure out an optimal order to read the records for the given keys.
+
+        Sort keys, grouped by index and sorted by position.
+
+        :param keys: A list of keys whose records we want to read. This will be
+            sorted 'in-place'.
+        :param positions: A dict, such as the one returned by
+            _get_components_positions()
+        :return: None
+        """
+        def get_index_memo(key):
+            return positions[key][1]
+        return keys.sort(key=get_index_memo)
+
 
 class _KnitKeyAccess(object):
     """Access to records in .knit files."""



More information about the bazaar-commits mailing list