Rev 3772: Finish up the algorithm to stay within a given layer. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer

John Arbash Meinel john at arbash-meinel.com
Tue Oct 14 20:31:53 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer

------------------------------------------------------------
revno: 3772
revision-id: john at arbash-meinel.com-20081014193134-yi1otoetaq96obxf
parent: john at arbash-meinel.com-20081014190433-81nbtayr41p7vr33
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_buffer
timestamp: Tue 2008-10-14 14:31:34 -0500
message:
  Finish up the algorithm to stay within a given layer.
  
  Now when expanding requests, we will only go between layers when
  the whole thing is small enough that we can read everything.
  In all other cases, we will only prefetch up until the layer boundary.
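
For reference, the index file is a sequence of fixed-size pages and
_row_offsets holds the cumulative page count of each row (layer) of the
tree. A minimal standalone sketch of the layer lookup this patch adds
(illustrative names, not bzrlib code):

def find_layer_bounds(offset, row_offsets):
    """Return (first, end) for the layer containing offset.

    first is the first page of that layer; end is the first page of the
    next layer. For row lengths [1, 9, 990] the offsets are [1, 10, 1000].
    """
    first = end = 0
    for row_offset in row_offsets:
        first, end = end, row_offset
        if offset < row_offset:
            break
    return first, end

# find_layer_bounds(0, [1, 10, 1000]) == (0, 1)       root layer
# find_layer_bounds(9, [1, 10, 1000]) == (1, 10)      internal layer
# find_layer_bounds(13, [1, 10, 1000]) == (10, 1000)  leaf layer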
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2008-10-14 19:02:26 +0000
+++ b/bzrlib/btree_index.py	2008-10-14 19:31:34 +0000
@@ -695,6 +695,21 @@
             cached_nodes.add(0)
         return cached_nodes
 
+    def _find_layer_first_and_end(self, offset):
+        """Find the start/stop nodes for the layer corresponding to offset.
+
+        :return: (first, end)
+            first is the first node in this layer
+            end is the first node of the next layer
+        """
+        first = end = 0
+        for roffset in self._row_offsets:
+            first = end
+            end = roffset
+            if offset < roffset:
+                break
+        return first, end
+
     def _expand_nodes(self, node_indexes):
         """Find extra nodes to download.
 
@@ -756,21 +771,29 @@
         else:
             # Expand requests to neighbors until we have at least
             # recommended_pages to request. We only want to expand requests
-            # within a given layer.
+            # within a given layer. We cheat a little bit and assume all
+            # requests will be in the same layer. This is true given the
+            # current design, but if it changes this algorithm may perform
+            # oddly.
+            first = end = None
             new_tips = set(final_nodes)
             while len(final_nodes) < self._recommended_pages and new_tips:
                 next_tips = set()
                 for pos in new_tips:
-                    if pos > 0:
-                        previous = pos - 1
-                        if (previous not in cached_nodes
-                            and previous not in final_nodes):
-                            next_tips.add(previous)
+                    if first is None:
+                        first, end = self._find_layer_first_and_end(pos)
+                    previous = pos - 1
+                    if (previous > 0
+                        and previous not in cached_nodes
+                        and previous not in final_nodes
+                        and previous >= first):
+                        next_tips.add(previous)
                     after = pos + 1
-                    if after < num_pages:
-                        if (after not in cached_nodes
-                            and after not in final_nodes):
-                            next_tips.add(after)
+                    if (after < num_pages
+                        and after not in cached_nodes
+                        and after not in final_nodes
+                        and after < end):
+                        next_tips.add(after)
                     # This would keep us from going bigger than
                     # recommended_pages by only expanding the first nodes.
                     # However, if we are making a 'wide' request, it is

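For illustration, a simplified standalone sketch of the expansion loop
above, staying inside the layer that holds the request (names and the
recommended_pages default are illustrative, not bzrlib API):

def expand_within_layer(requested, cached, num_pages, row_offsets,
                        recommended_pages=6):
    """Grow the requested page offsets with neighbouring pages, without
    crossing the boundaries of the layer holding the request.
    """
    final = set(requested)
    first = end = None
    new_tips = set(final)
    while len(final) < recommended_pages and new_tips:
        next_tips = set()
        for pos in new_tips:
            if first is None:
                # Locate the layer holding pos: first is its first page,
                # end is the first page of the next layer (the same scan
                # as _find_layer_first_and_end above).
                first = end = 0
                for row_offset in row_offsets:
                    first, end = end, row_offset
                    if pos < row_offset:
                        break
            previous = pos - 1
            if (previous >= first and previous not in cached
                    and previous not in final):
                next_tips.add(previous)
            after = pos + 1
            if (after < min(end, num_pages) and after not in cached
                    and after not in final):
                next_tips.add(after)
        final.update(next_tips)
        new_tips = next_tips
    return sorted(final)

# A request for page 13 of a 1000-page index with row offsets [1, 10, 1000]
# expands within the leaf layer only:
# expand_within_layer([13], set([0]), 1000, [1, 10, 1000])
#   -> [10, 11, 12, 13, 14, 15, 16]
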
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2008-10-14 19:04:33 +0000
+++ b/bzrlib/tests/test_btree_index.py	2008-10-14 19:31:34 +0000
@@ -1037,6 +1037,13 @@
                            cached_nodes=[0])
         return index
 
+    def make_1000_node_index(self):
+        index = self.make_index(4096*1000, 6)
+        self.prepare_index(index, node_ref_lists=0, key_length=1,
+                           key_count=90000, row_lengths=[1, 9, 990],
+                           cached_nodes=[0])
+        return index
+
     def assertNumPages(self, expected_pages, index, size):
         index._size = size
         self.assertEqual(expected_pages, index._compute_num_pages())
@@ -1058,6 +1065,15 @@
         self.assertNumPages(2, index, 8192)
         self.assertNumPages(76, index, 4096*75 + 10)
 
+    def test__find_layer_start_and_stop(self):
+        index = self.make_1000_node_index()
+        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
+        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
+        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
+        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
+        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
+        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
+
     def test_unknown_size(self):
         # We should not expand if we don't know the file size
         index = self.make_index(None, 10)
@@ -1123,3 +1139,16 @@
         index = self.make_100_node_index()
         self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [12, 13])
         self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [11, 14])
+
+    def test_stay_within_layer(self):
+        index = self.make_1000_node_index()
+        # When expanding a request, we won't read nodes from the next layer
+        self.assertExpandNodes([1, 2, 3, 4, 5, 6], index, [2])
+        self.assertExpandNodes([3, 4, 5, 6, 7, 8, 9], index, [6])
+        self.assertExpandNodes([4, 5, 6, 7, 8, 9], index, [9])
+        self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [10])
+        self.assertExpandNodes([10, 11, 12, 13, 14, 15, 16], index, [13])
+
+        self.set_cached_nodes(index, [0, 4, 12])
+        self.assertExpandNodes([5, 6, 7, 8, 9], index, [7])
+        self.assertExpandNodes([10, 11], index, [11])

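The row_lengths passed to make_1000_node_index map directly to the layer
boundaries the new tests assert; the offsets are just cumulative sums. A
small illustrative helper (not part of bzrlib):

def row_lengths_to_offsets(row_lengths):
    """Cumulative page counts per row, e.g. [1, 9, 990] -> [1, 10, 1000]."""
    offsets = []
    total = 0
    for length in row_lengths:
        total += length
        offsets.append(total)
    return offsets

# Page 0 is the root, pages 1-9 the internal row, pages 10-999 the leaves,
# which is why a request for page 9 never expands past page 9 and a request
# for page 13 expands to pages 10-16 in test_stay_within_layer.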

