Rev 3775: Code cleanup. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer

John Arbash Meinel john at arbash-meinel.com
Tue Oct 14 21:27:41 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer

------------------------------------------------------------
revno: 3775
revision-id: john at arbash-meinel.com-20081014202722-svyfvmyhcieabhyd
parent: john at arbash-meinel.com-20081014201953-gmbxn75fjj8vcie2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_buffer
timestamp: Tue 2008-10-14 15:27:22 -0500
message:
  Code cleanup.
  
  Move some functions around to more appropriate areas. Fix some function
  names so that they make more sense.
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2008-10-14 20:19:53 +0000
+++ b/bzrlib/btree_index.py	2008-10-14 20:27:22 +0000
@@ -347,12 +347,6 @@
                 # First key triggers the first row
                 rows.append(_LeafBuilderRow())
             key_count += 1
-            # TODO: Flattening the node into a string key and a line should
-            #       probably be put into a pyrex function. We can do a quick
-            #       iter over all the entries to determine the final length,
-            #       and then do a single malloc() rather than lots of
-            #       intermediate mallocs as we build everything up.
-            #       ATM 3 / 13s are spent flattening nodes (10s is compressing)
             string_key, line = _btree_serializer._flatten_node(node,
                                     self.reference_lists)
             self._add_key(string_key, line, rows)
@@ -628,13 +622,7 @@
     def __ne__(self, other):
         return not self.__eq__(other)
 
-    def _get_root_node(self):
-        if self._root_node is None:
-            # We may not have a root node yet
-            self._get_internal_nodes([0])
-        return self._root_node
-
-    def _cache_nodes(self, nodes, cache):
+    def _get_and_cache_nodes(self, nodes):
         """Read nodes and cache them in the lru.
 
         The nodes list supplied is sorted and then read from disk, each node
@@ -647,9 +635,6 @@
 
         :return: A dict of {node_pos: node}
         """
-        if len(nodes) > cache._max_cache:
-            trace.mutter('Requesting %s > %s nodes, not all will be cached',
-                         len(nodes), cache._max_cache)
         found = {}
         start_of_leaves = None
         for node_pos, node in self._read_nodes(sorted(nodes)):
@@ -662,7 +647,6 @@
                     self._internal_node_cache.add(node_pos, node)
                 else:
                     self._leaf_node_cache.add(node_pos, node)
-                cache.add(node_pos, node)
             found[node_pos] = node
         return found
 
@@ -686,29 +670,6 @@
         num_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
         return num_pages
 
-    def _get_cached_nodes(self):
-        """Determine what nodes we already have cached."""
-        cached_nodes = set(self._internal_node_cache.keys())
-        cached_nodes.update(self._leaf_node_cache.keys())
-        if self._root_node is not None:
-            cached_nodes.add(0)
-        return cached_nodes
-
-    def _find_layer_first_and_end(self, offset):
-        """Find the start/stop nodes for the layer corresponding to offset.
-
-        :return: (first, end)
-            first is the first node in this layer
-            end is the first node of the next layer
-        """
-        first = end = 0
-        for roffset in self._row_offsets:
-            first = end
-            end = roffset
-            if offset < roffset:
-                break
-        return first, end
-
     def _expand_nodes(self, node_indexes):
         """Find extra nodes to download.
 
@@ -734,23 +695,22 @@
             if 'index' in debug.debug_flags:
                 trace.mutter('  not expanding without knowing index size')
             return node_indexes
-        # TODO: Should this be cached instead?
         num_pages = self._compute_num_pages()
         # The idea here is to get nodes "next to" the ones we are already
         # getting.
-        cached_nodes = self._get_cached_nodes()
+        cached_keys = self._get_cached_keys()
 
-        # if len(cached_nodes) < 2:
+        # if len(cached_keys) < 2:
         #     # We haven't read enough to justify expansion
         #     return node_indexes
 
         # If reading recommended_pages would read the rest of the index, just
         # do so.
-        if num_pages - len(cached_nodes) <= self._recommended_pages:
+        if num_pages - len(cached_keys) <= self._recommended_pages:
             # Read whatever is left
-            if cached_nodes:
+            if cached_keys:
                 expanded = [x for x in xrange(num_pages)
-                               if x not in cached_nodes]
+                               if x not in cached_keys]
             else:
                 expanded = range(num_pages)
             if 'index' in debug.debug_flags:
@@ -783,13 +743,13 @@
                         first, end = self._find_layer_first_and_end(pos)
                     previous = pos - 1
                     if (previous > 0
-                        and previous not in cached_nodes
+                        and previous not in cached_keys
                         and previous not in final_nodes
                         and previous >= first):
                         next_tips.add(previous)
                     after = pos + 1
                     if (after < num_pages
-                        and after not in cached_nodes
+                        and after not in cached_keys
                         and after not in final_nodes
                         and after < end):
                         next_tips.add(after)
@@ -807,6 +767,35 @@
             trace.mutter('expanded:  %s', final_nodes)
         return final_nodes
 
+    def _find_layer_first_and_end(self, offset):
+        """Find the start/stop nodes for the layer corresponding to offset.
+
+        :return: (first, end)
+            first is the first node in this layer
+            end is the first node of the next layer
+        """
+        first = end = 0
+        for roffset in self._row_offsets:
+            first = end
+            end = roffset
+            if offset < roffset:
+                break
+        return first, end
+
+    def _get_cached_keys(self):
+        """Determine what nodes we already have cached."""
+        cached_keys = set(self._internal_node_cache.keys())
+        cached_keys.update(self._leaf_node_cache.keys())
+        if self._root_node is not None:
+            cached_keys.add(0)
+        return cached_keys
+
+    def _get_root_node(self):
+        if self._root_node is None:
+            # We may not have a root node yet
+            self._get_internal_nodes([0])
+        return self._root_node
+
     def _get_nodes(self, cache, node_indexes):
         found = {}
         needed = []
@@ -819,10 +808,9 @@
             except KeyError:
                 needed.append(idx)
         if not needed:
-            # trace.note('cached: %s\tnodes: %s', self._name[-14:], node_indexes)
             return found
         needed = self._expand_nodes(needed)
-        found.update(self._cache_nodes(needed, cache))
+        found.update(self._get_and_cache_nodes(needed))
         return found
 
     def _get_internal_nodes(self, node_indexes):

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2008-10-14 19:31:34 +0000
+++ b/bzrlib/tests/test_btree_index.py	2008-10-14 20:27:22 +0000
@@ -1012,15 +1012,15 @@
             index._recommended_pages = recommended_pages
         return index
 
-    def set_cached_nodes(self, index, cached_nodes):
-        """Modify the index to give a canned answer for _get_cached_nodes()."""
-        def _get_cached_nodes():
-            cached = set(cached_nodes)
+    def set_cached_keys(self, index, cached_keys):
+        """Modify the index to give a canned answer for _get_cached_keys()."""
+        def _get_cached_keys():
+            cached = set(cached_keys)
             return cached
-        index._get_cached_nodes = _get_cached_nodes
+        index._get_cached_keys = _get_cached_keys
 
     def prepare_index(self, index, node_ref_lists, key_length, key_count,
-                      row_lengths, cached_nodes):
+                      row_lengths, cached_keys):
         """Setup the BTreeGraphIndex with some pre-canned information."""
         index.node_ref_lists = node_ref_lists
         index._key_length = key_length
@@ -1028,20 +1028,20 @@
         index._row_lengths = row_lengths
         index._compute_row_offsets()
         index._root_node = btree_index._InternalNode('internal\noffset=0\n')
-        self.set_cached_nodes(index, cached_nodes)
+        self.set_cached_keys(index, cached_keys)
 
     def make_100_node_index(self):
         index = self.make_index(4096*100, 6)
         self.prepare_index(index, node_ref_lists=0, key_length=1,
                            key_count=1000, row_lengths=[1, 99],
-                           cached_nodes=[0])
+                           cached_keys=[0])
         return index
 
     def make_1000_node_index(self):
         index = self.make_index(4096*1000, 6)
         self.prepare_index(index, node_ref_lists=0, key_length=1,
                            key_count=90000, row_lengths=[1, 9, 990],
-                           cached_nodes=[0])
+                           cached_keys=[0])
         return index
 
     def assertNumPages(self, expected_pages, index, size):
@@ -1094,7 +1094,7 @@
         index = self.make_index(4096*10, 5)
         self.prepare_index(index, node_ref_lists=0, key_length=1,
                            key_count=1000, row_lengths=[1, 9],
-                           cached_nodes=[0, 1, 2, 5, 6])
+                           cached_keys=[0, 1, 2, 5, 6])
         # It should fill the remaining nodes, regardless of the one requested
         self.assertExpandNodes([3, 4, 7, 8, 9], index, [3])
         self.assertExpandNodes([3, 4, 7, 8, 9], index, [8])
@@ -1121,7 +1121,7 @@
 
     def test_stop_at_cached(self):
         index = self.make_100_node_index()
-        self.set_cached_nodes(index, [0, 10, 19])
+        self.set_cached_keys(index, [0, 10, 19])
         self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [11])
         self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [12])
         self.assertExpandNodes([12, 13, 14, 15, 16, 17, 18], index, [15])
@@ -1131,7 +1131,7 @@
 
     def test_cannot_fully_expand(self):
         index = self.make_100_node_index()
-        self.set_cached_nodes(index, [0, 10, 12])
+        self.set_cached_keys(index, [0, 10, 12])
         # We don't go into an endless loop if we are bound by cached nodes
         self.assertExpandNodes([11], index, [11])
 
@@ -1149,6 +1149,6 @@
         self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [10])
         self.assertExpandNodes([10, 11, 12, 13, 14, 15, 16], index, [13])
 
-        self.set_cached_nodes(index, [0, 4, 12])
+        self.set_cached_keys(index, [0, 4, 12])
         self.assertExpandNodes([5, 6, 7, 8, 9], index, [7])
         self.assertExpandNodes([10, 11], index, [11])



More information about the bazaar-commits mailing list