Rev 29: disable the alternative multi-way bisecting, and allow the leaf_value cache to be disabled. in http://bzr.arbash-meinel.com/plugins/index2

John Arbash Meinel john at arbash-meinel.com
Wed Jul 2 21:44:15 BST 2008


At http://bzr.arbash-meinel.com/plugins/index2

------------------------------------------------------------
revno: 29
revision-id: john at arbash-meinel.com-20080702204409-s512bff7tus0cb8n
parent: john at arbash-meinel.com-20080702202600-3q9fnpmwedog3j5v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 15:44:09 -0500
message:
  disable the alternative multi-way bisecting, and allow the leaf_value cache to be disabled.
  
  LRUCaches and random access do not play well together.
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-02 20:26:00 +0000
+++ b/btree_index.py	2008-07-02 20:44:09 +0000
@@ -388,7 +388,7 @@
         self._page_size = transport.recommended_page_size()
         self._root_node = None
         # Default max size is 100,000 leave values
-        self._leaf_value_cache = lru_cache.LRUCache(100*1000)
+        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
         self._leaf_node_cache = lru_cache.LRUCache()
         self._internal_node_cache = lru_cache.LRUCache()
         self._key_count = None
@@ -466,13 +466,14 @@
         """Get a bunch of nodes, from cache or disk."""
         found = self._get_nodes(self._leaf_node_cache, node_indexes,
                                 leaf_node_hits)
-        for node in found.itervalues():
-            for key, value in node.keys.iteritems():
-                if key in self._leaf_value_cache:
-                    # Don't add the rest of the keys, we've seen this node
-                    # before.
-                    break
-                self._leaf_value_cache[key] = value
+        if self._leaf_value_cache is not None:
+            for node in found.itervalues():
+                for key, value in node.keys.iteritems():
+                    if key in self._leaf_value_cache:
+                        # Don't add the rest of the keys, we've seen this node
+                        # before.
+                        break
+                    self._leaf_value_cache[key] = value
         return found
 
     def iter_all_entries(self):
@@ -524,20 +525,18 @@
         #       based on which has the fewer number of steps.
         #       There is also the argument that bisect_right is a compiled
         #       function, so there is even more to be gained.
-        # if (len(in_keys) + len(fixed_keys)
-        #     > len(in_keys) * math.log(len(fixed_keys), 2)):
-        iter_steps = len(in_keys) + len(fixed_keys)
-        bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
+        # iter_steps = len(in_keys) + len(fixed_keys)
+        # bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
         if len(in_keys) == 1: # Bisect will always be faster for M = 1
             bisect_shortcut[0] += 1
             return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
-        elif bisect_steps < iter_steps:
-            bisect_shortcut[0] += len(in_keys)
-            offsets = {}
-            for key in in_keys:
-                offsets.setdefault(bisect_right(fixed_keys, key),
-                                   []).append(key)
-            return [(o, offsets[o]) for o in sorted(offsets)]
+        # elif bisect_steps < iter_steps:
+        #     bisect_shortcut[0] += len(in_keys)
+        #     offsets = {}
+        #     for key in in_keys:
+        #         offsets.setdefault(bisect_right(fixed_keys, key),
+        #                            []).append(key)
+        #     return [(o, offsets[o]) for o in sorted(offsets)]
         else:
             bisect_shortcut[1] += len(in_keys)
         in_keys_iter = iter(in_keys)
@@ -611,19 +610,22 @@
         # Find all the keys that are already in our value cache
         global leaf_value_hits, miss_attempts
         needed_keys = []
-        for key in keys:
-            value = self._leaf_value_cache.get(key, None)
-            if value is not None:
-                leaf_value_hits[0] += 1
-                # This key is known not to be here, skip it
-                value, refs = value
-                if self.node_ref_lists:
-                    yield (self, key, value, refs)
+        if self._leaf_value_cache is None:
+            needed_keys = keys
+        else:
+            for key in keys:
+                value = self._leaf_value_cache.get(key, None)
+                if value is not None:
+                    leaf_value_hits[0] += 1
+                    # This key is known not to be here, skip it
+                    value, refs = value
+                    if self.node_ref_lists:
+                        yield (self, key, value, refs)
+                    else:
+                        yield (self, key, value)
                 else:
-                    yield (self, key, value)
-            else:
-                leaf_value_hits[1] += 1
-                needed_keys.append(key)
+                    leaf_value_hits[1] += 1
+                    needed_keys.append(key)
         needed_keys = sorted(needed_keys)
 
         nodes_and_keys = [(0, needed_keys)]



More information about the bazaar-commits mailing list