Rev 28: Consider bisecting more often when relevant, but it doesn't seem to help a whole lot. in http://bzr.arbash-meinel.com/plugins/index2

John Arbash Meinel john at arbash-meinel.com
Wed Jul 2 21:26:06 BST 2008


At http://bzr.arbash-meinel.com/plugins/index2

------------------------------------------------------------
revno: 28
revision-id: john at arbash-meinel.com-20080702202600-3q9fnpmwedog3j5v
parent: john at arbash-meinel.com-20080702201701-f40mgcxxf229qhei
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 15:26:00 -0500
message:
  Consider bisecting more often when relevant, but it doesn't seem to help a whole lot.
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-02 20:17:01 +0000
+++ b/btree_index.py	2008-07-02 20:26:00 +0000
@@ -20,6 +20,7 @@
 import array
 from bisect import bisect_right
 from copy import deepcopy
+import math
 import sha
 import struct
 import tempfile
@@ -525,9 +526,18 @@
         #       function, so there is even more to be gained.
         # if (len(in_keys) + len(fixed_keys)
         #     > len(in_keys) * math.log(len(fixed_keys), 2)):
+        iter_steps = len(in_keys) + len(fixed_keys)
+        bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
         if len(in_keys) == 1: # Bisect will always be faster for M = 1
             bisect_shortcut[0] += 1
             return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
+        elif bisect_steps < iter_steps:
+            bisect_shortcut[0] += len(in_keys)
+            offsets = {}
+            for key in in_keys:
+                offsets.setdefault(bisect_right(fixed_keys, key),
+                                   []).append(key)
+            return [(o, offsets[o]) for o in sorted(offsets)]
         else:
             bisect_shortcut[1] += len(in_keys)
         in_keys_iter = iter(in_keys)
@@ -541,6 +551,11 @@
         output = []
         cur_out = []
 
+        # TODO: Another possibility is that rather than iterating on each side,
+        #       we could use a combination of bisecting and iterating. For
+        #       example, while cur_in_key < fixed_key, bisect to find its
+        #       point, then iterate all matching keys, then bisect (restricted
+        #       to only the remainder) for the next one, etc.
         try:
             while True:
                 if cur_in_key < cur_fixed_key:



More information about the bazaar-commits mailing list