Rev 28: Consider bisecting more often when relevant, but it doesn't seem to help a whole lot. in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 2 21:26:06 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 28
revision-id: john at arbash-meinel.com-20080702202600-3q9fnpmwedog3j5v
parent: john at arbash-meinel.com-20080702201701-f40mgcxxf229qhei
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-02 15:26:00 -0500
message:
Consider bisecting more often when relevant, but it doesn't seem to help a whole lot.
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 20:17:01 +0000
+++ b/btree_index.py 2008-07-02 20:26:00 +0000
@@ -20,6 +20,7 @@
import array
from bisect import bisect_right
from copy import deepcopy
+import math
import sha
import struct
import tempfile
@@ -525,9 +526,18 @@
# function, so there is even more to be gained.
# if (len(in_keys) + len(fixed_keys)
# > len(in_keys) * math.log(len(fixed_keys), 2)):
+ iter_steps = len(in_keys) + len(fixed_keys)
+ bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
if len(in_keys) == 1: # Bisect will always be faster for M = 1
bisect_shortcut[0] += 1
return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
+ elif bisect_steps < iter_steps:
+ bisect_shortcut[0] += len(in_keys)
+ offsets = {}
+ for key in in_keys:
+ offsets.setdefault(bisect_right(fixed_keys, key),
+ []).append(key)
+ return [(o, offsets[o]) for o in sorted(offsets)]
else:
bisect_shortcut[1] += len(in_keys)
in_keys_iter = iter(in_keys)
@@ -541,6 +551,11 @@
output = []
cur_out = []
+ # TODO: Another possibility is that rather than iterating on each side,
+ # we could use a combination of bisecting and iterating. For
+ # example, while cur_in_key < fixed_key, bisect to find its
+ # point, then iterate all matching keys, then bisect (restricted
+ # to only the remainder) for the next one, etc.
try:
while True:
if cur_in_key < cur_fixed_key:
More information about the bazaar-commits
mailing list