Rev 4636: small tweaks in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-multibisect
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 24 19:34:01 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-multibisect
------------------------------------------------------------
revno: 4636
revision-id: john at arbash-meinel.com-20090824183343-1luof2rjpp8pw950
parent: john at arbash-meinel.com-20090820204851-f320qk0or7revyvr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-multibisect
timestamp: Mon 2009-08-24 13:33:43 -0500
message:
small tweaks
For some reason, it doesn't seem to be faster than the pure python version...
very strange.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-08-20 20:48:51 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-08-24 18:33:43 +0000
@@ -31,7 +31,6 @@
int PyList_Append(object lst, object item) except -1
int PyList_CheckExact(object)
PyObject *PyList_GET_ITEM(object, Py_ssize_t i)
- PyObject *PyList_GetItem(object, Py_ssize_t i) except NULL
Py_ssize_t PyList_GET_SIZE(object)
char *PyString_AsString(object p) except NULL
@@ -55,9 +54,6 @@
Py_ssize_t PySequence_GetItem(object, Py_ssize_t i)
-import bisect
-
-
cdef extern from "string.h":
void *memcpy(void *dest, void *src, size_t n)
void *memchr(void *s, int c, size_t n)
@@ -426,10 +422,6 @@
return string_key, line
-cdef object bisect_right
-bisect_right = bisect.bisect_right
-
-
def _multi_bisect_right(in_keys, fixed_keys):
"""Find the positions where each 'in_key' would fit in fixed_keys.
@@ -442,6 +434,7 @@
"""
cdef PyObject *temp
cdef long in_pos, fixed_pos, in_length, fixed_length
+ cdef long low, high
if not PyList_CheckExact(in_keys):
raise TypeError('in_keys must be a list')
@@ -457,15 +450,29 @@
# fall to the left.
return [(0, in_keys)]
+ in_pos = 0
+ temp = PyList_GET_ITEM(in_keys, in_pos)
+ in_key = <object>temp
# Find the possible location for the first entry using bisection
- pos = bisect_right(fixed_keys, in_keys[0])
+ # This is inlining the bisect_right function, because we don't want to cast
+ # back-and-forth to python objects.
+ # First, we do a single check to see if in_keys[0] is actually <=
+ # fixed_keys, this handles a common case where we are looking for lots of
+ # keys at once.
if PyList_GET_SIZE(in_keys) == 1:
- # If there is only one key, then we are done.
- return [(pos, in_keys)]
- fixed_pos = pos
- if fixed_pos >= fixed_length:
- # All the keys occur after the last fixed_key
- return [(fixed_length, in_keys)]
+ # If there is only one key, then we bisect
+ low = 0
+ high = fixed_length
+ while low < high:
+ fixed_pos = (low + high) / 2;
+ temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
+ fixed_key = <object>temp
+ if in_key < fixed_key:
+ high = fixed_pos
+ else:
+ low = fixed_pos + 1
+ fixed_pos = low
+ return [(fixed_pos, in_keys)]
output = []
@@ -474,10 +481,8 @@
# example, while cur_in_key < fixed_key, bisect to find its
# point, then iterate all matching keys, then bisect (restricted
# to only the remainder) for the next one, etc.
- in_pos = 0
- temp = PyList_GetItem(in_keys, in_pos)
- in_key = <object>temp
- temp = PyList_GetItem(fixed_keys, fixed_pos)
+ fixed_pos = 0
+ temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
fixed_key = <object>temp
while in_pos < in_length and fixed_pos < fixed_length:
if in_key < fixed_key:
@@ -489,7 +494,7 @@
if in_pos >= in_length:
# We have handled all of the keys
return output
- temp = PyList_GetItem(in_keys, in_pos)
+ temp = PyList_GET_ITEM(in_keys, in_pos)
in_key = <object>temp
# At this point cur_in_key must be >= cur_fixed_key
# step the cur_fixed_key until we pass the cur key, or walk off
@@ -498,7 +503,7 @@
fixed_pos = fixed_pos + 1
if fixed_pos >= fixed_length:
break
- temp = PyList_GetItem(fixed_keys, fixed_pos)
+ temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
fixed_key = <object>temp
if in_pos < in_length:
@@ -508,7 +513,7 @@
in_pos = in_pos + 1
PyList_Append(output, (fixed_length, cur_keys))
while in_pos < in_length:
- temp = PyList_GetItem(in_keys, in_pos)
+ temp = PyList_GET_ITEM(in_keys, in_pos)
in_key = <object>temp
PyList_Append(cur_keys, in_key)
in_pos = in_pos + 1
More information about the bazaar-commits
mailing list