Rev 4636: small tweaks in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-multibisect

John Arbash Meinel john at arbash-meinel.com
Mon Aug 24 19:34:01 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-multibisect

------------------------------------------------------------
revno: 4636
revision-id: john at arbash-meinel.com-20090824183343-1luof2rjpp8pw950
parent: john at arbash-meinel.com-20090820204851-f320qk0or7revyvr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-multibisect
timestamp: Mon 2009-08-24 13:33:43 -0500
message:
  small tweaks
  
  For some reason, it doesn't seem to be faster than the pure python version...
  very strange.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-08-20 20:48:51 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-08-24 18:33:43 +0000
@@ -31,7 +31,6 @@
     int PyList_Append(object lst, object item) except -1
     int PyList_CheckExact(object)
     PyObject *PyList_GET_ITEM(object, Py_ssize_t i)
-    PyObject *PyList_GetItem(object, Py_ssize_t i) except NULL
     Py_ssize_t PyList_GET_SIZE(object)
 
     char *PyString_AsString(object p) except NULL
@@ -55,9 +54,6 @@
     Py_ssize_t PySequence_GetItem(object, Py_ssize_t i)
 
 
-import bisect
-
-
 cdef extern from "string.h":
     void *memcpy(void *dest, void *src, size_t n)
     void *memchr(void *s, int c, size_t n)
@@ -426,10 +422,6 @@
     return string_key, line
 
 
-cdef object bisect_right
-bisect_right = bisect.bisect_right
-
-
 def _multi_bisect_right(in_keys, fixed_keys):
     """Find the positions where each 'in_key' would fit in fixed_keys.
 
@@ -442,6 +434,7 @@
     """
     cdef PyObject *temp
     cdef long in_pos, fixed_pos, in_length, fixed_length
+    cdef long low, high
 
     if not PyList_CheckExact(in_keys):
         raise TypeError('in_keys must be a list')
@@ -457,15 +450,29 @@
         # fall to the left.
         return [(0, in_keys)]
 
+    in_pos = 0
+    temp = PyList_GET_ITEM(in_keys, in_pos)
+    in_key = <object>temp
     # Find the possible location for the first entry using bisection
-    pos = bisect_right(fixed_keys, in_keys[0])
+    # This is inlining the bisect_right function, because we don't want to cast
+    # back-and-forth to python objects.
+    # First, we do a single check to see if in_keys[0] is actually <=
+    # fixed_keys, this handles a common case where we are looking for lots of
+    # keys at once.
     if PyList_GET_SIZE(in_keys) == 1:
-        # If there is only one key, then we are done.
-        return [(pos, in_keys)]
-    fixed_pos = pos
-    if fixed_pos >= fixed_length:
-        # All the keys occur after the last fixed_key
-        return [(fixed_length, in_keys)]
+        # If there is only one key, then we bisect
+        low = 0
+        high = fixed_length
+        while low < high:
+            fixed_pos = (low + high) / 2;
+            temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
+            fixed_key = <object>temp
+            if in_key < fixed_key:
+                high = fixed_pos
+            else:
+                low = fixed_pos + 1
+        fixed_pos = low
+        return [(fixed_pos, in_keys)]
 
     output = []
 
@@ -474,10 +481,8 @@
     #       example, while cur_in_key < fixed_key, bisect to find its
     #       point, then iterate all matching keys, then bisect (restricted
     #       to only the remainder) for the next one, etc.
-    in_pos = 0
-    temp = PyList_GetItem(in_keys, in_pos)
-    in_key = <object>temp
-    temp = PyList_GetItem(fixed_keys, fixed_pos)
+    fixed_pos = 0
+    temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
     fixed_key = <object>temp
     while in_pos < in_length and fixed_pos < fixed_length:
         if in_key < fixed_key:
@@ -489,7 +494,7 @@
                 if in_pos >= in_length:
                     # We have handled all of the keys
                     return output
-                temp = PyList_GetItem(in_keys, in_pos)
+                temp = PyList_GET_ITEM(in_keys, in_pos)
                 in_key = <object>temp
         # At this point cur_in_key must be >= cur_fixed_key
         # step the cur_fixed_key until we pass the cur key, or walk off
@@ -498,7 +503,7 @@
             fixed_pos = fixed_pos + 1
             if fixed_pos >= fixed_length:
                 break
-            temp = PyList_GetItem(fixed_keys, fixed_pos)
+            temp = PyList_GET_ITEM(fixed_keys, fixed_pos)
             fixed_key = <object>temp
 
     if in_pos < in_length:
@@ -508,7 +513,7 @@
         in_pos = in_pos + 1
         PyList_Append(output, (fixed_length, cur_keys))
         while in_pos < in_length:
-            temp = PyList_GetItem(in_keys, in_pos)
+            temp = PyList_GET_ITEM(in_keys, in_pos)
             in_key = <object>temp
             PyList_Append(cur_keys, in_key)
             in_pos = in_pos + 1



More information about the bazaar-commits mailing list