Rev 3666: Shave off some more time by using exact accessors. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

John Arbash Meinel john at arbash-meinel.com
Thu Aug 21 23:28:41 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

------------------------------------------------------------
revno: 3666
revision-id: john at arbash-meinel.com-20080821222839-xy2yufgx6m6zzzt2
parent: john at arbash-meinel.com-20080821215857-xfk04aivsp0ntvpd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 17:28:39 -0500
message:
  Shave off some more time by using exact accessors.
  Basically, the inner access is using PyTuple and PyString functions
  instead of their PyObject counterparts.
modified:
  bzrlib/_parse_btree_c.pyx      _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
-------------- next part --------------
=== modified file 'bzrlib/_parse_btree_c.pyx'
--- a/bzrlib/_parse_btree_c.pyx	2008-08-21 21:58:57 +0000
+++ b/bzrlib/_parse_btree_c.pyx	2008-08-21 22:28:39 +0000
@@ -20,13 +20,21 @@
     ctypedef unsigned size_t
 
 cdef extern from "Python.h":
+    ctypedef struct PyObject:
+        pass
     int PyList_Append(object lst, object item) except -1
 
-    char *PyString_AsString(object p)
+    char *PyString_AsString(object p) except NULL
     object PyString_FromStringAndSize(char *, Py_ssize_t)
     int PyString_CheckExact(object s)
+    int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *)
     Py_ssize_t PyString_Size(object p)
+    Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *)
+    char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *)
+    int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len)
     int PyTuple_CheckExact(object t)
+    Py_ssize_t PyTuple_GET_SIZE(object t)
+    PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index)
 
 cdef extern from "string.h":
     void *memcpy(void *dest, void *src, size_t n)
@@ -125,6 +133,9 @@
                                                    last - self._start)))
                     raise AssertionError(failure_string)
             # capture the key string
+            # TODO: Consider using PyIntern_FromString, the only caveat is that
+            # it assumes a NULL-terminated string, so we have to check if
+            # temp_ptr[0] == c'\0' or some other char.
             key_element = safe_string_from_size(self._start,
                                                 temp_ptr - self._start)
             # advance our pointer
@@ -257,17 +268,32 @@
     """
     cdef Py_ssize_t flat_len
     cdef Py_ssize_t key_len
+    cdef Py_ssize_t node_len
+    cdef PyObject * val
     cdef char * value
     cdef Py_ssize_t value_len
     cdef char * out
-    cdef Py_ssize_t ref_len
+    cdef Py_ssize_t refs_len
     cdef Py_ssize_t next_len
     cdef int first_ref_list
     cdef int first_reference
-    cdef int first_bit
+    cdef int i
+    cdef PyObject *ref_bit
+    cdef Py_ssize_t ref_bit_len
 
+    if not PyTuple_CheckExact(node):
+        raise TypeError('We expected a tuple() for node not: %s'
+            % type(node))
+    node_len = PyTuple_GET_SIZE(node)
+    if reference_lists:
+        if node_len != 4:
+            raise ValueError('With ref_lists, we expected 4 entries not: %s'
+                % len(node))
+    elif node_len < 3:
+        raise ValueError('Without ref_lists, we need at least 3 entries not: %s'
+            % len(node))
     # I don't expect that we can do faster than string.join()
-    string_key = '\0'.join(node[1])
+    string_key = '\0'.join(<object>PyTuple_GET_ITEM_ptr_object(node, 1))
 
     # TODO: instead of using string joins, precompute the final string length,
     #       and then malloc a single string and copy everything in.
@@ -281,7 +307,7 @@
     # ref_list := ref (CR ref)*
     # ref := BYTES (NULL BYTES)*
     # value := BYTES
-    ref_len = 0
+    refs_len = 0
     if reference_lists:
         # Figure out how many bytes it will take to store the references
         ref_lists = node[3]
@@ -290,27 +316,40 @@
             # If there are no nodes, we don't need to do any work
             # Otherwise we will need (len - 1) '\t' characters to separate
             # the reference lists
-            ref_len = ref_len + (next_len - 1)
+            refs_len = refs_len + (next_len - 1)
             for ref_list in ref_lists:
                 next_len = len(ref_list)
                 if next_len > 0:
                     # We will need (len - 1) '\r' characters to separate the
                     # references
-                    ref_len = ref_len + (next_len - 1)
+                    refs_len = refs_len + (next_len - 1)
                     for reference in ref_list:
-                        next_len = len(reference)
+                        if not PyTuple_CheckExact(reference):
+                            raise TypeError(
+                                'We expect references to be tuples not: %s'
+                                % type(reference))
+                        next_len = PyTuple_GET_SIZE(reference)
                         if next_len > 0:
                             # We will need (len - 1) '\x00' characters to
                             # separate the reference key
-                            ref_len = ref_len + (next_len - 1)
-                            for ref in reference:
-                                ref_len = ref_len + PyString_Size(ref)
+                            refs_len = refs_len + (next_len - 1)
+                            for i from 0 <= i < next_len:
+                                ref_bit = PyTuple_GET_ITEM_ptr_object(reference, i)
+                                if not PyString_CheckExact(<object>ref_bit):
+                                    raise TypeError('We expect reference bits'
+                                        ' to be strings not: %s'
+                                        % type(<object>ref_bit))
+                                refs_len = refs_len + PyString_GET_SIZE_ptr(ref_bit)
 
     # So we have the (key NULL refs NULL value LF)
     key_len = PyString_Size(string_key)
-    value = PyString_AsString(node[2])
-    value_len = PyString_Size(node[2])
-    flat_len = (key_len + 1 + ref_len + 1 + value_len + 1)
+    val = PyTuple_GET_ITEM_ptr_object(node, 2)
+    if not PyString_CheckExact(<object>val):
+        raise TypeError('Expected a plain str for value not: %s'
+                        % type(<object>val))
+    value = PyString_AS_STRING_ptr(val)
+    value_len = PyString_GET_SIZE_ptr(val)
+    flat_len = (key_len + 1 + refs_len + 1 + value_len + 1)
     line = PyString_FromStringAndSize(NULL, flat_len)
     # Get a pointer to the new buffer
     out = PyString_AsString(line)
@@ -318,7 +357,7 @@
     out = out + key_len
     out[0] = c'\0'
     out = out + 1
-    if ref_len > 0:
+    if refs_len > 0:
         first_ref_list = 1
         for ref_list in ref_lists:
             if first_ref_list == 0:
@@ -331,15 +370,15 @@
                     out[0] = c'\r'
                     out = out + 1
                 first_reference = 0
-                first_bit = 1
-                for bit in reference:
-                    if first_bit == 0:
+                next_len = PyTuple_GET_SIZE(reference)
+                for i from 0 <= i < next_len:
+                    if i != 0:
                         out[0] = c'\x00'
                         out = out + 1
-                    first_bit = 0
-                    next_len = PyString_Size(bit)
-                    memcpy(out, PyString_AsString(bit), next_len)
-                    out = out + next_len
+                    ref_bit = PyTuple_GET_ITEM_ptr_object(reference, i)
+                    ref_bit_len = PyString_GET_SIZE_ptr(ref_bit)
+                    memcpy(out, PyString_AS_STRING_ptr(ref_bit), ref_bit_len)
+                    out = out + ref_bit_len
     out[0] = c'\0'
     out = out  + 1
     memcpy(out, value, value_len)



More information about the bazaar-commits mailing list