Rev 4675: (jam) Tweak BTreeLeafParser.extract_key to improve index read performance by 10% in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Sat Sep 5 19:43:20 BST 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4675 [merge]
revision-id: pqm at pqm.ubuntu.com-20090905184318-tw9odqqk4fh93qrv
parent: pqm at pqm.ubuntu.com-20090904165031-snnr7lgjuzlova80
parent: john at arbash-meinel.com-20090905135056-t0hcivl2905zlljy
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sat 2009-09-05 19:43:18 +0100
message:
  (jam) Tweak BTreeLeafParser.extract_key to improve index read
  performance by 10%
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/_btree_serializer_pyx.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
=== modified file 'NEWS'
--- a/NEWS	2009-09-04 15:42:26 +0000
+++ b/NEWS	2009-09-05 18:43:18 +0000
@@ -62,6 +62,11 @@
 Internals
 *********
 
+* ``BTreeLeafParser.extract_key`` has been tweaked slightly to reduce
+  mallocs while parsing the index (approx 3=>1 mallocs per key read).
+  This results in a 10% speedup while reading an index.
+  (John Arbash Meinel)
+
 * The ``bzrlib.lsprof`` module has a new class ``BzrProfiler`` which makes
   profiling in some situations like callbacks and generators easier.
   (Robert Collins)

=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-06-22 12:52:39 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-09-04 21:16:14 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -41,8 +41,11 @@
     int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len)
     void PyString_InternInPlace(PyObject **)
     int PyTuple_CheckExact(object t)
+    object PyTuple_New(Py_ssize_t n_entries)
+    void PyTuple_SET_ITEM(object, Py_ssize_t offset, object) # steals the ref
     Py_ssize_t PyTuple_GET_SIZE(object t)
     PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index)
+    void Py_INCREF(object)
     void Py_DECREF_ptr "Py_DECREF" (PyObject *)
 
 cdef extern from "string.h":
@@ -140,14 +143,12 @@
         cdef char *temp_ptr
         cdef int loop_counter
         # keys are tuples
-        loop_counter = 0
-        key_segments = []
-        while loop_counter < self.key_length:
-            loop_counter = loop_counter + 1
+        key = PyTuple_New(self.key_length)
+        for loop_counter from 0 <= loop_counter < self.key_length:
             # grab a key segment
             temp_ptr = <char*>memchr(self._start, c'\0', last - self._start)
             if temp_ptr == NULL:
-                if loop_counter == self.key_length:
+                if loop_counter + 1 == self.key_length:
                     # capture to last
                     temp_ptr = last
                 else:
@@ -164,8 +165,9 @@
                                                          temp_ptr - self._start)
             # advance our pointer
             self._start = temp_ptr + 1
-            PyList_Append(key_segments, key_element)
-        return tuple(key_segments)
+            Py_INCREF(key_element)
+            PyTuple_SET_ITEM(key, loop_counter, key_element)
+        return key
 
     cdef int process_line(self) except -1:
         """Process a line in the bytes."""




More information about the bazaar-commits mailing list