Rev 2490: Use direct functions when possible, and avoid extra dict lookups. in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

Wed May 9 19:45:33 BST 2007

At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

------------------------------------------------------------
revno: 2490
revision-id: john at arbash-meinel.com-20070509184520-hd6la926t5hfshbr
parent: john at arbash-meinel.com-20070509182208-7xygjy8m8nwdfhm2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_index_pyrex
timestamp: Wed 2007-05-09 13:45:20 -0500
message:
  Use direct functions when possible, and avoid extra dict lookups.
    test_read_50k_index_c           338ms
    test_read_50k_index_c_again     351ms
    test_read_50k_index_py          901ms
    test_read_50k_index_py_again   1023ms
modified:
  bzrlib/benchmarks/bench_knit.py bench_knit.py-20070509145850-pan5jnd3hl7mfdya-1
  bzrlib/knit_c.pyx              knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
-------------- next part --------------
=== modified file 'bzrlib/benchmarks/bench_knit.py'

--- a/bzrlib/benchmarks/bench_knit.py	2007-05-09 15:20:53 +0000
+++ b/bzrlib/benchmarks/bench_knit.py	2007-05-09 18:45:20 +0000
@@ -75,14 +75,14 @@
         knit._load_data = knit._load_data_py
 
     def test_read_50k_index_c(self):
+        self.setup_load_data_c()
         self.create_50k_index()
-        self.setup_load_data_c()
         t = self.get_transport()
         kndx = self.time(knit._KnitIndex, t, 'test.kndx', 'r')
 
     def test_read_50k_index_c_again(self):
+        self.setup_load_data_c()
         self.create_50k_index()
-        self.setup_load_data_c()
         t = self.get_transport()
         kndx = self.time(knit._KnitIndex, t, 'test.kndx', 'r')
 

=== modified file 'bzrlib/knit_c.pyx'
--- a/bzrlib/knit_c.pyx	2007-05-09 18:22:08 +0000
+++ b/bzrlib/knit_c.pyx	2007-05-09 18:45:20 +0000
@@ -25,7 +25,7 @@
 
 cdef extern from "Python.h":
     int PyDict_CheckExact(object)
-    void *PyDict_GetItem(object p, object key)
+    void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
     int PyDict_SetItem(object p, object key, object val) except -1
 
     int PyList_Append(object lst, object item) except -1
@@ -113,14 +113,13 @@
     cdef object process_options(self, char *option_str, char *end):
         """Process the options string into a list."""
         cdef char *next
-        cdef char *orig
 
-        # options = PyString_FromStringAndSize(option_str, <int>(end-option_str))
+        # options = PyString_FromStringAndSize(option_str,
+        #                                      end-option_str)
         # return options.split(',')
 
         final_options = []
 
-        orig = option_str
         while option_str < end:
             # Using strchr here actually hurts performance dramatically.
             # Because you aren't guaranteed to have a ',' any time soon,
@@ -129,7 +128,7 @@
             # GNU extension.
             next = self._end_of_option(option_str, end)
             next_option = PyString_FromStringAndSize(option_str,
-                                                     <int>(next - option_str))
+                                                     next - option_str)
             PyList_Append(final_options, next_option)
                           
             # Move past the ','
@@ -143,7 +142,7 @@
         cdef int parent_len
 
         # parents = PyString_FromStringAndSize(parent_str,
-        #                                      <int>(end - parent_str))
+        #                                      end - parent_str)
         # real_parents = []
         # for parent in parents.split():
         #     if parent[0].startswith('.'):
@@ -154,6 +153,8 @@
 
         parents = []
         while parent_str <= end and parent_str != NULL:
+            # strchr is safe here, because our lines always end
+            # with ' :'
             next = strchr(parent_str, c' ')
             if next == NULL or next >= end or next == parent_str:
                 break
@@ -172,8 +173,10 @@
                     raise IndexError('Parent index refers to a revision which'
                         ' does not exist yet.'
                         ' %d > %d' % (int_parent, self.history_len))
-                parent = self.history[int_parent]
-            parents.append(parent)
+                parent = PyList_GET_ITEM(self.history, int_parent)
+                # PyList_GET_ITEM returns a borrowed reference
+                Py_INCREF(parent)
+            PyList_Append(parents, parent)
             parent_str = next + 1
         return parents
 
@@ -189,6 +192,7 @@
         cdef int size
         cdef char *parent_str
         cdef int parent_size
+        cdef void *cache_entry
 
         version_id_str = start
         option_str = strchr(version_id_str, c' ')
@@ -229,20 +233,22 @@
 
         parents = self.process_parents(parent_str, end)
 
-        if version_id not in self.cache:
-            self.history.append(version_id)
+        cache_entry = PyDict_GetItem_void(self.cache, version_id)
+        if cache_entry == NULL:
+            PyList_Append(self.history, version_id)
             index = self.history_len
             self.history_len = self.history_len + 1
         else:
-            index = self.cache[version_id][5]
+            index = <object>PyTuple_GetItem_void_void(cache_entry, 5)
 
-        self.cache[version_id] = (version_id,
-                                  options,
-                                  pos,
-                                  size,
-                                  parents,
-                                  index,
-                                 )
+        PyDict_SetItem(self.cache, version_id,
+                       (version_id,
+                        options,
+                        pos,
+                        size,
+                        parents,
+                        index,
+                       ))
         return 1
 
     cdef int process_next_record(self) except -1:
@@ -257,12 +263,12 @@
             # Process until the end of the file
             last = self.end_str-1
             self.cur_str = self.end_str
-            line = PyString_FromStringAndSize(start, <int>(last - start))
+            line = PyString_FromStringAndSize(start, last - start)
             ending = PyString_FromStringAndSize(last, 1)
         else:
             # The last character is right before the '\n'
             # And the next string is right after it
-            line = PyString_FromStringAndSize(start, <int>(last - start))
+            line = PyString_FromStringAndSize(start, last - start)
             self.cur_str = last + 1
             last = last - 1
             ending = PyString_FromStringAndSize(last, 3)