Rev 2500: Switch from strchr to memchr to ensure we never search all of memory. in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

John Arbash Meinel john at arbash-meinel.com
Fri Jun 29 16:39:12 BST 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

------------------------------------------------------------
revno: 2500
revision-id: john at arbash-meinel.com-20070629153901-nxydiwh8t8ug76yl
parent: john at arbash-meinel.com-20070629010927-jf6ybo016kpmb6y4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_index_pyrex
timestamp: Fri 2007-06-29 10:39:01 -0500
message:
  Switch from strchr to memchr to ensure we never search all of memory.
modified:
  bzrlib/_knit_load_data_c.pyx   knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
-------------- next part --------------
=== modified file 'bzrlib/_knit_load_data_c.pyx'
--- a/bzrlib/_knit_load_data_c.pyx	2007-06-29 00:31:00 +0000
+++ b/bzrlib/_knit_load_data_c.pyx	2007-06-29 15:39:01 +0000
@@ -22,6 +22,7 @@
 
 
 cdef extern from "stdlib.h":
+    ctypedef unsigned size_t
     long int strtol(char *nptr, char **endptr, int base)
     unsigned long int strtoul(char *nptr, char **endptr, int base)
 
@@ -56,9 +57,7 @@
 
 
 cdef extern from "string.h":
-    char *strchr(char *s1, char c)
-    int strncmp(char *s1, char *s2, int len)
-    int strcmp(char *s1, char *s2)
+    void *memchr(void *s, int c, size_t n)
 
 
 cdef class KnitIndexReader:
@@ -91,38 +90,24 @@
         if not PyList_CheckExact(self.history):
             raise TypeError('kndx._history must be a python list')
 
-    cdef char * _end_of_option(self, char *option_str, char *end):
-        """Find the end of this option string.
-
-        This is similar to doing ``strchr(option_str, ',')``, except
-        it knows to stop if it hits 'end' first.
-        """
-        cdef char * cur
-
-        cur = option_str
-        while cur < end:
-            if cur[0] == c',' or cur[0] == c' ':
-                return cur
-            cur = cur + 1
-        return end
-
     cdef object process_options(self, char *option_str, char *end):
         """Process the options string into a list."""
         cdef char *next
 
+        # This is alternative code which creates a python string and splits it.
+        # It is "correct" and more obvious, but slower than the following code.
+        # It can be uncommented to switch in case the other code is seen as
+        # suspect.
         # options = PyString_FromStringAndSize(option_str,
-        #                                      end-option_str)
+        #                                      end - option_str)
         # return options.split(',')
 
         final_options = []
 
         while option_str < end:
-            # Using strchr here actually hurts performance dramatically.
-            # Because you aren't guaranteed to have a ',' any time soon,
-            # so it may have to search for a long time.
-            # The closest function is memchr, but that seems to be a
-            # GNU extension.
-            next = self._end_of_option(option_str, end)
+            next = <char*>memchr(option_str, c',', end - option_str)
+            if next == NULL:
+                next = end
             next_option = PyString_FromStringAndSize(option_str,
                                                      next - option_str)
             PyList_Append(final_options, next_option)
@@ -137,6 +122,8 @@
         cdef int int_parent
         cdef char *parent_end
 
+        # Alternative, correct but slower code.
+        #
         # parents = PyString_FromStringAndSize(parent_str,
         #                                      end - parent_str)
         # real_parents = []
@@ -148,17 +135,18 @@
         # return real_parents
 
         parents = []
-        while parent_str <= end and parent_str != NULL:
-            # strchr is safe here, because our lines always end
-            # with ' :'
-            next = strchr(parent_str, c' ')
+        if parent_str == NULL:
+            return []
+        while parent_str <= end:
+            next = <char*>memchr(parent_str, c' ', end - parent_str)
             if next == NULL or next >= end or next == parent_str:
                 break
 
             if parent_str[0] == c'.':
                 # This is an explicit revision id
                 parent_str = parent_str + 1
-                parent = PyString_FromStringAndSize(parent_str, next-parent_str)
+                parent = PyString_FromStringAndSize(parent_str,
+                                                    next - parent_str)
             else:
                 # This in an integer mapping to original
                 # TODO: ensure that we are actually parsing the string
@@ -204,7 +192,7 @@
         cdef void *cache_entry
 
         version_id_str = start
-        option_str = strchr(version_id_str, c' ')
+        option_str = <char*>memchr(version_id_str, c' ', end - version_id_str)
         if option_str == NULL or option_str >= end:
             # Short entry
             return 0
@@ -212,21 +200,21 @@
         # Move past the space character
         option_str = option_str + 1
 
-        pos_str = strchr(option_str, c' ')
+        pos_str = <char*>memchr(option_str, c' ', end - option_str)
         if pos_str == NULL or pos_str >= end:
             # Short entry
             return 0
         option_end = pos_str
         pos_str = pos_str + 1
 
-        size_str = strchr(pos_str, c' ')
+        size_str = <char*>memchr(pos_str, c' ', end - pos_str)
         if size_str == NULL or size_str >= end:
             # Short entry
             return 0
         size_str = size_str + 1
 
         # TODO: Make sure this works when there are no parents
-        parent_str = strchr(size_str, c' ')
+        parent_str = <char*>memchr(size_str, c' ', end - size_str)
         if parent_str == NULL or parent_str >= end:
             # Missing parents
             return 0
@@ -272,10 +260,10 @@
 
         start = self.cur_str
         # Find the next newline
-        last = strchr(start, c'\n')
+        last = <char*>memchr(start, c'\n', self.end_str - start)
         if last == NULL:
             # Process until the end of the file
-            last = self.end_str-1
+            last = self.end_str - 1
             self.cur_str = self.end_str
             line = PyString_FromStringAndSize(start, last - start)
             ending = PyString_FromStringAndSize(last, 1)



More information about the bazaar-commits mailing list