Rev 3897: A Pyrex extension is about 5x faster than the fastest python code I could write. in http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

John Arbash Meinel john at arbash-meinel.com
Thu Dec 11 02:19:25 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

------------------------------------------------------------
revno: 3897
revision-id: john at arbash-meinel.com-20081211021859-3ds8cwdqiq387t83
parent: john at arbash-meinel.com-20081211020207-rrgdcyqc344zo5q1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_record_stream_chunked
timestamp: Wed 2008-12-10 20:18:59 -0600
message:
  A Pyrex extension is about 5x faster than the fastest python code I could write.
  
  Seems worth having after all.
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore	2008-09-23 23:28:27 +0000
+++ b/.bzrignore	2008-12-11 02:18:59 +0000
@@ -39,6 +39,7 @@
 doc/**/*.html
 doc/developers/performance.png
 bzrlib/_btree_serializer_c.c
+bzrlib/_chunks_to_lines_pyx.c
 bzrlib/_dirstate_helpers_c.c
 bzrlib/_knit_load_data_c.c
 bzrlib/_readdir_pyx.c

=== added file 'bzrlib/_chunks_to_lines_pyx.pyx'
--- a/bzrlib/_chunks_to_lines_pyx.pyx	1970-01-01 00:00:00 +0000
+++ b/bzrlib/_chunks_to_lines_pyx.pyx	2008-12-11 02:18:59 +0000
@@ -0,0 +1,59 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+"""Pyrex extensions for converting chunks to lines."""
+
+# Python 2.4 support
+cdef extern from "python-compat.h":
+    pass
+
+cdef extern from "stdlib.h":
+    ctypedef unsigned size_t
+
+cdef extern from "Python.h":
+    ctypedef int Py_ssize_t # Required for older pyrex versions
+    ctypedef struct PyObject:
+        pass
+    int PyList_Append(object lst, object item) except -1
+
+    char *PyString_AsString(object p) except NULL
+    int PyString_AsStringAndSize(object s, char **buf, Py_ssize_t *len) except -1
+
+cdef extern from "string.h":
+    void *memchr(void *s, int c, size_t n)
+
+
+def chunks_to_lines(chunks):  # Return `chunks` itself if every chunk is one complete line, else re-split.
+    cdef char *c_str  # start of the current chunk's byte buffer
+    cdef char *newline  # first '\n' found in the current chunk, or NULL
+    cdef char *c_last  # address of the current chunk's final byte
+    cdef Py_ssize_t the_len  # byte length of the current chunk
+    # Fast path: each chunk must be non-empty with its only '\n' as the final
+    # byte. NOTE(review): `chunks` is iterated here and again by join() below.
+    for chunk in chunks:
+        PyString_AsStringAndSize(chunk, &c_str, &the_len)  # declared `except -1`: errors propagate
+        if the_len == 0:  # an empty chunk is not a line
+            break
+        c_last = c_str + the_len - 1
+        newline = <char *>memchr(c_str, c'\n', the_len)  # first newline within the chunk
+        if newline == NULL or newline != c_last:  # no newline, or one before the end
+            break
+    else:
+        return chunks  # loop finished without a break: input is handed back unchanged
+    # Slow path: at least one chunk failed the check, so join and re-split.
+    from bzrlib import osutils  # imported at call time; osutils itself imports this module
+    return osutils.split_lines(''.join(chunks))

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2008-12-11 02:02:07 +0000
+++ b/bzrlib/osutils.py	2008-12-11 02:18:59 +0000
@@ -854,6 +854,12 @@
     return split_lines(''.join(chunks))
 
 
+try:
+    from bzrlib._chunks_to_lines_pyx import chunks_to_lines
+except ImportError:
+    pass
+
+
 def split_lines(s):
     """Split s into lines, but without removing the newline characters."""
     lines = s.split('\n')

=== modified file 'setup.py'
--- a/setup.py	2008-10-16 03:58:42 +0000
+++ b/setup.py	2008-12-11 02:18:59 +0000
@@ -258,6 +258,7 @@
 
 
 add_pyrex_extension('bzrlib._btree_serializer_c')
+add_pyrex_extension('bzrlib._chunks_to_lines_pyx')
 add_pyrex_extension('bzrlib._knit_load_data_c')
 if sys.platform == 'win32':
     add_pyrex_extension('bzrlib._dirstate_helpers_c',



More information about the bazaar-commits mailing list