Rev 2512: Finally, faster than text.split() (156ms) in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex
John Arbash Meinel
john at arbash-meinel.com
Sat May 5 05:58:04 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex
------------------------------------------------------------
revno: 2512
revision-id: john at arbash-meinel.com-20070505045753-1fwhap6q0jyb18vt
parent: john at arbash-meinel.com-20070505043606-lw7bjxwzcnjbls9v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate_pyrex
timestamp: Fri 2007-05-04 23:57:53 -0500
message:
Finally, faster than text.split() (156ms)
By iterating over the fields directly, we don't have to create Python strings
for the dirname field (except when it changes), or for the size and
is_executable fields.
Far fewer Python objects means faster parsing.
modified:
bzrlib/compiled/dirstate_helpers.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
-------------- next part --------------
=== modified file 'bzrlib/compiled/dirstate_helpers.pyx'
--- a/bzrlib/compiled/dirstate_helpers.pyx 2007-05-05 04:36:06 +0000
+++ b/bzrlib/compiled/dirstate_helpers.pyx 2007-05-05 04:57:53 +0000
@@ -272,53 +272,58 @@
PyList_Append(fields, self.get_next_str())
return fields
- cdef object _fields_to_entry_0_parents(self,
- PyListObject *fields, int offset,
- void **p_current_dirname,
- int *new_block):
+ cdef object _get_entry_0_parents(self, void **p_current_dirname,
+ int *new_block):
cdef object path_name_file_id_key
- cdef char *size_str
- cdef unsigned long int size
+ cdef char *entry_size_str
+ cdef unsigned long int entry_size
cdef char* executable_str
cdef int is_executable
- cdef PyObject **base
- cdef void* dirname
cdef char* dirname_str
-
- # Is this too abusive?
- base = fields.ob_item + offset
-
- dirname = base[0]
- dirname_str = PyString_AS_STRING_void(dirname)
-
- if strcmp(dirname_str,
- PyString_AS_STRING_void(p_current_dirname[0])) != 0:
- Py_INCREF_PyObject(<PyObject *>dirname)
- p_current_dirname[0] = dirname
+ cdef char* trailing
+ cdef int cur_size
+ cdef object minikind
+ cdef object fingerprint
+ cdef object info
+
+ dirname_str = self.get_next(&cur_size)
+ if strncmp(dirname_str,
+ PyString_AS_STRING_void(p_current_dirname[0]),
+ cur_size+1) != 0:
+ dirname = PyString_FromStringAndSize(dirname_str, cur_size)
+ p_current_dirname[0] = <void*>dirname
new_block[0] = 1
else:
new_block[0] = 0
path_name_file_id_key = (<object>p_current_dirname[0],
- <object>(base[1]),
- <object>(base[2]),
+ self.get_next_str(),
+ self.get_next_str(),
)
- size_str = PyString_AS_STRING_void(<void*>(base[5]))
- size = strtoul(size_str, NULL, 10)
- executable_str = PyString_AS_STRING_void(<void*>(base[6]))
- if executable_str[0] == c'y':
- is_executable = 0
- else:
- is_executable = 0
- return (path_name_file_id_key, [
+ minikind = self.get_next_str()
+ fingerprint = self.get_next_str()
+ entry_size_str = self.get_next(&cur_size)
+ entry_size = strtoul(entry_size_str, NULL, 10)
+ executable_str = self.get_next(&cur_size)
+ is_executable = (executable_str[0] == c'y')
+ info = self.get_next_str()
+
+ ret = (path_name_file_id_key, [
( # Current tree
- <object>(base[3]),# minikind
- <object>(base[4]),# fingerprint
- size, # size
- is_executable, # executable
- <object>(base[7]),# packed_stat or revision_id
+ minikind, # minikind
+ fingerprint, # fingerprint
+ entry_size, # size
+ is_executable,# executable
+ info, # packed_stat or revision_id
)])
+ # Ignore the trailing newline
+ trailing = self.get_next(&cur_size)
+ if cur_size != 1 or trailing[0] != c'\n':
+ raise AssertionError(
+ 'Bad parse, we expected to end on \\n, not: %d %s: %s'
+ % (cur_size, PyString_FromString(trailing), ret))
+ return ret
def _parse_dirblocks_0_parents(self, state, entry_size):
cdef object current_block
@@ -336,12 +341,7 @@
new_block = 0
while not self.done():
- fields = self.get_entry(entry_size)
- # entry = self._get_entry_0_parents(&current_dirname, &new_block)
- entry = self._fields_to_entry_0_parents(<PyListObject *>fields,
- 0,
- &current_dirname,
- &new_block)
+ entry = self._get_entry_0_parents(&current_dirname, &new_block)
if new_block:
# new block - different dirname
current_block = []
More information about the bazaar-commits
mailing list