Rev 2504: Start working towards a parser which uses a Reader (producer) in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex
John Arbash Meinel
john at arbash-meinel.com
Fri May 4 22:41:58 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex
------------------------------------------------------------
revno: 2504
revision-id: john at arbash-meinel.com-20070504214147-ckrxzu7bepvcs4ct
parent: john at arbash-meinel.com-20070504210438-cvtzgzh4xbad7kww
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate_pyrex
timestamp: Fri 2007-05-04 16:41:47 -0500
message:
Start working towards a parser which uses a Reader (producer)
rather than working on a list of fields. Currently slower than text.split('\0'),
but should be possible to avoid the intermediate list entirely.
modified:
bzrlib/compiled/dirstate_helpers.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
-------------- next part --------------
=== modified file 'bzrlib/compiled/dirstate_helpers.pyx'
--- a/bzrlib/compiled/dirstate_helpers.pyx 2007-05-04 21:04:38 +0000
+++ b/bzrlib/compiled/dirstate_helpers.pyx 2007-05-04 21:41:47 +0000
@@ -25,6 +25,15 @@
cdef extern from *:  # "*" form: declarations only, no header is named for this block
ctypedef int size_t  # lets Pyrex treat size_t as an integer type
+
+cdef extern from "stdlib.h":  # NOTE(review): standard C declares FILE and fread in <stdio.h>; only strtoul is a <stdlib.h> function - confirm the header
+ struct _FILE:  # body left empty: the struct is only referred to through FILE pointers
+ pass
+ ctypedef _FILE FILE
+ size_t fread(void *ptr, size_t size, size_t count, FILE *stream)  # declared here but not called in the hunks shown
+ unsigned long int strtoul(char *nptr, char **endptr, int base)  # same declaration this patch removes from the old stdlib.h block further down
+
+
cdef extern from "Python.h":
# GetItem returns a borrowed reference
struct _PyObject:
@@ -53,6 +62,7 @@
char *PyString_AsString(object p)
char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
+ object PyString_FromString(char *)
int PyString_Size(object p)
int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
int PyString_CheckExact(object p)
@@ -61,9 +71,8 @@
void Py_INCREF_PyObject "Py_INCREF" (PyObject *)
void Py_DECREF(object)
+ FILE *PyFile_AsFile(object p)
-cdef extern from "stdlib.h":
- unsigned long int strtoul(char *nptr, char **endptr, int base)
cdef extern from "string.h":
char *strchr(char *s1, char c)
@@ -270,6 +279,46 @@
state._split_root_dirblock_into_contents()
+cdef class Reader:  # cursor over a string whose fields are each ended by a NUL byte
+ """Maintain the current location, and return fields as you parse them."""
+
+ cdef object text # The overall string object; stored so the Reader keeps a reference to it
+ cdef char *text_str # Pointer to the beginning of text
+ cdef int text_size # Length of text
+
+ cdef char *end_str # One past the last byte of text (text_str + text_size)
+ cdef char *cur # Pointer to the current record, i.e. the next field get_next() will return
+ cdef char *next # Pointer to the end of this record; never assigned in this class (get_next uses a local named next)
+
+ def __new__(self, text):  # text: the Python string to parse; the cursor starts at its first byte
+ self.text = text
+ self.text_str = PyString_AsString(text)  # raw pointer to the character data of text
+ self.text_size = PyString_Size(text)
+ self.end_str = self.text_str + self.text_size
+ self.cur = self.text_str
+
+ cdef int done(self):  # non-zero once cur has reached or passed end_str
+ return self.cur >= self.end_str
+
+ cdef char *get_next(self):
+ """Return a pointer to the start of the next field."""
+ cdef char *next
+ next = self.cur
+ self.cur = strchr(next, c'\0') + 1  # step over the NUL that ends this field; NOTE(review): no check against end_str here, so callers should test done() first - confirm
+ return next  # points into the data of text, not a copy
+
+ def get_next_str(self):
+ """Get the next field as a Python string."""
+ return PyString_FromString(self.get_next())  # builds a new string from the bytes up to the field's NUL
+
+ def get_all_fields(self):
+ """Get a list of all fields"""
+ fields = []
+ while not self.done():  # consume from the current position until the end of text
+ PyList_Append(fields, self.get_next_str())
+ return fields
+
+
def _c_read_dirblocks(state):
"""Read in the dirblocks for the given DirState object.
@@ -285,19 +334,19 @@
cdef int entry_size
cdef int field_count
cdef int num_present_parents
+ cdef char *next_field
state._state_file.seek(state._end_of_header)
text = state._state_file.read()
# TODO: check the crc checksums. crc_measured = zlib.crc32(text)
- fields = text.split('\0')
- # Remove the last blank entry
- trailing = fields.pop()
- assert trailing == ''
- # consider turning fields into a tuple.
+ reader = Reader(text)
+
+ fields = reader.get_all_fields()
# skip the first field which is the trailing null from the header.
cur = 1
+
# Each line now has an extra '\n' field which is not used
# so we just skip over it
# entry size:
More information about the bazaar-commits mailing list