Rev 3643: Clean up some variable names, add some documentation (in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree)
John Arbash Meinel
john at arbash-meinel.com
Wed Aug 20 17:49:07 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
------------------------------------------------------------
revno: 3643
revision-id: john at arbash-meinel.com-20080820164905-mqtajmbxbxlr078g
parent: john at arbash-meinel.com-20080819231201-hc361se0f7okf08y
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Wed 2008-08-20 11:49:05 -0500
message:
Clean up some variable names, add some documentation.
modified:
bzrlib/_parse_btree_c.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
-------------- next part --------------
=== modified file 'bzrlib/_parse_btree_c.pyx'
--- a/bzrlib/_parse_btree_c.pyx 2008-08-19 23:12:01 +0000
+++ b/bzrlib/_parse_btree_c.pyx 2008-08-20 16:49:05 +0000
@@ -16,38 +16,24 @@
"""Pyrex extensions to btree node parsing."""
-import sys
-
cdef extern from "stdlib.h":
ctypedef unsigned size_t
- long int strtol(char *nptr, char **endptr, int base)
-
cdef extern from "Python.h":
- int PyDict_CheckExact(object)
- void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
- int PyDict_SetItem(object p, object key, object val) except -1
-
int PyList_Append(object lst, object item) except -1
- object PyList_GET_ITEM(object lst, int index)
- int PyList_CheckExact(object)
-
- void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
char *PyString_AsString(object p)
- object PyString_FromStringAndSize(char *, int)
- object PyString_FromString(char *)
- int PyString_Size(object p)
-
- void Py_INCREF(object)
-
+ object PyString_FromStringAndSize(char *, Py_ssize_t)
+ Py_ssize_t PyString_Size(object p)
cdef extern from "string.h":
void *memchr(void *s, int c, size_t n)
+ # GNU extension
# void *memrchr(void *s, int c, size_t n)
int strncmp(char *s1, char *s2, size_t n)
+# TODO: Find some way to import this from _dirstate_helpers
cdef void* _my_memrchr(void *s, int c, size_t n):
# memrchr seems to be a GNU extension, so we have to implement it ourselves
# It is not present in any win32 standard library
@@ -62,34 +48,59 @@
pos = pos - 1
return NULL
+# TODO: Import this from _dirstate_helpers when it is merged
+cdef object safe_string_from_size(char *s, Py_ssize_t size):
+ if size < 0:
+ raise AssertionError(
+ 'tried to create a string with an invalid size: %d @0x%x'
+ % (size, <int>s))
+ return PyString_FromStringAndSize(s, size)
+
cdef class BTreeLeafParser:
+ """Parse the leaf nodes of a BTree index.
+
+ :ivar bytes: The PyString object containing the uncompressed text for the
+ node.
+ :ivar key_length: An integer describing how many pieces the keys have for
+ this index.
+ :ivar ref_list_length: An integer describing how many references this index
+ contains.
+ :ivar keys: A PyList of keys found in this node.
+
+ :ivar _cur_str: A pointer to the start of the next line to parse
+ :ivar _end_str: A pointer to the end of bytes
+ :ivar _start: Pointer to the location within the current line while
+ parsing.
+ :ivar _header_found: True when we have parsed the header for this node
+ """
cdef object bytes
cdef int key_length
cdef int ref_list_length
cdef object keys
- cdef char * cur_str
- cdef char * end_str
+ cdef char * _cur_str
+ cdef char * _end_str
# The current start point for parsing
- cdef char * start
+ cdef char * _start
- cdef int header_found
+ cdef int _header_found
def __init__(self, bytes, key_length, ref_list_length):
self.bytes = bytes
self.key_length = key_length
self.ref_list_length = ref_list_length
self.keys = []
- self.cur_str = NULL
- self.end_str = NULL
- self.header_found = 0
+ self._cur_str = NULL
+ self._end_str = NULL
+ self._header_found = 0
cdef extract_key(self, char * last):
"""Extract a key.
- :param last: points at the byte after the last byte permitted for the key.
+ :param last: points at the byte after the last byte permitted for the
+ key.
"""
cdef char *temp_ptr
cdef int loop_counter
@@ -99,7 +110,7 @@
while loop_counter < self.key_length:
loop_counter = loop_counter + 1
# grab a key segment
- temp_ptr = <char*>memchr(self.start, c'\0', last - self.start)
+ temp_ptr = <char*>memchr(self._start, c'\0', last - self._start)
if temp_ptr == NULL:
if loop_counter == self.key_length:
# capture to last
@@ -107,12 +118,14 @@
else:
# Invalid line
failure_string = ("invalid key, wanted segment from " +
- repr(PyString_FromStringAndSize(self.start, last-self.start)))
+ repr(safe_string_from_size(self._start,
+ last - self._start)))
raise AssertionError(failure_string)
# capture the key string
- key_element = PyString_FromStringAndSize(self.start, temp_ptr - self.start)
+ key_element = safe_string_from_size(self._start,
+ temp_ptr - self._start)
# advance our pointer
- self.start = temp_ptr + 1
+ self._start = temp_ptr + 1
PyList_Append(key_segments, key_element)
return tuple(key_segments)
@@ -124,42 +137,44 @@
cdef char *next_start
cdef int loop_counter
- self.start = self.cur_str
+ self._start = self._cur_str
# Find the next newline
- last = <char*>memchr(self.start, c'\n', self.end_str - self.start)
+ last = <char*>memchr(self._start, c'\n', self._end_str - self._start)
if last == NULL:
# Process until the end of the file
- last = self.end_str
- self.cur_str = self.end_str
+ last = self._end_str
+ self._cur_str = self._end_str
else:
# And the next string is right after it
- self.cur_str = last + 1
+ self._cur_str = last + 1
# The last character is right before the '\n'
last = last
- if last == self.start:
+ if last == self._start:
# parsed it all.
return 0
- if last < self.start:
+ if last < self._start:
# Unexpected error condition - fail
return -1
- if 0 == self.header_found:
- if strncmp("type=leaf", self.start, last-self.start) == 0:
- self.header_found = 1
+ if 0 == self._header_found:
+ # The first line in a leaf node is the header "type=leaf\n"
+ if strncmp("type=leaf", self._start, last - self._start) == 0:
+ self._header_found = 1
return 0
else:
- print "failed strncmp", repr(PyString_FromStringAndSize(self.start, last-self.start))
+ raise AssertionError('Node did not start with "type=leaf": %r'
+ % (safe_string_from_size(self._start, last - self._start)))
return -1
key = self.extract_key(last)
# find the value area
- temp_ptr = <char*>_my_memrchr(self.start, c'\0', last - self.start)
+ temp_ptr = <char*>_my_memrchr(self._start, c'\0', last - self._start)
if temp_ptr == NULL:
# Invalid line
return -1
else:
# capture the value string
- value = PyString_FromStringAndSize(temp_ptr + 1, last - temp_ptr - 1)
+ value = safe_string_from_size(temp_ptr + 1, last - temp_ptr - 1)
# shrink the references end point
last = temp_ptr
if self.ref_list_length:
@@ -169,10 +184,10 @@
ref_list = []
# extract a reference list
loop_counter = loop_counter + 1
- if last < self.start:
+ if last < self._start:
return -1
# find the next reference list end point:
- temp_ptr = <char*>memchr(self.start, c'\t', last - self.start)
+ temp_ptr = <char*>memchr(self._start, c'\t', last - self._start)
if temp_ptr == NULL:
# Only valid for the last list
if loop_counter != self.ref_list_length:
@@ -188,20 +203,21 @@
ref_ptr = temp_ptr
next_start = temp_ptr + 1
# Now, there may be multiple keys in the ref list.
- while self.start < ref_ptr:
+ while self._start < ref_ptr:
# loop finding keys and extracting them
- temp_ptr = <char*>memchr(self.start, c'\r', ref_ptr - self.start)
+ temp_ptr = <char*>memchr(self._start, c'\r',
+ ref_ptr - self._start)
if temp_ptr == NULL:
# key runs to the end
temp_ptr = ref_ptr
PyList_Append(ref_list, self.extract_key(temp_ptr))
PyList_Append(ref_lists, tuple(ref_list))
# prepare for the next reference list
- self.start = next_start
+ self._start = next_start
ref_lists = tuple(ref_lists)
node_value = (value, ref_lists)
else:
- if last != self.start:
+ if last != self._start:
# unexpected reference data present
return -1
node_value = (value, ())
@@ -211,10 +227,10 @@
def parse(self):
cdef int byte_count
byte_count = PyString_Size(self.bytes)
- self.cur_str = PyString_AsString(self.bytes)
+ self._cur_str = PyString_AsString(self.bytes)
# This points to the last character in the string
- self.end_str = self.cur_str + byte_count
- while self.cur_str < self.end_str:
+ self._end_str = self._cur_str + byte_count
+ while self._cur_str < self._end_str:
self.process_line()
return self.keys
More information about the bazaar-commits
mailing list