Rev 4505: Start using the _LeafLineEntries directly in http://bazaar.launchpad.net/~jameinel/bzr/1.17-btree-faster
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 1 23:13:04 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-btree-faster
------------------------------------------------------------
revno: 4505
revision-id: john at arbash-meinel.com-20090701221254-5d901cn8py29zdes
parent: john at arbash-meinel.com-20090701215405-vu7kdewhxj51lh75
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-btree-faster
timestamp: Wed 2009-07-01 17:12:54 -0500
message:
Start using the _LeafLineEntries directly
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-07-01 21:54:05 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-07-01 22:12:54 +0000
@@ -49,6 +49,8 @@
void Py_INCREF(object)
PyObject *PyDict_GetItem(object, object)
+ int PyDict_SetItem(object d, object k, object v) except -1
+
object PyTuple_New(Py_ssize_t n)
void PyTuple_SET_ITEM(object, Py_ssize_t offset, object)
@@ -311,6 +313,71 @@
self.data_start = NULL
self.data_end = NULL
+ cdef _ensure_value(self):
+ cdef char *temp_ptr
+
+ if self.value is not None:
+ return
+ temp_ptr = <char*>_my_memrchr(self.data_start, c'\0',
+ self.data_end - self.data_start)
+ if temp_ptr == NULL:
+ # Invalid line
+ raise ValueError('Could not find the value section for %s'
+ % (self.key,))
+ # capture the value string
+ self.value = safe_string_from_size(temp_ptr + 1,
+ self.data_end - temp_ptr - 1)
+ # shrink the references end point
+ self.data_end = temp_ptr
+
+ cdef _ensure_refs(self, int key_length, int ref_list_length):
+ cdef int i
+ cdef char *temp_ptr, *ref_ptr, *start, *next_start, *last
+
+ if self.refs is not None:
+ return
+ # refs come before value, so make sure data_end has been updated
+ self._ensure_value()
+ if not ref_list_length:
+ self.refs = ()
+ return
+ ref_lists = PyTuple_New(ref_list_length)
+ last = self.data_end
+ start = self.data_start
+ for i from 0 <= i < ref_list_length:
+ # extract a reference list
+ if last < start:
+ raise ValueError('failed to find proper reference lists')
+ # find the next reference list end point:
+ temp_ptr = <char*>memchr(start, c'\t', last - start)
+ if temp_ptr == NULL:
+ # Only valid for the last list
+ if i + 1 != ref_list_length:
+ raise AssertionError("invalid key")
+ else:
+ # scan to the end of the ref list area
+ ref_ptr = last
+ next_start = last
+ else:
+ # scan to the end of this ref list
+ ref_ptr = temp_ptr
+ next_start = temp_ptr + 1
+ # Now, there may be multiple keys in the ref list.
+ ref_list = []
+ while start < ref_ptr:
+ # loop finding keys and extracting them
+ temp_ptr = <char*>memchr(start, c'\r', ref_ptr - start)
+ if temp_ptr == NULL:
+ # key runs to the end
+ temp_ptr = ref_ptr
+ key = _extract_key(&start, temp_ptr, key_length)
+ PyList_Append(ref_list, key)
+ ref_list = tuple(ref_list)
+ Py_INCREF(ref_list)
+ PyTuple_SET_ITEM(ref_lists, i, ref_list)
+ start = next_start
+ self.refs = ref_lists
+
cdef class _LeafNode:
@@ -356,14 +423,20 @@
next_line = _LeafLineEntry(key)
next_line.data_start = start
next_line.data_end = c_bytes + pos
- self._lines[key] = next_line
+ PyDict_SetItem(self._lines, key, next_line)
start = c_bytes + pos + 1
# Do we check that the very last byte is '\n'?
cdef _populate_keys(self):
cdef _LeafLineEntry entry
- keys = dict(_parse_leaf_lines(self._bytes, self._key_length,
- self._ref_list_length))
+ keys = {}
+ for entry in self._lines.itervalues():
+ entry._ensure_refs(self._key_length, self._ref_list_length)
+ PyDict_SetItem(keys, entry.key, (entry.value, entry.refs))
+ # keys = dict(_parse_leaf_lines(self._bytes, self._key_length,
+ # self._ref_list_length))
+ # if keys1 != keys:
+ # raise ValueError('%s != %s')
self._keys = keys
property keys:
@@ -377,10 +450,20 @@
# return len(self._keys)
def get(self, key):
- if self._keys is None:
- self._populate_keys()
- # PyDict_GetItem...
- return self._keys.get(key, None)
+ cdef _LeafLineEntry entry
+ cdef PyObject *maybe
+ if self._keys is not None:
+ maybe = PyDict_GetItem(self._keys, key)
+ if maybe == NULL:
+ return None
+ return <object> maybe
+ else:
+ maybe = PyDict_GetItem(self._lines, key)
+ if maybe == NULL:
+ return None
+ entry = <_LeafLineEntry>maybe
+ entry._ensure_refs(self._key_length, self._ref_list_length)
+ return (entry.value, entry.refs)
def _flatten_node(node, reference_lists):
More information about the bazaar-commits
mailing list