Rev 4765: Merge in the static-tuple-no-use branch, and bring back the chk_map use. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map
John Arbash Meinel
john at arbash-meinel.com
Thu Oct 8 17:02:14 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map
------------------------------------------------------------
revno: 4765 [merge]
revision-id: john at arbash-meinel.com-20091008160153-krcoicv53v517v7f
parent: john at arbash-meinel.com-20091008041043-579p0as1c3xq1q6w
parent: john at arbash-meinel.com-20091008145954-v365z5qkqi6eghpd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple-chk-map
timestamp: Thu 2009-10-08 11:01:53 -0500
message:
Merge in the static-tuple-no-use branch, and bring back the chk_map use.
modified:
bzrlib/_btree_serializer_pyx.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
bzrlib/_simple_set_pyx.pxd _static_tuple_intern-20091002191442-ix6rkqbw8ktoj7r5-1
bzrlib/_simple_set_pyx.pyx _static_tuple_intern-20091002053806-sid67p8spedru51w-1
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test__simple_set.py test__static_tuple_i-20091002053806-sid67p8spedru51w-2
setup.py setup.py-20050314065409-02f8a0a6e3f9bc70
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-10-08 04:10:43 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-10-08 05:12:01 +0000
@@ -38,8 +38,6 @@
Py_ssize_t PyString_Size(object p)
Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *)
char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *)
- char * PyString_AS_STRING(object)
- Py_ssize_t PyString_GET_SIZE(object)
int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len)
void PyString_InternInPlace(PyObject **)
int PyTuple_CheckExact(object t)
@@ -57,12 +55,6 @@
# void *memrchr(void *s, int c, size_t n)
int strncmp(char *s1, char *s2, size_t n)
-# It seems we need to import the definitions so that the pyrex compiler has
-# local names to access them.
-from _static_tuple_c cimport StaticTuple, \
- import_static_tuple_c, StaticTuple_New, \
- StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
-
# TODO: Find some way to import this from _dirstate_helpers
cdef void* _my_memrchr(void *s, int c, size_t n):
@@ -79,7 +71,6 @@
pos = pos - 1
return NULL
-
# TODO: Import this from _dirstate_helpers when it is merged
cdef object safe_string_from_size(char *s, Py_ssize_t size):
if size < 0:
@@ -103,12 +94,6 @@
Py_DECREF_ptr(py_str)
return result
-from bzrlib import _static_tuple_c
-cdef object _ST
-_ST = _static_tuple_c.StaticTuple
-# This sets up the StaticTuple C_API functionality
-import_static_tuple_c()
-
cdef class BTreeLeafParser:
"""Parse the leaf nodes of a BTree index.
@@ -148,7 +133,6 @@
self._cur_str = NULL
self._end_str = NULL
self._header_found = 0
- # keys are tuples
cdef extract_key(self, char * last):
"""Extract a key.
@@ -158,9 +142,8 @@
"""
cdef char *temp_ptr
cdef int loop_counter
- cdef StaticTuple key
-
- key = StaticTuple_New(self.key_length)
+ # keys are tuples
+ key = PyTuple_New(self.key_length)
for loop_counter from 0 <= loop_counter < self.key_length:
# grab a key segment
temp_ptr = <char*>memchr(self._start, c'\0', last - self._start)
@@ -175,19 +158,15 @@
last - self._start)))
raise AssertionError(failure_string)
# capture the key string
- if (self.key_length == 1
- and (temp_ptr - self._start) == 45
- and strncmp(self._start, 'sha1:', 5) == 0):
- key_element = safe_string_from_size(self._start,
- temp_ptr - self._start)
- else:
- key_element = safe_interned_string_from_size(self._start,
+ # TODO: Consider using PyIntern_FromString, the only caveat is that
+ # it assumes a NULL-terminated string, so we have to check if
+ # temp_ptr[0] == c'\0' or some other char.
+ key_element = safe_interned_string_from_size(self._start,
temp_ptr - self._start)
# advance our pointer
self._start = temp_ptr + 1
Py_INCREF(key_element)
- StaticTuple_SET_ITEM(key, loop_counter, key_element)
- key = StaticTuple_Intern(key)
+ PyTuple_SET_ITEM(key, loop_counter, key_element)
return key
cdef int process_line(self) except -1:
@@ -197,7 +176,6 @@
cdef char *ref_ptr
cdef char *next_start
cdef int loop_counter
- cdef Py_ssize_t str_len
self._start = self._cur_str
# Find the next newline
@@ -233,25 +211,12 @@
# Invalid line
raise AssertionError("Failed to find the value area")
else:
- # Because of how conversions were done, we ended up with *lots* of
- # values that are identical. These are all of the 0-length nodes
- # that are referred to by the TREE_ROOT (and likely some other
- # directory nodes.) For example, bzr has 25k references to
- # something like '12607215 328306 0 0', which ends up consuming 1MB
- # of memory, just for those strings.
- str_len = last - temp_ptr - 1
- if (str_len > 4
- and strncmp(" 0 0", last - 4, 4) == 0):
- # This drops peak mem for bzr.dev from 87.4MB => 86.2MB
- # For Launchpad 236MB => 232MB
- value = safe_interned_string_from_size(temp_ptr + 1, str_len)
- else:
- value = safe_string_from_size(temp_ptr + 1, str_len)
+ # capture the value string
+ value = safe_string_from_size(temp_ptr + 1, last - temp_ptr - 1)
# shrink the references end point
last = temp_ptr
-
if self.ref_list_length:
- ref_lists = StaticTuple_New(self.ref_list_length)
+ ref_lists = []
loop_counter = 0
while loop_counter < self.ref_list_length:
ref_list = []
@@ -283,20 +248,18 @@
if temp_ptr == NULL:
# key runs to the end
temp_ptr = ref_ptr
-
PyList_Append(ref_list, self.extract_key(temp_ptr))
- ref_list = StaticTuple_Intern(StaticTuple(*ref_list))
- Py_INCREF(ref_list)
- StaticTuple_SET_ITEM(ref_lists, loop_counter - 1, ref_list)
+ PyList_Append(ref_lists, tuple(ref_list))
# prepare for the next reference list
self._start = next_start
- node_value = StaticTuple(value, ref_lists)
+ ref_lists = tuple(ref_lists)
+ node_value = (value, ref_lists)
else:
if last != self._start:
# unexpected reference data present
raise AssertionError("unexpected reference data present")
- node_value = StaticTuple(value, StaticTuple())
- PyList_Append(self.keys, StaticTuple(key, node_value))
+ node_value = (value, ())
+ PyList_Append(self.keys, (key, node_value))
return 0
def parse(self):
@@ -331,6 +294,7 @@
cdef Py_ssize_t flat_len
cdef Py_ssize_t key_len
cdef Py_ssize_t node_len
+ cdef PyObject * val
cdef char * value
cdef Py_ssize_t value_len
cdef char * out
@@ -339,12 +303,13 @@
cdef int first_ref_list
cdef int first_reference
cdef int i
+ cdef PyObject *ref_bit
cdef Py_ssize_t ref_bit_len
- if not PyTuple_CheckExact(node) and not StaticTuple_CheckExact(node):
- raise TypeError('We expected a tuple() or StaticTuple() for node not: %s'
+ if not PyTuple_CheckExact(node):
+ raise TypeError('We expected a tuple() for node not: %s'
% type(node))
- node_len = len(node)
+ node_len = PyTuple_GET_SIZE(node)
have_reference_lists = reference_lists
if have_reference_lists:
if node_len != 4:
@@ -354,7 +319,7 @@
raise ValueError('Without ref_lists, we need at least 3 entries not: %s'
% len(node))
# I don't expect that we can do faster than string.join()
- string_key = '\0'.join(node[1])# <object>PyTuple_GET_ITEM_ptr_object(node, 1))
+ string_key = '\0'.join(<object>PyTuple_GET_ITEM_ptr_object(node, 1))
# TODO: instead of using string joins, precompute the final string length,
# and then malloc a single string and copy everything in.
@@ -371,7 +336,7 @@
refs_len = 0
if have_reference_lists:
# Figure out how many bytes it will take to store the references
- ref_lists = node[3]# <object>PyTuple_GET_ITEM_ptr_object(node, 3)
+ ref_lists = <object>PyTuple_GET_ITEM_ptr_object(node, 3)
next_len = len(ref_lists) # TODO: use a Py function
if next_len > 0:
# If there are no nodes, we don't need to do any work
@@ -385,32 +350,31 @@
# references
refs_len = refs_len + (next_len - 1)
for reference in ref_list:
- if (not PyTuple_CheckExact(reference)
- and not StaticTuple_CheckExact(reference)):
+ if not PyTuple_CheckExact(reference):
raise TypeError(
'We expect references to be tuples not: %s'
% type(reference))
- next_len = len(reference)
+ next_len = PyTuple_GET_SIZE(reference)
if next_len > 0:
# We will need (len - 1) '\x00' characters to
# separate the reference key
refs_len = refs_len + (next_len - 1)
for i from 0 <= i < next_len:
- ref_bit = reference[i]
- if not PyString_CheckExact(ref_bit):
+ ref_bit = PyTuple_GET_ITEM_ptr_object(reference, i)
+ if not PyString_CheckExact_ptr(ref_bit):
raise TypeError('We expect reference bits'
' to be strings not: %s'
% type(<object>ref_bit))
- refs_len = refs_len + PyString_GET_SIZE(ref_bit)
+ refs_len = refs_len + PyString_GET_SIZE_ptr(ref_bit)
# So we have the (key NULL refs NULL value LF)
key_len = PyString_Size(string_key)
- val = node[2] # PyTuple_GET_ITEM_ptr_object(node, 2)
- if not PyString_CheckExact(val):
+ val = PyTuple_GET_ITEM_ptr_object(node, 2)
+ if not PyString_CheckExact_ptr(val):
raise TypeError('Expected a plain str for value not: %s'
- % type(val))
- value = PyString_AS_STRING(val)
- value_len = PyString_GET_SIZE(val)
+ % type(<object>val))
+ value = PyString_AS_STRING_ptr(val)
+ value_len = PyString_GET_SIZE_ptr(val)
flat_len = (key_len + 1 + refs_len + 1 + value_len + 1)
line = PyString_FromStringAndSize(NULL, flat_len)
# Get a pointer to the new buffer
@@ -432,14 +396,14 @@
out[0] = c'\r'
out = out + 1
first_reference = 0
- next_len = len(reference)
+ next_len = PyTuple_GET_SIZE(reference)
for i from 0 <= i < next_len:
if i != 0:
out[0] = c'\x00'
out = out + 1
- ref_bit = reference[i] #PyTuple_GET_ITEM_ptr_object(reference, i)
- ref_bit_len = PyString_GET_SIZE(ref_bit)
- memcpy(out, PyString_AS_STRING(ref_bit), ref_bit_len)
+ ref_bit = PyTuple_GET_ITEM_ptr_object(reference, i)
+ ref_bit_len = PyString_GET_SIZE_ptr(ref_bit)
+ memcpy(out, PyString_AS_STRING_ptr(ref_bit), ref_bit_len)
out = out + ref_bit_len
out[0] = c'\0'
out = out + 1
=== modified file 'bzrlib/_simple_set_pyx.pxd'
--- a/bzrlib/_simple_set_pyx.pxd 2009-10-07 19:31:39 +0000
+++ b/bzrlib/_simple_set_pyx.pxd 2009-10-08 04:40:16 +0000
@@ -27,6 +27,7 @@
ctypedef struct PyObject:
pass
+
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
"""A class similar to PySet, but with simpler implementation.
@@ -53,7 +54,14 @@
cdef int _insert_clean(self, PyObject *key) except -1
cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
+
# TODO: might want to export the C api here, though it is all available from
# the class object...
cdef api object SimpleSet_Add(object self, object key)
cdef api SimpleSet SimpleSet_New()
+cdef api object SimpleSet_Add(object self, object key)
+cdef api int SimpleSet_Contains(object self, object key) except -1
+cdef api int SimpleSet_Discard(object self, object key) except -1
+cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
+cdef api Py_ssize_t SimpleSet_Size(object self) except -1
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key)
=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx 2009-10-07 21:17:47 +0000
+++ b/bzrlib/_simple_set_pyx.pyx 2009-10-08 04:40:16 +0000
@@ -38,6 +38,7 @@
void PyMem_Free(void *)
void memset(void *, int, size_t)
+
cdef object _dummy_obj
cdef PyObject *_dummy
_dummy_obj = object()
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-10-07 15:27:24 +0000
+++ b/bzrlib/index.py 2009-10-08 04:35:01 +0000
@@ -40,7 +40,6 @@
debug,
errors,
)
-from bzrlib._static_tuple_c import StaticTuple
_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = "key_elements="
@@ -103,7 +102,7 @@
def _check_key(self, key):
"""Raise BadIndexKey if key is not a valid key for this index."""
- if type(key) not in (tuple, StaticTuple):
+ if type(key) != tuple:
raise errors.BadIndexKey(key)
if self._key_length != len(key):
raise errors.BadIndexKey(key)
=== modified file 'bzrlib/tests/test__simple_set.py'
--- a/bzrlib/tests/test__simple_set.py 2009-10-07 19:31:39 +0000
+++ b/bzrlib/tests/test__simple_set.py 2009-10-08 04:35:01 +0000
@@ -24,39 +24,32 @@
tests,
)
-from bzrlib.tests import (
- test__static_tuple,
- )
-try:
- from bzrlib import _simple_set_pyx as _module
-except ImportError:
- _module = None
-try:
- from bzrlib._static_tuple_c import StaticTuple
-except ImportError:
- from bzrlib._static_tuple_py import StaticTuple
+try:
+ from bzrlib import _simple_set_pyx
+except ImportError:
+ _simple_set_pyx = None
# Even though this is an extension, we don't permute the tests for a python
# version. As the plain python version is just a dict or set
-class _CompiledStaticTupleInterned(tests.Feature):
+class _CompiledSimpleSet(tests.Feature):
def _probe(self):
- if _module is None:
+ if _simple_set_pyx is None:
return False
return True
def feature_name(self):
return 'bzrlib._simple_set_pyx'
-CompiledStaticTupleInterned = _CompiledStaticTupleInterned()
-
-
-class TestStaticTupleInterned(tests.TestCase):
-
- _test_needs_features = [CompiledStaticTupleInterned,
- test__static_tuple.CompiledStaticTuple]
+CompiledSimpleSet = _CompiledSimpleSet()
+
+
+class TestSimpleSet(tests.TestCase):
+
+ _test_needs_features = [CompiledSimpleSet]
+ module = _simple_set_pyx
def assertIn(self, obj, container):
self.assertTrue(obj in container,
@@ -76,17 +69,15 @@
assertRefcount actually creates a new pointer, as does calling
sys.getrefcount. So pass the expected value *before* the call.
"""
- # I don't understand why it is count+3 here, but it seems to be
- # correct. If I check in the calling function, with:
- # self.assertEqual(count+1, sys.getrefcount(obj))
- # Then it works fine. Something about passing it to assertRefcount is
- # actually double-incrementing (and decrementing) the refcount
- self.assertEqual(count+3, sys.getrefcount(obj))
+ # I'm not sure why the offset is 3, but I've check that in the caller,
+ # an offset of 1 works, which is expected. Not sure why assertRefcount
+ # is incrementing/decrementing 2 times
+ self.assertEqual(count, sys.getrefcount(obj)-3)
def test_initial(self):
- obj = _module.SimpleSet()
+ obj = self.module.SimpleSet()
self.assertEqual(0, len(obj))
- st = StaticTuple('foo', 'bar')
+ st = ('foo', 'bar')
self.assertFillState(0, 0, 0x3ff, obj)
def test__lookup(self):
@@ -98,23 +89,23 @@
# ('a', 'a'), ('f', '4'), ('p', 'r'), ('q', '1'), ('F', 'T'),
# ('Q', 'Q'), ('V', 'd'), ('7', 'C')
# all collide @ 643
- obj = _module.SimpleSet()
- offset, val = obj._test_lookup(StaticTuple('a', 'a'))
- self.assertEqual(643, offset)
- self.assertEqual('<null>', val)
- offset, val = obj._test_lookup(StaticTuple('f', '4'))
- self.assertEqual(643, offset)
- self.assertEqual('<null>', val)
- offset, val = obj._test_lookup(StaticTuple('p', 'r'))
+ obj = self.module.SimpleSet()
+ offset, val = obj._test_lookup(('a', 'a'))
+ self.assertEqual(643, offset)
+ self.assertEqual('<null>', val)
+ offset, val = obj._test_lookup(('f', '4'))
+ self.assertEqual(643, offset)
+ self.assertEqual('<null>', val)
+ offset, val = obj._test_lookup(('p', 'r'))
self.assertEqual(643, offset)
self.assertEqual('<null>', val)
def test_get_set_del_with_collisions(self):
- obj = _module.SimpleSet()
- k1 = StaticTuple('a', 'a')
- k2 = StaticTuple('f', '4') # collides
- k3 = StaticTuple('p', 'r')
- k4 = StaticTuple('q', '1')
+ obj = self.module.SimpleSet()
+ k1 = ('a', 'a')
+ k2 = ('f', '4') # collides
+ k3 = ('p', 'r')
+ k4 = ('q', '1')
self.assertEqual((643, '<null>'), obj._test_lookup(k1))
self.assertEqual((643, '<null>'), obj._test_lookup(k2))
self.assertEqual((643, '<null>'), obj._test_lookup(k3))
@@ -160,9 +151,12 @@
self.assertNotIn(k4, obj)
def test_add(self):
- obj = _module.SimpleSet()
+ obj = self.module.SimpleSet()
self.assertFillState(0, 0, 0x3ff, obj)
- k1 = StaticTuple('foo')
+ # We use this clumsy notation, because otherwise the refcounts are off.
+ # I'm guessing the python compiler sees it is a static tuple, and adds
+ # it to the function variables, or somesuch
+ k1 = tuple(['foo'])
self.assertRefcount(1, k1)
self.assertIs(k1, obj.add(k1))
self.assertFillState(1, 1, 0x3ff, obj)
@@ -172,7 +166,7 @@
self.assertIs(k1, ktest)
del ktest
self.assertRefcount(2, k1)
- k2 = StaticTuple('foo')
+ k2 = tuple(['foo'])
self.assertRefcount(1, k2)
self.assertIsNot(k1, k2)
# doesn't add anything, so the counters shouldn't be adjusted
@@ -188,7 +182,7 @@
del obj[k1]
self.assertFillState(0, 1, 0x3ff, obj)
self.assertRefcount(1, k1)
- k3 = StaticTuple('bar')
+ k3 = tuple(['bar'])
self.assertRefcount(1, k3)
self.assertIs(k3, obj.add(k3))
self.assertFillState(1, 2, 0x3ff, obj)
@@ -200,10 +194,10 @@
self.assertRefcount(2, k3)
def test_discard(self):
- obj = _module.SimpleSet()
- k1 = StaticTuple('foo')
- k2 = StaticTuple('foo')
- k3 = StaticTuple('bar')
+ obj = self.module.SimpleSet()
+ k1 = tuple(['foo'])
+ k2 = tuple(['foo'])
+ k3 = tuple(['bar'])
self.assertRefcount(1, k1)
self.assertRefcount(1, k2)
self.assertRefcount(1, k3)
@@ -217,10 +211,10 @@
self.assertRefcount(1, k3)
def test__delitem__(self):
- obj = _module.SimpleSet()
- k1 = StaticTuple('foo')
- k2 = StaticTuple('foo')
- k3 = StaticTuple('bar')
+ obj = self.module.SimpleSet()
+ k1 = tuple(['foo'])
+ k2 = tuple(['foo'])
+ k3 = tuple(['bar'])
self.assertRefcount(1, k1)
self.assertRefcount(1, k2)
self.assertRefcount(1, k3)
@@ -234,10 +228,10 @@
self.assertRefcount(1, k3)
def test__resize(self):
- obj = _module.SimpleSet()
- k1 = StaticTuple('foo')
- k2 = StaticTuple('bar')
- k3 = StaticTuple('baz')
+ obj = self.module.SimpleSet()
+ k1 = ('foo',)
+ k2 = ('bar',)
+ k3 = ('baz',)
obj.add(k1)
obj.add(k2)
obj.add(k3)
@@ -264,18 +258,18 @@
self.assertEqual((591, '<null>'), obj._test_lookup(k2))
def test_add_and_remove_lots_of_items(self):
- obj = _module.SimpleSet()
+ obj = self.module.SimpleSet()
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
for i in chars:
for j in chars:
- k = StaticTuple(i, j)
+ k = (i, j)
obj.add(k)
num = len(chars)*len(chars)
self.assertFillState(num, num, 0x1fff, obj)
# Now delete all of the entries and it should shrink again
for i in chars:
for j in chars:
- k = StaticTuple(i, j)
+ k = (i, j)
obj.discard(k)
# It should be back to 1024 wide mask, though there may still be some
# dummy values in there
@@ -284,10 +278,10 @@
self.assertTrue(obj.fill < 1024 / 5)
def test__iter__(self):
- obj = _module.SimpleSet()
- k1 = StaticTuple('1')
- k2 = StaticTuple('1', '2')
- k3 = StaticTuple('3', '4')
+ obj = self.module.SimpleSet()
+ k1 = ('1',)
+ k2 = ('1', '2')
+ k3 = ('3', '4')
obj.add(k1)
obj.add(k2)
obj.add(k3)
@@ -297,7 +291,7 @@
self.assertEqual(sorted([k1, k2, k3]), sorted(all))
iterator = iter(obj)
iterator.next()
- obj.add(StaticTuple('foo'))
+ obj.add(('foo',))
# Set changed size
self.assertRaises(RuntimeError, iterator.next)
# And even removing an item still causes it to fail
=== modified file 'setup.py'
--- a/setup.py 2009-10-07 15:48:32 +0000
+++ b/setup.py 2009-10-08 14:59:54 +0000
@@ -265,6 +265,7 @@
add_pyrex_extension('bzrlib._annotator_pyx')
add_pyrex_extension('bzrlib._bencode_pyx')
+add_pyrex_extension('bzrlib._btree_serializer_pyx')
add_pyrex_extension('bzrlib._chunks_to_lines_pyx')
add_pyrex_extension('bzrlib._groupcompress_pyx',
extra_source=['bzrlib/diff-delta.c'])
@@ -297,8 +298,6 @@
add_pyrex_extension('bzrlib._simple_set_pyx')
ext_modules.append(Extension('bzrlib._static_tuple_c',
['bzrlib/_static_tuple_c.c']))
-add_pyrex_extension('bzrlib._btree_serializer_pyx')
-
if unavailable_files:
print 'C extension(s) not found:'
More information about the bazaar-commits
mailing list