Rev 4742: Implement resizing. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Fri Oct 2 20:59:57 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4742
revision-id: john at arbash-meinel.com-20091002195951-y2w0a7y2c5i2vl7d
parent: john at arbash-meinel.com-20091002191539-8hhe3na35zc10ff0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Fri 2009-10-02 14:59:51 -0500
message:
Implement resizing.
It can resize up or down, but right now we never shrink.
-------------- next part --------------
=== modified file 'bzrlib/_static_tuple_interned_pyx.pxd'
--- a/bzrlib/_static_tuple_interned_pyx.pxd 2009-10-02 19:15:39 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pxd 2009-10-02 19:59:51 +0000
@@ -36,3 +36,5 @@
cdef PyObject *_get(self, object key) except? NULL
cpdef object add(self, key)
cpdef int discard(self, key) except -1
+ cdef int _insert_clean(self, PyObject *key) except -1
+ cpdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
=== modified file 'bzrlib/_static_tuple_interned_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx 2009-10-02 19:15:39 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pyx 2009-10-02 19:59:51 +0000
@@ -100,7 +100,8 @@
self.fill = 0
n_bytes = sizeof(PyObject*) * size;
self.table = <PyObject **>PyMem_Malloc(n_bytes)
- # TODO: Raise MemoryError if malloc fails
+ if self.table == NULL:
+ raise MemoryError()
memset(self.table, 0, n_bytes)
def __dealloc__(self):
@@ -155,6 +156,88 @@
# assert key == value
# self._add(key)
+ cdef int _insert_clean(self, PyObject *key) except -1:
+ """Insert a key into self.table.
+
+ This is only meant to be used during times like '_resize',
+ as it makes a lot of assuptions about keys not already being present,
+ and there being no dummy entries.
+ """
+ cdef size_t i, perturb, mask
+ cdef long the_hash
+ cdef PyObject **table, **entry
+
+ mask = self.mask
+ table = self.table
+
+ the_hash = Py_TYPE(key).tp_hash(key)
+ i = the_hash & mask
+ entry = &table[i]
+ perturb = the_hash
+ # Because we know that we made all items unique before, we can just
+ # iterate as long as the target location is not empty, we don't have to
+ # do any comparison, etc.
+ while entry[0] != NULL:
+ i = (i << 2) + i + perturb + 1
+ entry = &table[i & mask]
+ perturb >>= PERTURB_SHIFT
+ entry[0] = key
+ self.fill += 1
+ self.used += 1
+
+ cpdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
+ """Resize the internal table.
+
+ The final table will be big enough to hold at least min_used entries.
+ We will copy the data from the existing table over, leaving out dummy
+ entries.
+
+ :return: The new size of the internal table
+ """
+ cdef Py_ssize_t new_size, n_bytes, remaining
+ cdef PyObject **new_table, **old_table, **entry
+
+ new_size = DEFAULT_SIZE
+ while new_size <= min_used and new_size > 0:
+ new_size = new_size << 1
+ # We rolled over our signed size field
+ if new_size <= 0:
+ raise MemoryError()
+ if new_size == (self.mask + 1):
+ # Nothing to do
+ return new_size
+ # TODO: Test this
+ # if new_size < self.used:
+ # raise RuntimeError('cannot shrink StaticTupleInterner to something'
+ # ' smaller than the number of used slots.')
+ n_bytes = sizeof(PyObject*) * new_size;
+ new_table = <PyObject **>PyMem_Malloc(n_bytes)
+ if new_table == NULL:
+ raise MemoryError()
+
+ old_table = self.table
+ self.table = new_table
+ memset(self.table, 0, n_bytes)
+ self.mask = new_size - 1
+ self.used = 0
+ remaining = self.fill
+ self.fill = 0
+
+ # Moving everything to the other table is refcount neutral, so we don't
+ # worry about it.
+ entry = old_table
+ while remaining > 0:
+ if entry[0] == NULL: # unused slot
+ pass
+ elif entry[0] == _dummy: # dummy slot
+ remaining -= 1
+ else: # active slot
+ remaining -= 1
+ self._insert_clean(entry[0])
+ entry += 1
+ PyMem_Free(old_table)
+ return new_size
+
cpdef object add(self, key):
"""Similar to set.add(), start tracking this key.
@@ -163,7 +246,10 @@
dict.setdefault() functionality.)
"""
cdef PyObject **slot, *py_key
+ cdef int added = 0
+ # We need at least one empty slot
+ assert self.used < self.mask
slot = _lookup(self, key)
py_key = <PyObject *>key
if (slot[0] == NULL):
@@ -171,13 +257,23 @@
self.fill += 1
self.used += 1
slot[0] = py_key
+ added = 1
elif (slot[0] == _dummy):
Py_INCREF(py_key)
self.used += 1
slot[0] = py_key
+ added = 1
# No else: clause. If _lookup returns a pointer to
# a live object, then we already have a value at this location.
- return <object>(slot[0])
+ retval = <object>(slot[0])
+ # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
+ if added and (self.fill * 3) >= ((self.mask + 1) * 2):
+ # However, we always work for a load factor of 2:1
+ self._resize(self.used * 2)
+ # Even if we resized and ended up moving retval into a different slot,
+ # it is still the value that is held at the slot equivalent to 'key',
+ # so we can still return it
+ return retval
cpdef int discard(self, key) except -1:
"""Remove key from the dict, whether it exists or not.
=== modified file 'bzrlib/tests/test__static_tuple_interned.py'
--- a/bzrlib/tests/test__static_tuple_interned.py 2009-10-02 15:51:54 +0000
+++ b/bzrlib/tests/test__static_tuple_interned.py 2009-10-02 19:59:51 +0000
@@ -233,3 +233,39 @@
self.assertRefcount(2, k3)
del obj[k3]
self.assertRefcount(1, k3)
+
+ def test__resize(self):
+ obj = _module.StaticTupleInterner()
+ k1 = StaticTuple('foo')
+ k2 = StaticTuple('bar')
+ k3 = StaticTuple('baz')
+ obj.add(k1)
+ obj.add(k2)
+ obj.add(k3)
+ del obj[k2]
+ self.assertFillState(2, 3, 0x3ff, obj)
+ self.assertEqual(1024, obj._resize(500))
+ self.assertFillState(2, 3, 0x3ff, obj)
+ self.assertEqual(4096, obj._resize(4095))
+ self.assertFillState(2, 2, 0xfff, obj)
+ self.assertIn(k1, obj)
+ self.assertNotIn(k2, obj)
+ self.assertIn(k3, obj)
+ obj.add(k2)
+ self.assertIn(k2, obj)
+ del obj[k2]
+ self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
+ self.assertFillState(2, 3, 0xfff, obj)
+ self.assertEqual(2048, obj._resize(1024))
+ self.assertFillState(2, 2, 0x7ff, obj)
+ self.assertEqual((591, '<null>'), obj._test_lookup(k2))
+
+ def test_add_lots_of_items(self):
+ obj = _module.StaticTupleInterner()
+ chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
+ for i in chars:
+ for j in chars:
+ k = StaticTuple(i, j)
+ obj.add(k)
+ num = len(chars)*len(chars)
+ self.assertFillState(num, num, 0x1fff, obj)
More information about the bazaar-commits
mailing list