Rev 4742: Implement resizing. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Fri Oct 2 20:59:57 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4742
revision-id: john at arbash-meinel.com-20091002195951-y2w0a7y2c5i2vl7d
parent: john at arbash-meinel.com-20091002191539-8hhe3na35zc10ff0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Fri 2009-10-02 14:59:51 -0500
message:
  Implement resizing.
  
  It can resize up or down, but right now we never shrink.
-------------- next part --------------
=== modified file 'bzrlib/_static_tuple_interned_pyx.pxd'
--- a/bzrlib/_static_tuple_interned_pyx.pxd	2009-10-02 19:15:39 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pxd	2009-10-02 19:59:51 +0000
@@ -36,3 +36,5 @@
     cdef PyObject *_get(self, object key) except? NULL
     cpdef object add(self, key)
     cpdef int discard(self, key) except -1
+    cdef int _insert_clean(self, PyObject *key) except -1
+    cpdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1

=== modified file 'bzrlib/_static_tuple_interned_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 19:15:39 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 19:59:51 +0000
@@ -100,7 +100,8 @@
         self.fill = 0
         n_bytes = sizeof(PyObject*) * size;
         self.table = <PyObject **>PyMem_Malloc(n_bytes)
-        # TODO: Raise MemoryError if malloc fails
+        if self.table == NULL:
+            raise MemoryError()
         memset(self.table, 0, n_bytes)
 
     def __dealloc__(self):
@@ -155,6 +156,88 @@
     #     assert key == value
     #     self._add(key)
 
+    cdef int _insert_clean(self, PyObject *key) except -1:
+        """Insert a key into self.table.
+
+        This is only meant to be used during times like '_resize',
+        as it makes a lot of assuptions about keys not already being present,
+        and there being no dummy entries.
+        """
+        cdef size_t i, perturb, mask
+        cdef long the_hash
+        cdef PyObject **table, **entry
+
+        mask = self.mask
+        table = self.table
+
+        the_hash = Py_TYPE(key).tp_hash(key)
+        i = the_hash & mask
+        entry = &table[i]
+        perturb = the_hash
+        # Because we know that we made all items unique before, we can just
+        # iterate as long as the target location is not empty, we don't have to
+        # do any comparison, etc.
+        while entry[0] != NULL:
+            i = (i << 2) + i + perturb + 1
+            entry = &table[i & mask]
+            perturb >>= PERTURB_SHIFT
+        entry[0] = key
+        self.fill += 1
+        self.used += 1
+
+    cpdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
+        """Resize the internal table.
+
+        The final table will be big enough to hold at least min_used entries.
+        We will copy the data from the existing table over, leaving out dummy
+        entries.
+
+        :return: The new size of the internal table
+        """
+        cdef Py_ssize_t new_size, n_bytes, remaining
+        cdef PyObject **new_table, **old_table, **entry
+
+        new_size = DEFAULT_SIZE
+        while new_size <= min_used and new_size > 0:
+            new_size = new_size << 1
+        # We rolled over our signed size field
+        if new_size <= 0:
+            raise MemoryError()
+        if new_size == (self.mask + 1):
+            # Nothing to do
+            return new_size
+        # TODO: Test this
+        # if new_size < self.used:
+        #     raise RuntimeError('cannot shrink StaticTupleInterner to something'
+        #                        ' smaller than the number of used slots.')
+        n_bytes = sizeof(PyObject*) * new_size;
+        new_table = <PyObject **>PyMem_Malloc(n_bytes)
+        if new_table == NULL:
+            raise MemoryError()
+
+        old_table = self.table
+        self.table = new_table
+        memset(self.table, 0, n_bytes)
+        self.mask = new_size - 1
+        self.used = 0
+        remaining = self.fill
+        self.fill = 0
+
+        # Moving everything to the other table is refcount neutral, so we don't
+        # worry about it.
+        entry = old_table
+        while remaining > 0:
+            if entry[0] == NULL: # unused slot
+                pass 
+            elif entry[0] == _dummy: # dummy slot
+                remaining -= 1
+            else: # active slot
+                remaining -= 1
+                self._insert_clean(entry[0])
+            entry += 1
+        PyMem_Free(old_table)
+        return new_size
+
     cpdef object add(self, key):
         """Similar to set.add(), start tracking this key.
         
@@ -163,7 +246,10 @@
         dict.setdefault() functionality.)
         """
         cdef PyObject **slot, *py_key
+        cdef int added = 0
 
+        # We need at least one empty slot
+        assert self.used < self.mask
         slot = _lookup(self, key)
         py_key = <PyObject *>key
         if (slot[0] == NULL):
@@ -171,13 +257,23 @@
             self.fill += 1
             self.used += 1
             slot[0] = py_key
+            added = 1
         elif (slot[0] == _dummy):
             Py_INCREF(py_key)
             self.used += 1
             slot[0] = py_key
+            added = 1
         # No else: clause. If _lookup returns a pointer to
         # a live object, then we already have a value at this location.
-        return <object>(slot[0])
+        retval = <object>(slot[0])
+        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
+        if added and (self.fill * 3) >= ((self.mask + 1) * 2):
+            # However, we always work for a load factor of 2:1
+            self._resize(self.used * 2)
+        # Even if we resized and ended up moving retval into a different slot,
+        # it is still the value that is held at the slot equivalent to 'key',
+        # so we can still return it
+        return retval
 
     cpdef int discard(self, key) except -1:
         """Remove key from the dict, whether it exists or not.

=== modified file 'bzrlib/tests/test__static_tuple_interned.py'
--- a/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 15:51:54 +0000
+++ b/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 19:59:51 +0000
@@ -233,3 +233,39 @@
         self.assertRefcount(2, k3)
         del obj[k3]
         self.assertRefcount(1, k3)
+
+    def test__resize(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('bar')
+        k3 = StaticTuple('baz')
+        obj.add(k1)
+        obj.add(k2)
+        obj.add(k3)
+        del obj[k2]
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(1024, obj._resize(500))
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(4096, obj._resize(4095))
+        self.assertFillState(2, 2, 0xfff, obj)
+        self.assertIn(k1, obj)
+        self.assertNotIn(k2, obj)
+        self.assertIn(k3, obj)
+        obj.add(k2)
+        self.assertIn(k2, obj)
+        del obj[k2]
+        self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
+        self.assertFillState(2, 3, 0xfff, obj)
+        self.assertEqual(2048, obj._resize(1024))
+        self.assertFillState(2, 2, 0x7ff, obj)
+        self.assertEqual((591, '<null>'), obj._test_lookup(k2))
+
+    def test_add_lots_of_items(self):
+        obj = _module.StaticTupleInterner()
+        chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
+        for i in chars:
+            for j in chars:
+                k = StaticTuple(i, j)
+                obj.add(k)
+        num = len(chars)*len(chars)
+        self.assertFillState(num, num, 0x1fff, obj)



More information about the bazaar-commits mailing list