Rev 4743: Add functionality for shrinking the table. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Fri Oct 2 21:15:58 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4743
revision-id: john at arbash-meinel.com-20091002201552-tzrgn1g92ujxt3wu
parent: john at arbash-meinel.com-20091002195951-y2w0a7y2c5i2vl7d
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Fri 2009-10-02 15:15:52 -0500
message:
  Add functionality for shrinking the table.
-------------- next part --------------
=== modified file 'bzrlib/_static_tuple_interned_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 19:59:51 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 20:15:52 +0000
@@ -203,9 +203,9 @@
         # We rolled over our signed size field
         if new_size <= 0:
             raise MemoryError()
-        if new_size == (self.mask + 1):
-            # Nothing to do
-            return new_size
+        # Even if min_used == self.mask + 1, and we aren't changing the actual
+        # size, we will still run the algorithm so that dummy entries are
+        # removed
         # TODO: Test this
         # if new_size < self.used:
         #     raise RuntimeError('cannot shrink StaticTupleInterner to something'
@@ -288,9 +288,20 @@
         self.used -= 1
         Py_DECREF(slot[0])
         slot[0] = _dummy
+        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
+        #                           them away
+        #   if ((so->fill - so->used) * 5 < so->mask)
+        # However, we are planning on using this as an interning structure, in
+        # which we will be putting a lot of objects. And we expect that large
+        # groups of them are going to have the same lifetime.
+        # Dummy entries hurt a little bit because they cause the lookup to keep
+        # searching, but resizing is also rather expensive
+        # For now, we'll just use their algorithm, but we may want to revisit
+        # it
+        if ((self.fill - self.used) * 5 > self.mask):
+            self._resize(self.used * 2)
         return 1
 
-
     def __delitem__(self, key):
         """Remove the given item from the dict.
 

=== modified file 'bzrlib/tests/test__static_tuple_interned.py'
--- a/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 19:59:51 +0000
+++ b/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 20:15:52 +0000
@@ -245,22 +245,26 @@
         del obj[k2]
         self.assertFillState(2, 3, 0x3ff, obj)
         self.assertEqual(1024, obj._resize(500))
+        # Doesn't change the size, but does change the content
+        self.assertFillState(2, 2, 0x3ff, obj)
+        obj.add(k2)
+        del obj[k3]
         self.assertFillState(2, 3, 0x3ff, obj)
         self.assertEqual(4096, obj._resize(4095))
         self.assertFillState(2, 2, 0xfff, obj)
         self.assertIn(k1, obj)
-        self.assertNotIn(k2, obj)
-        self.assertIn(k3, obj)
+        self.assertIn(k2, obj)
+        self.assertNotIn(k3, obj)
         obj.add(k2)
         self.assertIn(k2, obj)
         del obj[k2]
         self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
-        self.assertFillState(2, 3, 0xfff, obj)
+        self.assertFillState(1, 2, 0xfff, obj)
         self.assertEqual(2048, obj._resize(1024))
-        self.assertFillState(2, 2, 0x7ff, obj)
+        self.assertFillState(1, 1, 0x7ff, obj)
         self.assertEqual((591, '<null>'), obj._test_lookup(k2))
 
-    def test_add_lots_of_items(self):
+    def test_add_and_remove_lots_of_items(self):
         obj = _module.StaticTupleInterner()
         chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
         for i in chars:
@@ -269,3 +273,13 @@
                 obj.add(k)
         num = len(chars)*len(chars)
         self.assertFillState(num, num, 0x1fff, obj)
+        # Now delete all of the entries and it should shrink again
+        for i in chars:
+            for j in chars:
+                k = StaticTuple(i, j)
+                obj.discard(k)
+        # It should be back to 1024 wide mask, though there may still be some
+        # dummy values in there
+        self.assertFillState(0, obj.fill, 0x3ff, obj)
+        # but there should be fewer than 1/5th dummy entries
+        self.assertTrue(obj.fill < 1024 / 5)



More information about the bazaar-commits mailing list