Rev 4739: Start working on more of the C api for StaticTupleInterner. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Fri Oct 2 16:52:00 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4739
revision-id: john at arbash-meinel.com-20091002155154-ouf4jb02y0wxfklv
parent: john at arbash-meinel.com-20091002155103-6wgtx9aosbx99m77
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Fri 2009-10-02 10:51:54 -0500
message:
  Start working on more of the C api for StaticTupleInterner.
  
  It is really nice to have the except -1 clauses, as suddenly you don't have
  to manually check the return values of all of your functions. \o/
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore	2009-10-02 05:41:24 +0000
+++ b/.bzrignore	2009-10-02 15:51:54 +0000
@@ -52,6 +52,8 @@
 bzrlib/_readdir_pyx.c
 bzrlib/_rio_pyx.c
 bzrlib/_static_tuple_interned_pyx.c
+bzrlib/_static_tuple_interned_pyx.h
+bzrlib/_static_tuple_interned_pyx_api.h
 bzrlib/_walkdirs_win32.c
 doc/en/release-notes/NEWS.txt
 doc/en/developer-guide/HACKING.txt

=== modified file 'bzrlib/_static_tuple_interned_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 05:41:24 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 15:51:54 +0000
@@ -56,6 +56,10 @@
     if res == Py_True:
         Py_DECREF(res)
         return 1
+    # Only handled for now because we are testing with stuff like tuples versus
+    # StaticTuple objects. If we decide to limit StaticTupleInterner to
+    # strictly only allowing StaticTuple objects, then this is no longer
+    # required, and Py_NotImplemented => not equal
     if res == Py_NotImplemented:
         Py_DECREF(res)
         res = other.ob_type.tp_richcompare(other, this, Py_EQ)
@@ -66,7 +70,8 @@
     return 0
 
 
-cdef class StaticTupleInterner:
+cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
+                                           type StaticTupleInterner_type]:
     """This class tracks the canonical forms for StaticTuples.
 
     It is similar in function to the interned dictionary that is used by
@@ -111,60 +116,10 @@
     def __len__(self):
         return self.used
 
-    cdef PyObject **_lookup(self, key, long hash) except NULL:
-        """Find the slot where 'key' would fit.
-
-        This is the same as a dicts 'lookup' function
-
-        :param key: An object we are looking up
-        :param hash: The hash for key
-        :return: The location in self.table where key should be put
-            should never be NULL, but may reference a NULL (PyObject*)
-        """
-        cdef size_t i, perturb
-        cdef Py_ssize_t mask
-        cdef long this_hash
-        cdef PyObject **table, **cur, **free_slot, *py_key
-
-        mask = self.mask
-        table = self.table
-        i = hash & mask
-        cur = &table[i]
-        py_key = <PyObject *>key
-        if cur[0] == NULL:
-            # Found a blank spot, or found the exact key
-            return cur
-        if cur[0] == py_key:
-            return cur
-        if cur[0] == _dummy:
-            free_slot = cur
-        else:
-            if _is_equal(py_key, hash, cur[0]):
-                # Both py_key and cur[0] belong in this slot, return it
-                return cur
-            free_slot = NULL
-        # size_t is unsigned, hash is signed...
-        perturb = hash
-        while True:
-            i = (i << 2) + i + perturb + 1
-            cur = &table[i & mask]
-            if cur[0] == NULL: # Found an empty spot
-                if free_slot: # Did we find a _dummy earlier?
-                    return free_slot
-                else:
-                    return cur
-            if (cur[0] == py_key # exact match
-                or _is_equal(py_key, hash, cur[0])): # Equivalent match
-                return cur
-            if (cur[0] == _dummy and free_slot == NULL):
-                free_slot = cur
-            perturb >>= PERTURB_SHIFT
-        raise AssertionError('should never get here')
-
     def _test_lookup(self, key):
         cdef PyObject **slot
 
-        slot = self._lookup(key, hash(key))
+        slot = _StaticTupleInterner_Lookup(self, key, hash(key))
         if slot[0] == NULL:
             res = '<null>'
         elif slot[0] == _dummy:
@@ -176,37 +131,147 @@
     def __contains__(self, key):
         cdef PyObject **slot
 
-        slot = self._lookup(key, hash(key))
+        slot = _StaticTupleInterner_Lookup(self, key, hash(key))
         if slot[0] == NULL or slot[0] == _dummy:
             return False
         return True
 
     def __getitem__(self, key):
+        """Return a stored item that is equivalent to key."""
         cdef PyObject **slot
-        slot = self._lookup(key, hash(key))
+        slot = _StaticTupleInterner_Lookup(self, key, hash(key))
         if slot[0] == NULL or slot[0] == _dummy:
             raise KeyError("Key %s is not present" % key)
         val = <object>(slot[0])
         return val
 
-    def __setitem__(self, key, value):
-        cdef PyObject **slot, *py_key
-        assert key == value
-
-        slot = self._lookup(key, hash(key))
-        if (slot[0] == NULL or slot[0] == _dummy):
-            py_key = <PyObject *>key
-            Py_INCREF(py_key)
-            slot[0] = py_key
+    # def __setitem__(self, key, value):
+    #     assert key == value
+    #     self._add(key)
+
+    def add(self, key):
+        """Similar to set.add(), start tracking this key.
+        
+        There is one small difference, which is that we return the object that
+        is stored at the given location. (which is closer to the
+        dict.setdefault() functionality.)
+        """
+        return StaticTupleInterner_Add(self, key)
+
+    def discard(self, key):
+        """Remove key from the dict, whether it exists or not.
+
+        :return: 0 if the item did not exist, 1 if it did
+        """
+        return StaticTupleInterner_Discard(self, key)
+
 
     def __delitem__(self, key):
-        cdef PyObject **slot, *py_key
-
-        slot = self._lookup(key, hash(key))
-        if (slot[0] == NULL or slot[0] == _dummy):
-            pass # Raise KeyError
-            return
-        # Found it
-        # TODO: Check refcounts
-        Py_DECREF(slot[0])
-        slot[0] = _dummy
+        """Remove the given item from the dict.
+
+        Raise a KeyError if the key was not present.
+        """
+        cdef int exists
+        exists = StaticTupleInterner_Discard(self, key)
+        if not exists:
+            raise KeyError('Key %s not present' % (key,))
+
+
+cdef api inline PyObject **_StaticTupleInterner_Lookup(
+            StaticTupleInterner self, key, long hash) except NULL:
+    """Find the slot where 'key' would fit.
+
+    This is the same as a dicts 'lookup' function.
+
+    :param key: An object we are looking up
+    :param hash: The hash for key
+    :return: The location in self.table where key should be put
+        should never be NULL, but may reference a NULL (PyObject*)
+    """
+    cdef size_t i, perturb
+    cdef Py_ssize_t mask
+    cdef long this_hash
+    cdef PyObject **table, **cur, **free_slot, *py_key
+
+    mask = self.mask
+    table = self.table
+    i = hash & mask
+    cur = &table[i]
+    py_key = <PyObject *>key
+    if cur[0] == NULL:
+        # Found a blank spot, or found the exact key
+        return cur
+    if cur[0] == py_key:
+        return cur
+    if cur[0] == _dummy:
+        free_slot = cur
+    else:
+        if _is_equal(py_key, hash, cur[0]):
+            # Both py_key and cur[0] belong in this slot, return it
+            return cur
+        free_slot = NULL
+    # size_t is unsigned, hash is signed...
+    perturb = hash
+    while True:
+        i = (i << 2) + i + perturb + 1
+        cur = &table[i & mask]
+        if cur[0] == NULL: # Found an empty spot
+            if free_slot: # Did we find a _dummy earlier?
+                return free_slot
+            else:
+                return cur
+        if (cur[0] == py_key # exact match
+            or _is_equal(py_key, hash, cur[0])): # Equivalent match
+            return cur
+        if (cur[0] == _dummy and free_slot == NULL):
+            free_slot = cur
+        perturb >>= PERTURB_SHIFT
+    raise AssertionError('should never get here')
+
+
+cdef api object StaticTupleInterner_Add(StaticTupleInterner self, object key):
+    """Add a key to the StaticTupleInterner (set).
+
+    :param self: The StaticTupleInterner to add the key to.
+    :param key: The key to be added. If the key is already present,
+        self will not be modified
+    :return: The current key stored at the location defined by 'key'.
+        This may be the same object, or it may be an equivalent object.
+        (consider dict.setdefault(key, key))
+    """
+    cdef PyObject **slot, *py_key
+
+    slot = _StaticTupleInterner_Lookup(self, key, hash(key))
+    py_key = <PyObject *>key
+    if (slot[0] == NULL):
+        Py_INCREF(py_key)
+        self.fill += 1
+        self.used += 1
+        slot[0] = py_key
+    elif (slot[0] == _dummy):
+        Py_INCREF(py_key)
+        self.used += 1
+        slot[0] = py_key
+    # No else: clause. If _StaticTupleInterner_Lookup returns a pointer to
+    # a live object, then we already have a value at this location.
+    return <object>(slot[0])
+
+
+cdef api int StaticTupleInterner_Discard(StaticTupleInterner self,
+                                         object key) except -1:
+    """Remove the object referenced at location 'key'.
+
+    :param self: The StaticTupleInterner being modified
+    :param key: The key we are checking on
+    :return: 1 if there was an object present, 0 if there was not, and -1 on
+        error.
+    """
+    cdef PyObject **slot, *py_key
+
+    slot = _StaticTupleInterner_Lookup(self, key, hash(key))
+    if slot[0] == NULL or slot[0] == _dummy:
+        return 0
+    self.used -= 1
+    Py_DECREF(slot[0])
+    slot[0] = _dummy
+    return 1

=== modified file 'bzrlib/tests/test__static_tuple_interned.py'
--- a/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 05:41:24 +0000
+++ b/bzrlib/tests/test__static_tuple_interned.py	2009-10-02 15:51:54 +0000
@@ -16,6 +16,8 @@
 
 """Tests for the StaticTupleInterned type."""
 
+import sys
+
 from bzrlib import (
     # _static_tuple_py,
     errors,
@@ -65,13 +67,28 @@
         self.assertTrue(obj not in container,
             'We found %s in %s' % (obj, container))
 
+    def assertFillState(self, used, fill, mask, obj):
+        self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))
+
+    def assertRefcount(self, count, obj):
+        """Assert that the refcount for obj is what we expect.
+
+        Note that this automatically adjusts for the fact that calling
+        assertRefcount actually creates a new pointer, as does calling
+        sys.getrefcount. So pass the expected value *before* the call.
+        """
+        # I don't understand why it is count+3 here, but it seems to be
+        # correct. If I check in the calling function, with:
+        # self.assertEqual(count+1, sys.getrefcount(obj))
+        # Then it works fine. Something about passing it to assertRefcount is
+        # actually double-incrementing (and decrementing) the refcount
+        self.assertEqual(count+3, sys.getrefcount(obj))
+
     def test_initial(self):
         obj = _module.StaticTupleInterner()
         self.assertEqual(0, len(obj))
         st = StaticTuple('foo', 'bar')
-        self.assertEqual(0, obj.used)
-        self.assertEqual(0, obj.fill)
-        self.assertEqual(0x3ff, obj.mask) # 1024 - 1
+        self.assertFillState(0, 0, 0x3ff, obj)
 
     def test__lookup(self):
         # The tuple hash function is rather good at entropy. For all integers
@@ -103,7 +120,7 @@
         self.assertEqual((643, '<null>'), obj._test_lookup(k2))
         self.assertEqual((643, '<null>'), obj._test_lookup(k3))
         self.assertEqual((643, '<null>'), obj._test_lookup(k4))
-        obj[k1] = k1
+        obj.add(k1)
         self.assertIn(k1, obj)
         self.assertNotIn(k2, obj)
         self.assertNotIn(k3, obj)
@@ -113,7 +130,7 @@
         self.assertEqual((787, '<null>'), obj._test_lookup(k3))
         self.assertEqual((787, '<null>'), obj._test_lookup(k4))
         self.assertIs(k1, obj[k1])
-        obj[k2] = k2
+        obj.add(k2)
         self.assertIs(k2, obj[k2])
         self.assertEqual((643, k1), obj._test_lookup(k1))
         self.assertEqual((787, k2), obj._test_lookup(k2))
@@ -126,7 +143,7 @@
         self.assertEqual((787, k2), obj._test_lookup(('f', '4')))
         self.assertEqual((660, '<null>'), obj._test_lookup(('p', 'r')))
         self.assertEqual((180, '<null>'), obj._test_lookup(('q', '1')))
-        obj[k3] = k3
+        obj.add(k3)
         self.assertIs(k3, obj[k3])
         self.assertIn(k1, obj)
         self.assertIn(k2, obj)
@@ -142,3 +159,77 @@
         self.assertIn(k2, obj)
         self.assertIn(k3, obj)
         self.assertNotIn(k4, obj)
+
+    def test_add(self):
+        obj = _module.StaticTupleInterner()
+        self.assertFillState(0, 0, 0x3ff, obj)
+        k1 = StaticTuple('foo')
+        self.assertRefcount(1, k1)
+        self.assertIs(k1, obj.add(k1))
+        self.assertFillState(1, 1, 0x3ff, obj)
+        self.assertRefcount(2, k1)
+        ktest = obj[k1]
+        self.assertRefcount(3, k1)
+        self.assertIs(k1, ktest)
+        del ktest
+        self.assertRefcount(2, k1)
+        k2 = StaticTuple('foo')
+        self.assertRefcount(1, k2)
+        self.assertIsNot(k1, k2)
+        # doesn't add anything, so the counters shouldn't be adjusted
+        self.assertIs(k1, obj.add(k2))
+        self.assertFillState(1, 1, 0x3ff, obj)
+        self.assertRefcount(2, k1) # not changed
+        self.assertRefcount(1, k2) # not incremented
+        self.assertIs(k1, obj[k1])
+        self.assertIs(k1, obj[k2])
+        self.assertRefcount(2, k1)
+        self.assertRefcount(1, k2)
+        # Deleting an entry should remove the fill, but not the used
+        del obj[k1]
+        self.assertFillState(0, 1, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k3)
+        self.assertIs(k3, obj.add(k3))
+        self.assertFillState(1, 2, 0x3ff, obj)
+        self.assertRefcount(2, k3)
+        self.assertIs(k2, obj.add(k2))
+        self.assertFillState(2, 2, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        self.assertRefcount(2, k2)
+        self.assertRefcount(2, k3)
+
+    def test_discard(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('foo')
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k1)
+        self.assertRefcount(1, k2)
+        self.assertRefcount(1, k3)
+        obj.add(k1)
+        self.assertRefcount(2, k1)
+        self.assertEqual(0, obj.discard(k3))
+        self.assertRefcount(1, k3)
+        obj.add(k3)
+        self.assertRefcount(2, k3)
+        self.assertEqual(1, obj.discard(k3))
+        self.assertRefcount(1, k3)
+
+    def test__delitem__(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('foo')
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k1)
+        self.assertRefcount(1, k2)
+        self.assertRefcount(1, k3)
+        obj.add(k1)
+        self.assertRefcount(2, k1)
+        self.assertRaises(KeyError, obj.__delitem__, k3)
+        self.assertRefcount(1, k3)
+        obj.add(k3)
+        self.assertRefcount(2, k3)
+        del obj[k3]
+        self.assertRefcount(1, k3)



More information about the bazaar-commits mailing list