Rev 4760: Fix up _simple_set_pyx.pyx to be compatible with pyrex again. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Wed Oct 7 20:31:55 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4760
revision-id: john at arbash-meinel.com-20091007193139-5etl1h2f1jad4j13
parent: john at arbash-meinel.com-20091007185341-0080bnb4ru4v2jp6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 14:31:39 -0500
message:
  Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
  
  Surprisingly not as easy as I expected, but things seem to be working.
  Found an actual bug in the dealloc code...
-------------- next part --------------
=== modified file 'bzrlib/_simple_set_pyx.pxd'
--- a/bzrlib/_simple_set_pyx.pxd	2009-10-07 15:48:32 +0000
+++ b/bzrlib/_simple_set_pyx.pxd	2009-10-07 19:31:39 +0000
@@ -23,9 +23,6 @@
      eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
 """
 
-cdef extern from "python-compat.h":
-    ctypedef long Py_ssize_t
-
 cdef extern from "Python.h":
     ctypedef struct PyObject:
         pass
@@ -43,19 +40,20 @@
     operations (difference, intersection, etc).
     """
 
-    cdef readonly Py_ssize_t used    # active
-    cdef readonly Py_ssize_t fill    # active + dummy
-    cdef readonly Py_ssize_t mask    # Table contains (mask+1) slots, a power
+    cdef Py_ssize_t _used   # active
+    cdef Py_ssize_t _fill   # active + dummy
+    cdef Py_ssize_t _mask   # Table contains (mask+1) slots, a power
                                      # of 2
-    cdef PyObject **table   # Pyrex/Cython doesn't support arrays to 'object'
+    cdef PyObject **_table  # Pyrex/Cython doesn't support arrays to 'object'
                             # so we manage it manually
 
     cdef PyObject *_get(self, object key) except? NULL
-    cpdef object add(self, key)
-    cpdef int discard(self, key) except -1
+    cdef object _add(self, key)
+    cdef int _discard(self, key) except -1
     cdef int _insert_clean(self, PyObject *key) except -1
-    cpdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
+    cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
 
 # TODO: might want to export the C api here, though it is all available from
 #       the class object...
 cdef api object SimpleSet_Add(object self, object key)
+cdef api SimpleSet SimpleSet_New()

=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx	2009-10-07 15:48:32 +0000
+++ b/bzrlib/_simple_set_pyx.pyx	2009-10-07 19:31:39 +0000
@@ -41,7 +41,7 @@
 _dummy = <PyObject *>_dummy_obj
 
 
-cdef inline int _is_equal(PyObject *this, long this_hash, PyObject *other):
+cdef int _is_equal(PyObject *this, long this_hash, PyObject *other):
     cdef long other_hash
     cdef PyObject *res
 
@@ -86,22 +86,34 @@
         cdef Py_ssize_t size, n_bytes
 
         size = DEFAULT_SIZE
-        self.mask = size - 1
-        self.used = 0
-        self.fill = 0
+        self._mask = size - 1
+        self._used = 0
+        self._fill = 0
         n_bytes = sizeof(PyObject*) * size;
-        self.table = <PyObject **>PyMem_Malloc(n_bytes)
-        if self.table == NULL:
+        self._table = <PyObject **>PyMem_Malloc(n_bytes)
+        if self._table == NULL:
             raise MemoryError()
-        memset(self.table, 0, n_bytes)
+        memset(self._table, 0, n_bytes)
 
     def __dealloc__(self):
-        if self.table != NULL:
-            PyMem_Free(self.table)
-            self.table = NULL
+        if self._table != NULL:
+            PyMem_Free(self._table)
+            self._table = NULL
+
+    property used:
+        def __get__(self):
+            return self._used
+
+    property fill:
+        def __get__(self):
+            return self._fill
+
+    property mask:
+        def __get__(self):
+            return self._mask
 
     def __len__(self):
-        return self.used
+        return self._used
 
     def _test_lookup(self, key):
         cdef PyObject **slot
@@ -113,7 +125,7 @@
             res = '<dummy>'
         else:
             res = <object>slot[0]
-        return <int>(slot - self.table), res
+        return <int>(slot - self._table), res
 
     def __contains__(self, key):
         """Is key present in this SimpleSet."""
@@ -154,8 +166,8 @@
         cdef long the_hash
         cdef PyObject **table, **entry
 
-        mask = self.mask
-        table = self.table
+        mask = self._mask
+        table = self._table
 
         the_hash = Py_TYPE(key).tp_hash(key)
         i = the_hash & mask
@@ -169,10 +181,14 @@
             entry = &table[i & mask]
             perturb >>= PERTURB_SHIFT
         entry[0] = key
-        self.fill += 1
-        self.used += 1
-
-    cpdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
+        self._fill += 1
+        self._used += 1
+
+    def _py_resize(self, min_used):
+        """Do not use this directly, it is only exposed for testing."""
+        return self._resize(min_used)
+
+    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
         """Resize the internal table.
 
         The final table will be big enough to hold at least min_used entries.
@@ -190,11 +206,11 @@
         # We rolled over our signed size field
         if new_size <= 0:
             raise MemoryError()
-        # Even if min_used == self.mask + 1, and we aren't changing the actual
+        # Even if min_used == self._mask + 1, and we aren't changing the actual
         # size, we will still run the algorithm so that dummy entries are
         # removed
         # TODO: Test this
-        # if new_size < self.used:
+        # if new_size < self._used:
         #     raise RuntimeError('cannot shrink SimpleSet to something'
         #                        ' smaller than the number of used slots.')
         n_bytes = sizeof(PyObject*) * new_size;
@@ -202,13 +218,13 @@
         if new_table == NULL:
             raise MemoryError()
 
-        old_table = self.table
-        self.table = new_table
-        memset(self.table, 0, n_bytes)
-        self.mask = new_size - 1
-        self.used = 0
-        remaining = self.fill
-        self.fill = 0
+        old_table = self._table
+        self._table = new_table
+        memset(self._table, 0, n_bytes)
+        self._mask = new_size - 1
+        self._used = 0
+        remaining = self._fill
+        self._fill = 0
 
         # Moving everything to the other table is refcount neutral, so we don't
         # worry about it.
@@ -225,59 +241,66 @@
         PyMem_Free(old_table)
         return new_size
 
-    cpdef object add(self, key):
+    def add(self, key):
         """Similar to set.add(), start tracking this key.
         
         There is one small difference, which is that we return the object that
         is stored at the given location. (which is closer to the
         dict.setdefault() functionality.)
         """
+        return self._add(key)
+
+    cdef object _add(self, key):
         cdef PyObject **slot, *py_key
-        cdef int added = 0
+        cdef int added
 
+        added = 0
         # We need at least one empty slot
-        assert self.used < self.mask
+        assert self._used < self._mask
         slot = _lookup(self, key)
         py_key = <PyObject *>key
         if (slot[0] == NULL):
             Py_INCREF(py_key)
-            self.fill += 1
-            self.used += 1
+            self._fill += 1
+            self._used += 1
             slot[0] = py_key
             added = 1
         elif (slot[0] == _dummy):
             Py_INCREF(py_key)
-            self.used += 1
+            self._used += 1
             slot[0] = py_key
             added = 1
         # No else: clause. If _lookup returns a pointer to
         # a live object, then we already have a value at this location.
         retval = <object>(slot[0])
         # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
-        if added and (self.fill * 3) >= ((self.mask + 1) * 2):
+        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
             # However, we always work for a load factor of 2:1
-            self._resize(self.used * 2)
+            self._resize(self._used * 2)
         # Even if we resized and ended up moving retval into a different slot,
         # it is still the value that is held at the slot equivalent to 'key',
         # so we can still return it
         return retval
 
-    cpdef int discard(self, key) except -1:
+    def discard(self, key):
         """Remove key from the dict, whether it exists or not.
 
         :return: 0 if the item did not exist, 1 if it did
         """
+        return self._discard(key)
+
+    cdef int _discard(self, key) except -1:
         cdef PyObject **slot, *py_key
 
         slot = _lookup(self, key)
         if slot[0] == NULL or slot[0] == _dummy:
             return 0
-        self.used -= 1
+        self._used -= 1
         Py_DECREF(slot[0])
         slot[0] = _dummy
         # PySet uses the heuristic: If more than 1/5 are dummies, then resize
         #                           them away
-        #   if ((so->fill - so->used) * 5 < so->mask)
+        #   if ((so->_fill - so->_used) * 5 < so->mask)
         # However, we are planning on using this as an interning structure, in
         # which we will be putting a lot of objects. And we expect that large
         # groups of them are going to have the same lifetime.
@@ -285,8 +308,8 @@
         # searching, but resizing is also rather expensive
         # For now, we'll just use their algorithm, but we may want to revisit
         # it
-        if ((self.fill - self.used) * 5 > self.mask):
-            self._resize(self.used * 2)
+        if ((self._fill - self._used) * 5 > self._mask):
+            self._resize(self._used * 2)
         return 1
 
     def __delitem__(self, key):
@@ -295,7 +318,7 @@
         Raise a KeyError if the key was not present.
         """
         cdef int exists
-        exists = self.discard(key)
+        exists = self._discard(key)
         if not exists:
             raise KeyError('Key %s not present' % (key,))
 
@@ -308,14 +331,14 @@
 
     cdef Py_ssize_t pos
     cdef SimpleSet set
-    cdef Py_ssize_t used # track if things have been mutated while iterating
+    cdef Py_ssize_t _used # track if things have been mutated while iterating
     cdef Py_ssize_t len # number of entries left
 
     def __init__(self, obj):
         self.set = obj
         self.pos = 0
-        self.used = self.set.used
-        self.len = self.set.used
+        self._used = self.set._used
+        self.len = self.set._used
 
     def __iter__(self):
         return self
@@ -326,13 +349,13 @@
 
         if self.set is None:
             raise StopIteration
-        if self.set.used != self.used:
+        if self.set._used != self._used:
             # Force this exception to continue to be raised
-            self.used = -1
+            self._used = -1
             raise RuntimeError("Set size changed during iteration")
         i = self.pos
-        mask = self.set.mask
-        table = self.set.table
+        mask = self.set._mask
+        table = self.set._table
         assert i >= 0
         while i <= mask and (table[i] == NULL or table[i] == _dummy):
             i += 1
@@ -347,7 +370,7 @@
         return key
 
     def __length_hint__(self):
-        if self.set is not None and self.used == self.set.used:
+        if self.set is not None and self._used == self.set._used:
             return self.len
         return 0
     
@@ -358,7 +381,7 @@
     return SimpleSet()
 
 
-cdef inline int _check_self_not_none(object self) except -1:
+cdef SimpleSet _check_self(object self):
     """Check that the parameter is not None.
 
     Pyrex/Cython will do type checking, but only to ensure that an object is
@@ -366,12 +389,14 @@
     python functions, but not for C functions.
     So this is just a helper for all the apis that need to do the check.
     """
+    cdef SimpleSet true_self
     if self is None:
         raise TypeError('self must not be None')
-
-
-cdef inline PyObject **_lookup(SimpleSet self,
-                               object key) except NULL:
+    true_self = self
+    return true_self
+
+
+cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
     """Find the slot where 'key' would fit.
 
     This is the same as a dicts 'lookup' function.
@@ -390,8 +415,8 @@
     cdef PyObject **table, **cur, **free_slot, *py_key
 
     key_hash = hash(key)
-    mask = self.mask
-    table = self.table
+    mask = self._mask
+    table = self._table
     i = key_hash & mask
     cur = &table[i]
     py_key = <PyObject *>key
@@ -436,11 +461,10 @@
 
     :param key: An object we are looking up
     :param hash: The hash for key
-    :return: The location in self.table where key should be put
+    :return: The location in self._table where key should be put
         should never be NULL, but may reference a NULL (PyObject*)
     """
-    _check_self_not_none(self)
-    return _lookup(self, key)
+    return _lookup(_check_self(self), key)
 
 
 cdef api object SimpleSet_Add(object self, object key):
@@ -453,18 +477,12 @@
         This may be the same object, or it may be an equivalent object.
         (consider dict.setdefault(key, key))
     """
-    cdef SimpleSet true_self
-    _check_self_not_none(self)
-    true_self = self
-    return true_self.add(key)
+    return _check_self(self)._add(key)
     
 
-cdef api bint SimpleSet_Contains(object self, object key) except -1:
+cdef api int SimpleSet_Contains(object self, object key) except -1:
     """Is key present in self?"""
-    cdef SimpleSet true_self
-    _check_self_not_none(self)
-    true_self = self
-    return key in true_self
+    return (key in _check_self(self))
 
 
 cdef api int SimpleSet_Discard(object self, object key) except -1:
@@ -475,14 +493,10 @@
     :return: 1 if there was an object present, 0 if there was not, and -1 on
         error.
     """
-    cdef SimpleSet true_self
-    _check_self_not_none(self)
-    true_self = self
-    return true_self.discard(key)
-
-
-cdef api PyObject *SimpleSet_Get(SimpleSet self,
-                                           object key) except? NULL:
+    return _check_self(self)._discard(key)
+
+
+cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
     """Get a pointer to the object present at location 'key'.
 
     This returns an object which is equal to key which was previously added to
@@ -492,17 +506,12 @@
     :param key: The value we are looking for
     :return: The object present at that location
     """
-    cdef SimpleSet true_self
-    _check_self_not_none(self)
-    true_self = self
-    return true_self._get(key)
+    return _check_self(self)._get(key)
 
 
 cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
     """Get the number of active entries in 'self'"""
-    cdef SimpleSet true_self = self
-    _check_self_not_none(self)
-    return true_self.used
+    return _check_self(self)._used
 
 
 # TODO: this should probably have direct tests, since it isn't used by __iter__
@@ -516,14 +525,14 @@
     :return: 0 if nothing left, 1 if we are returning a new value
     """
     cdef Py_ssize_t i, mask
-    cdef SimpleSet true_self = self
+    cdef SimpleSet true_self
     cdef PyObject **table
-    _check_self_not_none(self)
+    true_self = _check_self(self)
     i = pos[0]
     if (i < 0):
         return 0
-    mask = true_self.mask
-    table= true_self.table
+    mask = true_self._mask
+    table= true_self._table
     while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
         i += 1
     pos[0] = i + 1

=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c	2009-10-07 16:12:50 +0000
+++ b/bzrlib/_static_tuple_c.c	2009-10-07 19:31:39 +0000
@@ -76,12 +76,7 @@
 {
     PyObject *unique_key = NULL;
 
-    if (_interned_tuples == NULL) {
-        Py_INCREF(self);
-        return self;
-    }
-    if (_StaticTuple_is_interned(self)) {
-        // Already interned
+    if (_interned_tuples == NULL || _StaticTuple_is_interned(self)) {
         Py_INCREF(self);
         return self;
     }
@@ -92,6 +87,7 @@
     if (!unique_key) {
         // Suppress any error and just return the object
         PyErr_Clear();
+        Py_INCREF(self);
         return self;
     }
     if (unique_key != (PyObject *)self) {
@@ -119,10 +115,11 @@
     int i, len;
 
     if (_StaticTuple_is_interned(self)) {
-        /* revive dead object temporarily for DelItem */
-        // Py_REFCNT(self) = 2;
+        /* revive dead object temporarily for Discard */
+        Py_REFCNT(self) = 2;
         if (SimpleSet_Discard(_interned_tuples, (PyObject*)self) != 1)
             Py_FatalError("deletion of interned StaticTuple failed");
+        self->flags &= ~STATIC_TUPLE_INTERNED_FLAG;
     }
     len = self->size;
     for (i = 0; i < len; ++i) {
@@ -664,7 +661,10 @@
 
     Py_INCREF(&StaticTuple_Type);
     PyModule_AddObject(m, "StaticTuple", (PyObject *)&StaticTuple_Type);
-    import_bzrlib___simple_set_pyx();
+    if (import_bzrlib___simple_set_pyx() == -1) {
+        // We failed to set up, stop early
+        return;
+    }
     setup_interned_tuples(m);
     setup_empty_tuple(m);
     setup_c_api(m);

=== modified file 'bzrlib/tests/test__simple_set.py'
--- a/bzrlib/tests/test__simple_set.py	2009-10-07 15:48:32 +0000
+++ b/bzrlib/tests/test__simple_set.py	2009-10-07 19:31:39 +0000
@@ -243,13 +243,13 @@
         obj.add(k3)
         del obj[k2]
         self.assertFillState(2, 3, 0x3ff, obj)
-        self.assertEqual(1024, obj._resize(500))
+        self.assertEqual(1024, obj._py_resize(500))
         # Doesn't change the size, but does change the content
         self.assertFillState(2, 2, 0x3ff, obj)
         obj.add(k2)
         del obj[k3]
         self.assertFillState(2, 3, 0x3ff, obj)
-        self.assertEqual(4096, obj._resize(4095))
+        self.assertEqual(4096, obj._py_resize(4095))
         self.assertFillState(2, 2, 0xfff, obj)
         self.assertIn(k1, obj)
         self.assertIn(k2, obj)
@@ -259,7 +259,7 @@
         del obj[k2]
         self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
         self.assertFillState(1, 2, 0xfff, obj)
-        self.assertEqual(2048, obj._resize(1024))
+        self.assertEqual(2048, obj._py_resize(1024))
         self.assertFillState(1, 1, 0x7ff, obj)
         self.assertEqual((591, '<null>'), obj._test_lookup(k2))
 

=== modified file 'bzrlib/tests/test__static_tuple.py'
--- a/bzrlib/tests/test__static_tuple.py	2009-10-07 16:12:50 +0000
+++ b/bzrlib/tests/test__static_tuple.py	2009-10-07 19:31:39 +0000
@@ -78,6 +78,20 @@
 
 class TestStaticTuple(tests.TestCase):
 
+    def assertRefcount(self, count, obj):
+        """Assert that the refcount for obj is what we expect.
+
+        Note that this automatically adjusts for the fact that calling
+        assertRefcount actually creates a new pointer, as does calling
+        sys.getrefcount. So pass the expected value *before* the call.
+        """
+        # I don't understand why it is getrefcount()-3 here, but it seems to be
+        # correct. If I check in the calling function, with:
+        # self.assertEqual(count, sys.getrefcount(obj)-1)
+        # Then it works fine. Something about passing it to assertRefcount is
+        # actually double-incrementing (and decrementing) the refcount
+        self.assertEqual(count, sys.getrefcount(obj)-3)
+
     def test_create(self):
         k = self.module.StaticTuple('foo')
         k = self.module.StaticTuple('foo', 'bar')
@@ -129,19 +143,19 @@
 
     def test_refcount(self):
         f = 'fo' + 'oo'
-        num_refs = sys.getrefcount(f)
+        num_refs = sys.getrefcount(f) - 1 #sys.getrefcount() adds one
         k = self.module.StaticTuple(f)
-        self.assertEqual(num_refs + 1, sys.getrefcount(f))
-        b = k[0]
-        self.assertEqual(num_refs + 2, sys.getrefcount(f))
-        b = k[0]
-        self.assertEqual(num_refs + 2, sys.getrefcount(f))
+        self.assertRefcount(num_refs + 1, f)
+        b = k[0]
+        self.assertRefcount(num_refs + 2, f)
+        b = k[0]
+        self.assertRefcount(num_refs + 2, f)
         c = k[0]
-        self.assertEqual(num_refs + 3, sys.getrefcount(f))
+        self.assertRefcount(num_refs + 3, f)
         del b, c
-        self.assertEqual(num_refs + 1, sys.getrefcount(f))
+        self.assertRefcount(num_refs + 1, f)
         del k
-        self.assertEqual(num_refs, sys.getrefcount(f))
+        self.assertRefcount(num_refs, f)
 
     def test__repr__(self):
         k = self.module.StaticTuple('foo', 'bar', 'baz', 'bing')
@@ -313,30 +327,38 @@
     def test__c_intern_handles_refcount(self):
         if self.module is _static_tuple_py:
             return # Not applicable
+        print
         unique_str1 = 'unique str ' + osutils.rand_chars(20)
         unique_str2 = 'unique str ' + osutils.rand_chars(20)
         key = self.module.StaticTuple(unique_str1, unique_str2)
+        self.assertRefcount(1, key)
         self.assertFalse(key in self.module._interned_tuples)
         self.assertFalse(key._is_interned())
         key2 = self.module.StaticTuple(unique_str1, unique_str2)
+        self.assertRefcount(1, key)
+        self.assertRefcount(1, key2)
         self.assertEqual(key, key2)
         self.assertIsNot(key, key2)
-        refcount = sys.getrefcount(key)
-        self.assertEqual(2, refcount)
 
         key3 = key.intern()
         self.assertIs(key, key3)
         self.assertTrue(key in self.module._interned_tuples)
         self.assertEqual(key, self.module._interned_tuples[key])
-        self.assertEqual(3, sys.getrefcount(key))
+        # key and key3, but we 'hide' the one in _interned_tuples
+        self.assertRefcount(2, key)
         del key3
-        # We should not increase the refcount just via 'intern'
-        self.assertEqual(2, sys.getrefcount(key))
+        self.assertRefcount(1, key)
         self.assertTrue(key._is_interned())
-        key2 = key2.intern()
-        # We have one more ref in 'key2' but otherwise no extra refs
-        self.assertEqual(3, sys.getrefcount(key))
-        self.assertIs(key, key2)
+        self.assertRefcount(1, key2)
+        key3 = key2.intern()
+        # key3 now points to key as well, and *not* to key2
+        self.assertRefcount(2, key)
+        self.assertRefcount(1, key2)
+        self.assertIs(key, key3)
+        self.assertIsNot(key3, key2)
+        del key2
+        del key3
+        self.assertRefcount(1, key)
 
     def test__c_keys_are_not_immortal(self):
         if self.module is _static_tuple_py:
@@ -345,15 +367,15 @@
         unique_str2 = 'unique str ' + osutils.rand_chars(20)
         key = self.module.StaticTuple(unique_str1, unique_str2)
         self.assertFalse(key in self.module._interned_tuples)
-        self.assertEqual(2, sys.getrefcount(key))
+        self.assertRefcount(1, key)
         key = key.intern()
-        self.assertEqual(2, sys.getrefcount(key))
+        self.assertRefcount(1, key)
         self.assertTrue(key in self.module._interned_tuples)
         self.assertTrue(key._is_interned())
         del key
         # Create a new entry, which would point to the same location
         key = self.module.StaticTuple(unique_str1, unique_str2)
-        self.assertEqual(2, sys.getrefcount(key))
+        self.assertRefcount(1, key)
         # This old entry in _interned_tuples should be gone
         self.assertFalse(key in self.module._interned_tuples)
         self.assertFalse(key._is_interned())



More information about the bazaar-commits mailing list