Rev 4739: Start working on more of the C api for StaticTupleInterner. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Fri Oct 2 16:52:00 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4739
revision-id: john at arbash-meinel.com-20091002155154-ouf4jb02y0wxfklv
parent: john at arbash-meinel.com-20091002155103-6wgtx9aosbx99m77
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Fri 2009-10-02 10:51:54 -0500
message:
Start working on more of the C api for StaticTupleInterner.
It is really nice to have the except -1 clauses, as suddenly you don't have
to manually check the return values of all of your functions. \o/
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore 2009-10-02 05:41:24 +0000
+++ b/.bzrignore 2009-10-02 15:51:54 +0000
@@ -52,6 +52,8 @@
bzrlib/_readdir_pyx.c
bzrlib/_rio_pyx.c
bzrlib/_static_tuple_interned_pyx.c
+bzrlib/_static_tuple_interned_pyx.h
+bzrlib/_static_tuple_interned_pyx_api.h
bzrlib/_walkdirs_win32.c
doc/en/release-notes/NEWS.txt
doc/en/developer-guide/HACKING.txt
=== modified file 'bzrlib/_static_tuple_interned_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx 2009-10-02 05:41:24 +0000
+++ b/bzrlib/_static_tuple_interned_pyx.pyx 2009-10-02 15:51:54 +0000
@@ -56,6 +56,10 @@
if res == Py_True:
Py_DECREF(res)
return 1
+ # Only handled for now because we are testing with stuff like tuples versus
+ # StaticTuple objects. If we decide to limit StaticTupleInterner to
+ # strictly only allowing StaticTuple objects, then this is no longer
+ # required, and Py_NotImplemented => not equal
if res == Py_NotImplemented:
Py_DECREF(res)
res = other.ob_type.tp_richcompare(other, this, Py_EQ)
@@ -66,7 +70,8 @@
return 0
-cdef class StaticTupleInterner:
+cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
+ type StaticTupleInterner_type]:
"""This class tracks the canonical forms for StaticTuples.
It is similar in function to the interned dictionary that is used by
@@ -111,60 +116,10 @@
def __len__(self):
return self.used
- cdef PyObject **_lookup(self, key, long hash) except NULL:
- """Find the slot where 'key' would fit.
-
- This is the same as a dicts 'lookup' function
-
- :param key: An object we are looking up
- :param hash: The hash for key
- :return: The location in self.table where key should be put
- should never be NULL, but may reference a NULL (PyObject*)
- """
- cdef size_t i, perturb
- cdef Py_ssize_t mask
- cdef long this_hash
- cdef PyObject **table, **cur, **free_slot, *py_key
-
- mask = self.mask
- table = self.table
- i = hash & mask
- cur = &table[i]
- py_key = <PyObject *>key
- if cur[0] == NULL:
- # Found a blank spot, or found the exact key
- return cur
- if cur[0] == py_key:
- return cur
- if cur[0] == _dummy:
- free_slot = cur
- else:
- if _is_equal(py_key, hash, cur[0]):
- # Both py_key and cur[0] belong in this slot, return it
- return cur
- free_slot = NULL
- # size_t is unsigned, hash is signed...
- perturb = hash
- while True:
- i = (i << 2) + i + perturb + 1
- cur = &table[i & mask]
- if cur[0] == NULL: # Found an empty spot
- if free_slot: # Did we find a _dummy earlier?
- return free_slot
- else:
- return cur
- if (cur[0] == py_key # exact match
- or _is_equal(py_key, hash, cur[0])): # Equivalent match
- return cur
- if (cur[0] == _dummy and free_slot == NULL):
- free_slot = cur
- perturb >>= PERTURB_SHIFT
- raise AssertionError('should never get here')
-
def _test_lookup(self, key):
cdef PyObject **slot
- slot = self._lookup(key, hash(key))
+ slot = _StaticTupleInterner_Lookup(self, key, hash(key))
if slot[0] == NULL:
res = '<null>'
elif slot[0] == _dummy:
@@ -176,37 +131,147 @@
def __contains__(self, key):
cdef PyObject **slot
- slot = self._lookup(key, hash(key))
+ slot = _StaticTupleInterner_Lookup(self, key, hash(key))
if slot[0] == NULL or slot[0] == _dummy:
return False
return True
def __getitem__(self, key):
+ """Return a stored item that is equivalent to key."""
cdef PyObject **slot
- slot = self._lookup(key, hash(key))
+ slot = _StaticTupleInterner_Lookup(self, key, hash(key))
if slot[0] == NULL or slot[0] == _dummy:
raise KeyError("Key %s is not present" % key)
val = <object>(slot[0])
return val
- def __setitem__(self, key, value):
- cdef PyObject **slot, *py_key
- assert key == value
-
- slot = self._lookup(key, hash(key))
- if (slot[0] == NULL or slot[0] == _dummy):
- py_key = <PyObject *>key
- Py_INCREF(py_key)
- slot[0] = py_key
+ # def __setitem__(self, key, value):
+ # assert key == value
+ # self._add(key)
+
+ def add(self, key):
+ """Similar to set.add(), start tracking this key.
+
+ There is one small difference, which is that we return the object that
+ is stored at the given location. (which is closer to the
+ dict.setdefault() functionality.)
+ """
+ return StaticTupleInterner_Add(self, key)
+
+ def discard(self, key):
+ """Remove key from the dict, whether it exists or not.
+
+ :return: 0 if the item did not exist, 1 if it did
+ """
+ return StaticTupleInterner_Discard(self, key)
+
def __delitem__(self, key):
- cdef PyObject **slot, *py_key
-
- slot = self._lookup(key, hash(key))
- if (slot[0] == NULL or slot[0] == _dummy):
- pass # Raise KeyError
- return
- # Found it
- # TODO: Check refcounts
- Py_DECREF(slot[0])
- slot[0] = _dummy
+ """Remove the given item from the dict.
+
+ Raise a KeyError if the key was not present.
+ """
+ cdef int exists
+ exists = StaticTupleInterner_Discard(self, key)
+ if not exists:
+ raise KeyError('Key %s not present' % (key,))
+
+
+cdef api inline PyObject **_StaticTupleInterner_Lookup(
+ StaticTupleInterner self, key, long hash) except NULL:
+ """Find the slot where 'key' would fit.
+
+ This is the same as a dicts 'lookup' function.
+
+ :param key: An object we are looking up
+ :param hash: The hash for key
+ :return: The location in self.table where key should be put
+ should never be NULL, but may reference a NULL (PyObject*)
+ """
+ cdef size_t i, perturb
+ cdef Py_ssize_t mask
+ cdef long this_hash
+ cdef PyObject **table, **cur, **free_slot, *py_key
+
+ mask = self.mask
+ table = self.table
+ i = hash & mask
+ cur = &table[i]
+ py_key = <PyObject *>key
+ if cur[0] == NULL:
+ # Found a blank spot, or found the exact key
+ return cur
+ if cur[0] == py_key:
+ return cur
+ if cur[0] == _dummy:
+ free_slot = cur
+ else:
+ if _is_equal(py_key, hash, cur[0]):
+ # Both py_key and cur[0] belong in this slot, return it
+ return cur
+ free_slot = NULL
+ # size_t is unsigned, hash is signed...
+ perturb = hash
+ while True:
+ i = (i << 2) + i + perturb + 1
+ cur = &table[i & mask]
+ if cur[0] == NULL: # Found an empty spot
+ if free_slot: # Did we find a _dummy earlier?
+ return free_slot
+ else:
+ return cur
+ if (cur[0] == py_key # exact match
+ or _is_equal(py_key, hash, cur[0])): # Equivalent match
+ return cur
+ if (cur[0] == _dummy and free_slot == NULL):
+ free_slot = cur
+ perturb >>= PERTURB_SHIFT
+ raise AssertionError('should never get here')
+
+
+cdef api object StaticTupleInterner_Add(StaticTupleInterner self, object key):
+ """Add a key to the StaticTupleInterner (set).
+
+ :param self: The StaticTupleInterner to add the key to.
+ :param key: The key to be added. If the key is already present,
+ self will not be modified
+ :return: The current key stored at the location defined by 'key'.
+ This may be the same object, or it may be an equivalent object.
+ (consider dict.setdefault(key, key))
+ """
+ cdef PyObject **slot, *py_key
+
+ slot = _StaticTupleInterner_Lookup(self, key, hash(key))
+ py_key = <PyObject *>key
+ if (slot[0] == NULL):
+ Py_INCREF(py_key)
+ self.fill += 1
+ self.used += 1
+ slot[0] = py_key
+ elif (slot[0] == _dummy):
+ Py_INCREF(py_key)
+ self.used += 1
+ slot[0] = py_key
+ # No else: clause. If _StaticTupleInterner_Lookup returns a pointer to
+ # a live object, then we already have a value at this location.
+ return <object>(slot[0])
+
+
+cdef api int StaticTupleInterner_Discard(StaticTupleInterner self,
+ object key) except -1:
+ """Remove the object referenced at location 'key'.
+
+ :param self: The StaticTupleInterner being modified
+ :param key: The key we are checking on
+ :return: 1 if there was an object present, 0 if there was not, and -1 on
+ error.
+ """
+ cdef PyObject **slot, *py_key
+
+ slot = _StaticTupleInterner_Lookup(self, key, hash(key))
+ if slot[0] == NULL or slot[0] == _dummy:
+ return 0
+ self.used -= 1
+ Py_DECREF(slot[0])
+ slot[0] = _dummy
+ return 1
=== modified file 'bzrlib/tests/test__static_tuple_interned.py'
--- a/bzrlib/tests/test__static_tuple_interned.py 2009-10-02 05:41:24 +0000
+++ b/bzrlib/tests/test__static_tuple_interned.py 2009-10-02 15:51:54 +0000
@@ -16,6 +16,8 @@
"""Tests for the StaticTupleInterned type."""
+import sys
+
from bzrlib import (
# _static_tuple_py,
errors,
@@ -65,13 +67,28 @@
self.assertTrue(obj not in container,
'We found %s in %s' % (obj, container))
+ def assertFillState(self, used, fill, mask, obj):
+ self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))
+
+ def assertRefcount(self, count, obj):
+ """Assert that the refcount for obj is what we expect.
+
+ Note that this automatically adjusts for the fact that calling
+ assertRefcount actually creates a new pointer, as does calling
+ sys.getrefcount. So pass the expected value *before* the call.
+ """
+ # I don't understand why it is count+3 here, but it seems to be
+ # correct. If I check in the calling function, with:
+ # self.assertEqual(count+1, sys.getrefcount(obj))
+ # Then it works fine. Something about passing it to assertRefcount is
+ # actually double-incrementing (and decrementing) the refcount
+ self.assertEqual(count+3, sys.getrefcount(obj))
+
def test_initial(self):
obj = _module.StaticTupleInterner()
self.assertEqual(0, len(obj))
st = StaticTuple('foo', 'bar')
- self.assertEqual(0, obj.used)
- self.assertEqual(0, obj.fill)
- self.assertEqual(0x3ff, obj.mask) # 1024 - 1
+ self.assertFillState(0, 0, 0x3ff, obj)
def test__lookup(self):
# The tuple hash function is rather good at entropy. For all integers
@@ -103,7 +120,7 @@
self.assertEqual((643, '<null>'), obj._test_lookup(k2))
self.assertEqual((643, '<null>'), obj._test_lookup(k3))
self.assertEqual((643, '<null>'), obj._test_lookup(k4))
- obj[k1] = k1
+ obj.add(k1)
self.assertIn(k1, obj)
self.assertNotIn(k2, obj)
self.assertNotIn(k3, obj)
@@ -113,7 +130,7 @@
self.assertEqual((787, '<null>'), obj._test_lookup(k3))
self.assertEqual((787, '<null>'), obj._test_lookup(k4))
self.assertIs(k1, obj[k1])
- obj[k2] = k2
+ obj.add(k2)
self.assertIs(k2, obj[k2])
self.assertEqual((643, k1), obj._test_lookup(k1))
self.assertEqual((787, k2), obj._test_lookup(k2))
@@ -126,7 +143,7 @@
self.assertEqual((787, k2), obj._test_lookup(('f', '4')))
self.assertEqual((660, '<null>'), obj._test_lookup(('p', 'r')))
self.assertEqual((180, '<null>'), obj._test_lookup(('q', '1')))
- obj[k3] = k3
+ obj.add(k3)
self.assertIs(k3, obj[k3])
self.assertIn(k1, obj)
self.assertIn(k2, obj)
@@ -142,3 +159,77 @@
self.assertIn(k2, obj)
self.assertIn(k3, obj)
self.assertNotIn(k4, obj)
+
+ def test_add(self):
+ obj = _module.StaticTupleInterner()
+ self.assertFillState(0, 0, 0x3ff, obj)
+ k1 = StaticTuple('foo')
+ self.assertRefcount(1, k1)
+ self.assertIs(k1, obj.add(k1))
+ self.assertFillState(1, 1, 0x3ff, obj)
+ self.assertRefcount(2, k1)
+ ktest = obj[k1]
+ self.assertRefcount(3, k1)
+ self.assertIs(k1, ktest)
+ del ktest
+ self.assertRefcount(2, k1)
+ k2 = StaticTuple('foo')
+ self.assertRefcount(1, k2)
+ self.assertIsNot(k1, k2)
+ # doesn't add anything, so the counters shouldn't be adjusted
+ self.assertIs(k1, obj.add(k2))
+ self.assertFillState(1, 1, 0x3ff, obj)
+ self.assertRefcount(2, k1) # not changed
+ self.assertRefcount(1, k2) # not incremented
+ self.assertIs(k1, obj[k1])
+ self.assertIs(k1, obj[k2])
+ self.assertRefcount(2, k1)
+ self.assertRefcount(1, k2)
+ # Deleting an entry should remove the fill, but not the used
+ del obj[k1]
+ self.assertFillState(0, 1, 0x3ff, obj)
+ self.assertRefcount(1, k1)
+ k3 = StaticTuple('bar')
+ self.assertRefcount(1, k3)
+ self.assertIs(k3, obj.add(k3))
+ self.assertFillState(1, 2, 0x3ff, obj)
+ self.assertRefcount(2, k3)
+ self.assertIs(k2, obj.add(k2))
+ self.assertFillState(2, 2, 0x3ff, obj)
+ self.assertRefcount(1, k1)
+ self.assertRefcount(2, k2)
+ self.assertRefcount(2, k3)
+
+ def test_discard(self):
+ obj = _module.StaticTupleInterner()
+ k1 = StaticTuple('foo')
+ k2 = StaticTuple('foo')
+ k3 = StaticTuple('bar')
+ self.assertRefcount(1, k1)
+ self.assertRefcount(1, k2)
+ self.assertRefcount(1, k3)
+ obj.add(k1)
+ self.assertRefcount(2, k1)
+ self.assertEqual(0, obj.discard(k3))
+ self.assertRefcount(1, k3)
+ obj.add(k3)
+ self.assertRefcount(2, k3)
+ self.assertEqual(1, obj.discard(k3))
+ self.assertRefcount(1, k3)
+
+ def test__delitem__(self):
+ obj = _module.StaticTupleInterner()
+ k1 = StaticTuple('foo')
+ k2 = StaticTuple('foo')
+ k3 = StaticTuple('bar')
+ self.assertRefcount(1, k1)
+ self.assertRefcount(1, k2)
+ self.assertRefcount(1, k3)
+ obj.add(k1)
+ self.assertRefcount(2, k1)
+ self.assertRaises(KeyError, obj.__delitem__, k3)
+ self.assertRefcount(1, k3)
+ obj.add(k3)
+ self.assertRefcount(2, k3)
+ del obj[k3]
+ self.assertRefcount(1, k3)
More information about the bazaar-commits
mailing list