Rev 4755: Rename StaticTupleInterner => SimpleSet. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 7 16:48:47 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4755
revision-id: john at arbash-meinel.com-20091007154832-lpipxg46lynh9wmr
parent: john at arbash-meinel.com-20091007152951-znm1sri21leolq61
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 10:48:32 -0500
message:
Rename StaticTupleInterner => SimpleSet.
This is a bit more appropriate, because the internal data type is not
specialized into StaticTuple objects only. Partially because I didn't
see a specific memory/speed tradeoff to caching the hash, and
that accessing said hash was siginficantly faster than just
calling PyObject_Hash().
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore 2009-10-07 15:27:24 +0000
+++ b/.bzrignore 2009-10-07 15:48:32 +0000
@@ -58,9 +58,9 @@
bzrlib/_known_graph_pyx.c
bzrlib/_readdir_pyx.c
bzrlib/_rio_pyx.c
-bzrlib/_static_tuple_interned_pyx.c
-bzrlib/_static_tuple_interned_pyx.h
-bzrlib/_static_tuple_interned_pyx_api.h
+bzrlib/_simple_set_pyx.c
+bzrlib/_simple_set_pyx.h
+bzrlib/_simple_set_pyx_api.h
bzrlib/_walkdirs_win32.c
# built extension modules
bzrlib/_*.dll
=== renamed file 'bzrlib/_static_tuple_interned_pyx.pxd' => 'bzrlib/_simple_set_pyx.pxd'
--- a/bzrlib/_static_tuple_interned_pyx.pxd 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_simple_set_pyx.pxd 2009-10-07 15:48:32 +0000
@@ -14,7 +14,14 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-"""Interface definition of a class to intern StaticTuple objects."""
+"""Interface definition of a class like PySet but without caching the hash.
+
+This is generally useful when you want to 'intern' objects, etc. Note that this
+differs from Set in that we:
+ 1) Don't have all of the .intersection, .difference, etc functions
+ 2) Do return the object from the set via queries
+ eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
+"""
cdef extern from "python-compat.h":
ctypedef long Py_ssize_t
@@ -23,8 +30,18 @@
ctypedef struct PyObject:
pass
-cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
- type StaticTupleInterner_type]:
+cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
+ """A class similar to PySet, but with simpler implementation.
+
+ The main advantage is that this class uses only 2N memory to store N
+ objects rather than 4N memory. The main trade-off is that we do not cache
+ the hash value of saved objects. As such, it is assumed that computing the
+ hash will be cheap (such as strings or tuples of strings, etc.)
+
+ This also differs in that you can get back the objects that are stored
+ (like a dict), but we also don't implement the complete list of 'set'
+ operations (difference, intersection, etc).
+ """
cdef readonly Py_ssize_t used # active
cdef readonly Py_ssize_t fill # active + dummy
@@ -41,4 +58,4 @@
# TODO: might want to export the C api here, though it is all available from
# the class object...
-cdef api object StaticTupleInterner_Add(object self, object key)
+cdef api object SimpleSet_Add(object self, object key)
=== renamed file 'bzrlib/_static_tuple_interned_pyx.pyx' => 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_simple_set_pyx.pyx 2009-10-07 15:48:32 +0000
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-"""Definition of a class that is used to intern StaticTuple objects."""
+"""Definition of a class that is similar to Set with some small changes."""
cdef extern from "Python.h":
ctypedef unsigned long size_t
@@ -54,10 +54,6 @@
if res == Py_True:
Py_DECREF(res)
return 1
- # Only handled for now because we are testing with stuff like tuples versus
- # StaticTuple objects. If we decide to limit StaticTupleInterner to
- # strictly only allowing StaticTuple objects, then this is no longer
- # required, and Py_NotImplemented => not equal
if res == Py_NotImplemented:
Py_DECREF(res)
res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
@@ -68,9 +64,8 @@
return 0
-cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
- type StaticTupleInterner_type]:
- """This class tracks the canonical forms for StaticTuples.
+cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
+ """This class can be used to track canonical forms for objects.
It is similar in function to the interned dictionary that is used by
strings. However:
@@ -87,10 +82,6 @@
DEF DEFAULT_SIZE=1024
DEF PERTURB_SHIFT=5
- # Note that most of the members on this class are just thunks over to the C
- # api. However, this provides a nice Python/Pyrex api, as well as making it
- # easy to test the C api from pure python.
-
def __init__(self):
cdef Py_ssize_t size, n_bytes
@@ -125,7 +116,7 @@
return <int>(slot - self.table), res
def __contains__(self, key):
- """Is key present in this StaticTupleInterner."""
+ """Is key present in this SimpleSet."""
cdef PyObject **slot
slot = _lookup(self, key)
@@ -152,10 +143,6 @@
val = <object>(py_val)
return val
- # def __setitem__(self, key, value):
- # assert key == value
- # self._add(key)
-
cdef int _insert_clean(self, PyObject *key) except -1:
"""Insert a key into self.table.
@@ -208,7 +195,7 @@
# removed
# TODO: Test this
# if new_size < self.used:
- # raise RuntimeError('cannot shrink StaticTupleInterner to something'
+ # raise RuntimeError('cannot shrink SimpleSet to something'
# ' smaller than the number of used slots.')
n_bytes = sizeof(PyObject*) * new_size;
new_table = <PyObject **>PyMem_Malloc(n_bytes)
@@ -313,22 +300,22 @@
raise KeyError('Key %s not present' % (key,))
def __iter__(self):
- return _StaticTupleInterner_iterator(self)
-
-
-cdef class _StaticTupleInterner_iterator:
- """Iterator over the StaticTupleInterner structure."""
+ return _SimpleSet_iterator(self)
+
+
+cdef class _SimpleSet_iterator:
+ """Iterator over the SimpleSet structure."""
cdef Py_ssize_t pos
- cdef StaticTupleInterner table
+ cdef SimpleSet set
cdef Py_ssize_t used # track if things have been mutated while iterating
cdef Py_ssize_t len # number of entries left
def __init__(self, obj):
- self.table = obj
+ self.set = obj
self.pos = 0
- self.used = self.table.used
- self.len = self.table.used
+ self.used = self.set.used
+ self.len = self.set.used
def __iter__(self):
return self
@@ -337,22 +324,22 @@
cdef Py_ssize_t mask, i
cdef PyObject **table
- if self.table is None:
+ if self.set is None:
raise StopIteration
- if self.table.used != self.used:
+ if self.set.used != self.used:
# Force this exception to continue to be raised
self.used = -1
raise RuntimeError("Set size changed during iteration")
i = self.pos
- mask = self.table.mask
- table = self.table.table
+ mask = self.set.mask
+ table = self.set.table
assert i >= 0
while i <= mask and (table[i] == NULL or table[i] == _dummy):
i += 1
self.pos = i + 1
if i > mask:
# we walked to the end
- self.table = None
+ self.set = None
raise StopIteration
# We must have found one
key = <object>(table[i])
@@ -360,15 +347,15 @@
return key
def __length_hint__(self):
- if self.table is not None and self.used == self.table.used:
+ if self.set is not None and self.used == self.set.used:
return self.len
return 0
-cdef api StaticTupleInterner StaticTupleInterner_New():
- """Create a new StaticTupleInterner object."""
- return StaticTupleInterner()
+cdef api SimpleSet SimpleSet_New():
+ """Create a new SimpleSet object."""
+ return SimpleSet()
cdef inline int _check_self_not_none(object self) except -1:
@@ -383,7 +370,7 @@
raise TypeError('self must not be None')
-cdef inline PyObject **_lookup(StaticTupleInterner self,
+cdef inline PyObject **_lookup(SimpleSet self,
object key) except NULL:
"""Find the slot where 'key' would fit.
@@ -439,7 +426,7 @@
raise AssertionError('should never get here')
-cdef api PyObject **_StaticTupleInterner_Lookup(object self,
+cdef api PyObject **_SimpleSet_Lookup(object self,
object key) except NULL:
"""Find the slot where 'key' would fit.
@@ -456,45 +443,45 @@
return _lookup(self, key)
-cdef api object StaticTupleInterner_Add(object self, object key):
- """Add a key to the StaticTupleInterner (set).
+cdef api object SimpleSet_Add(object self, object key):
+ """Add a key to the SimpleSet (set).
- :param self: The StaticTupleInterner to add the key to.
+ :param self: The SimpleSet to add the key to.
:param key: The key to be added. If the key is already present,
self will not be modified
:return: The current key stored at the location defined by 'key'.
This may be the same object, or it may be an equivalent object.
(consider dict.setdefault(key, key))
"""
- cdef StaticTupleInterner true_self
+ cdef SimpleSet true_self
_check_self_not_none(self)
true_self = self
return true_self.add(key)
-cdef api bint StaticTupleInterner_Contains(object self, object key) except -1:
+cdef api bint SimpleSet_Contains(object self, object key) except -1:
"""Is key present in self?"""
- cdef StaticTupleInterner true_self
+ cdef SimpleSet true_self
_check_self_not_none(self)
true_self = self
return key in true_self
-cdef api int StaticTupleInterner_Discard(object self, object key) except -1:
+cdef api int SimpleSet_Discard(object self, object key) except -1:
"""Remove the object referenced at location 'key'.
- :param self: The StaticTupleInterner being modified
+ :param self: The SimpleSet being modified
:param key: The key we are checking on
:return: 1 if there was an object present, 0 if there was not, and -1 on
error.
"""
- cdef StaticTupleInterner true_self
+ cdef SimpleSet true_self
_check_self_not_none(self)
true_self = self
return true_self.discard(key)
-cdef api PyObject *StaticTupleInterner_Get(StaticTupleInterner self,
+cdef api PyObject *SimpleSet_Get(SimpleSet self,
object key) except? NULL:
"""Get a pointer to the object present at location 'key'.
@@ -505,23 +492,23 @@
:param key: The value we are looking for
:return: The object present at that location
"""
- cdef StaticTupleInterner true_self
+ cdef SimpleSet true_self
_check_self_not_none(self)
true_self = self
return true_self._get(key)
-cdef api Py_ssize_t StaticTupleInterner_Size(object self) except -1:
+cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
"""Get the number of active entries in 'self'"""
- cdef StaticTupleInterner true_self = self
+ cdef SimpleSet true_self = self
_check_self_not_none(self)
return true_self.used
# TODO: this should probably have direct tests, since it isn't used by __iter__
-cdef api int StaticTupleInterner_Next(object self, Py_ssize_t *pos,
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
PyObject **key):
- """Walk over items in a StaticTupleInterner.
+ """Walk over items in a SimpleSet.
:param pos: should be initialized to 0 by the caller, and will be updated
by this function
@@ -529,7 +516,7 @@
:return: 0 if nothing left, 1 if we are returning a new value
"""
cdef Py_ssize_t i, mask
- cdef StaticTupleInterner true_self = self
+ cdef SimpleSet true_self = self
cdef PyObject **table
_check_self_not_none(self)
i = pos[0]
=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_static_tuple_c.c 2009-10-07 15:48:32 +0000
@@ -22,7 +22,7 @@
#include "_static_tuple_c.h"
#include "_export_c_api.h"
-#include "_static_tuple_interned_pyx_api.h"
+#include "_simple_set_pyx_api.h"
#include "python-compat.h"
@@ -85,10 +85,10 @@
Py_INCREF(self);
return self;
}
- /* StaticTupleInterner_Add returns whatever object is present at self
+ /* SimpleSet_Add returns whatever object is present at self
* or the new object if it needs to add it.
*/
- unique_key = StaticTupleInterner_Add(_interned_tuples, (PyObject *)self);
+ unique_key = SimpleSet_Add(_interned_tuples, (PyObject *)self);
if (!unique_key) {
// Suppress any error and just return the object
PyErr_Clear();
@@ -121,7 +121,7 @@
if (_StaticTuple_is_interned(self)) {
/* revive dead object temporarily for DelItem */
// Py_REFCNT(self) = 2;
- if (StaticTupleInterner_Discard(_interned_tuples, (PyObject*)self) != 1)
+ if (SimpleSet_Discard(_interned_tuples, (PyObject*)self) != 1)
Py_FatalError("deletion of interned StaticTuple failed");
}
len = self->size;
@@ -591,11 +591,11 @@
*/
(traverseproc)StaticTuple_traverse, /* tp_traverse */
0, /* tp_clear */
- // TODO: implement richcompare, we should probably be able to compare vs an
- // tuple, as well as versus another StaticTuples object.
StaticTuple_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
- // We could implement this as returning tuples of keys...
+ // without implementing tp_iter, Python will fall back to PySequence*
+ // which seems to work ok, we may need something faster/lighter in the
+ // future.
0, /* tp_iter */
0, /* tp_iternext */
StaticTuple_methods, /* tp_methods */
@@ -612,75 +612,7 @@
};
-static char KeyIntern_doc[] = "";
-
-static PyMethodDef KeyIntern_methods[] = {
- // {"as_tuple", (PyCFunction)Keys_as_tuple, METH_NOARGS, Keys_as_tuple_doc},
- {NULL, NULL} /* sentinel */
-};
-
-// static PySequenceMethods KeyIntern_as_sequence = {
-// 0, //(lenfunc)Keys_length, /* sq_length */
-// 0, /* sq_concat */
-// 0, /* sq_repeat */
-// 0, //(ssizeargfunc)Keys_item, /* sq_item */
-// 0, /* sq_slice */
-// 0, /* sq_ass_item */
-// 0, /* sq_ass_slice */
-// 0, /* sq_contains */
-// };
-
-// static PyTypeObject KeyIntern_Type = {
-// PyObject_HEAD_INIT(NULL)
-// 0, /* ob_size */
-// "KeyIntern", /* tp_name */
-// sizeof(KeyIntern) - sizeof(Key *), /* tp_basicsize */
-// sizeof(Key *), /* tp_itemsize */
-// 0, //(destructor)Keys_dealloc, /* tp_dealloc */
-// 0, /* tp_print */
-// 0, /* tp_getattr */
-// 0, /* tp_setattr */
-// 0, /* tp_compare */
-// // TODO: implement repr() and possibly str()
-// 0, //(reprfunc)Keys_repr, /* tp_repr */
-// 0, /* tp_as_number */
-// &KeyIntern_as_sequence, /* tp_as_sequence */
-// 0, /* tp_as_mapping */
-// 0, //(hashfunc)Keys_hash, /* tp_hash */
-// 0, /* tp_call */
-// 0, /* tp_str */
-// PyObject_GenericGetAttr, /* tp_getattro */
-// 0, /* tp_setattro */
-// 0, /* tp_as_buffer */
-// Py_TPFLAGS_DEFAULT, /* tp_flags*/
-// 0, // Keys_doc, /* tp_doc */
-// /* See Key_traverse for why we have this, even though we aren't GC */
-// 0, //(traverseproc)Keys_traverse, /* tp_traverse */
-// 0, /* tp_clear */
-// // TODO: implement richcompare, we should probably be able to compare vs an
-// // tuple, as well as versus another Keys object.
-// 0, //Keys_richcompare, /* tp_richcompare */
-// 0, /* tp_weaklistoffset */
-// // We could implement this as returning tuples of keys...
-// 0, /* tp_iter */
-// 0, /* tp_iternext */
-// KeyIntern_methods, /* tp_methods */
-// 0, /* tp_members */
-// 0, /* tp_getset */
-// 0, /* tp_base */
-// 0, /* tp_dict */
-// 0, /* tp_descr_get */
-// 0, /* tp_descr_set */
-// 0, /* tp_dictoffset */
-// 0, /* tp_init */
-// 0, /* tp_alloc */
-// 0, //Keys_new, /* tp_new */
-// };
-
-
static PyMethodDef static_tuple_c_methods[] = {
-// {"unique_lcs_c", py_unique_lcs, METH_VARARGS},
-// {"recurse_matches_c", py_recurse_matches, METH_VARARGS},
{NULL, NULL}
};
@@ -688,7 +620,7 @@
static void
setup_interned_tuples(PyObject *m)
{
- _interned_tuples = (PyObject *)StaticTupleInterner_New();
+ _interned_tuples = (PyObject *)SimpleSet_New();
if (_interned_tuples != NULL) {
Py_INCREF(_interned_tuples);
PyModule_AddObject(m, "_interned_tuples", _interned_tuples);
@@ -748,7 +680,7 @@
Py_INCREF(&StaticTuple_Type);
PyModule_AddObject(m, "StaticTuple", (PyObject *)&StaticTuple_Type);
- import_bzrlib___static_tuple_interned_pyx();
+ import_bzrlib___simple_set_pyx();
setup_interned_tuples(m);
setup_empty_tuple(m);
setup_c_api(m);
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2009-10-06 18:14:06 +0000
+++ b/bzrlib/tests/__init__.py 2009-10-07 15:48:32 +0000
@@ -3669,8 +3669,8 @@
'bzrlib.tests.test__groupcompress',
'bzrlib.tests.test__known_graph',
'bzrlib.tests.test__rio',
+ 'bzrlib.tests.test__simple_set',
'bzrlib.tests.test__static_tuple',
- 'bzrlib.tests.test__static_tuple_interned',
'bzrlib.tests.test__walkdirs_win32',
'bzrlib.tests.test_ancestry',
'bzrlib.tests.test_annotate',
=== renamed file 'bzrlib/tests/test__static_tuple_interned.py' => 'bzrlib/tests/test__simple_set.py'
--- a/bzrlib/tests/test__static_tuple_interned.py 2009-10-07 15:29:51 +0000
+++ b/bzrlib/tests/test__simple_set.py 2009-10-07 15:48:32 +0000
@@ -19,7 +19,6 @@
import sys
from bzrlib import (
- # _static_tuple_py,
errors,
osutils,
tests,
@@ -29,7 +28,7 @@
test__static_tuple,
)
try:
- from bzrlib import _static_tuple_interned_pyx as _module
+ from bzrlib import _simple_set_pyx as _module
except ImportError:
_module = None
try:
@@ -39,7 +38,7 @@
# Even though this is an extension, we don't permute the tests for a python
-# version. As the plain python version is just a dict.
+# version. As the plain python version is just a dict or set
class _CompiledStaticTupleInterned(tests.Feature):
@@ -49,7 +48,7 @@
return True
def feature_name(self):
- return 'bzrlib._static_tuple_interned_pyx'
+ return 'bzrlib._simple_set_pyx'
CompiledStaticTupleInterned = _CompiledStaticTupleInterned()
@@ -85,7 +84,7 @@
self.assertEqual(count+3, sys.getrefcount(obj))
def test_initial(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
self.assertEqual(0, len(obj))
st = StaticTuple('foo', 'bar')
self.assertFillState(0, 0, 0x3ff, obj)
@@ -99,7 +98,7 @@
# ('a', 'a'), ('f', '4'), ('p', 'r'), ('q', '1'), ('F', 'T'),
# ('Q', 'Q'), ('V', 'd'), ('7', 'C')
# all collide @ 643
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
offset, val = obj._test_lookup(StaticTuple('a', 'a'))
self.assertEqual(643, offset)
self.assertEqual('<null>', val)
@@ -111,7 +110,7 @@
self.assertEqual('<null>', val)
def test_get_set_del_with_collisions(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
k1 = StaticTuple('a', 'a')
k2 = StaticTuple('f', '4') # collides
k3 = StaticTuple('p', 'r')
@@ -161,7 +160,7 @@
self.assertNotIn(k4, obj)
def test_add(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
self.assertFillState(0, 0, 0x3ff, obj)
k1 = StaticTuple('foo')
self.assertRefcount(1, k1)
@@ -201,7 +200,7 @@
self.assertRefcount(2, k3)
def test_discard(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
k1 = StaticTuple('foo')
k2 = StaticTuple('foo')
k3 = StaticTuple('bar')
@@ -218,7 +217,7 @@
self.assertRefcount(1, k3)
def test__delitem__(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
k1 = StaticTuple('foo')
k2 = StaticTuple('foo')
k3 = StaticTuple('bar')
@@ -235,7 +234,7 @@
self.assertRefcount(1, k3)
def test__resize(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
k1 = StaticTuple('foo')
k2 = StaticTuple('bar')
k3 = StaticTuple('baz')
@@ -265,7 +264,7 @@
self.assertEqual((591, '<null>'), obj._test_lookup(k2))
def test_add_and_remove_lots_of_items(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
for i in chars:
for j in chars:
@@ -285,7 +284,7 @@
self.assertTrue(obj.fill < 1024 / 5)
def test__iter__(self):
- obj = _module.StaticTupleInterner()
+ obj = _module.SimpleSet()
k1 = StaticTuple('1')
k2 = StaticTuple('1', '2')
k3 = StaticTuple('3', '4')
=== modified file 'setup.py'
--- a/setup.py 2009-10-07 15:27:24 +0000
+++ b/setup.py 2009-10-07 15:48:32 +0000
@@ -294,7 +294,7 @@
add_pyrex_extension('bzrlib._chk_map_pyx', libraries=[z_lib])
ext_modules.append(Extension('bzrlib._patiencediff_c',
['bzrlib/_patiencediff_c.c']))
-add_pyrex_extension('bzrlib._static_tuple_interned_pyx')
+add_pyrex_extension('bzrlib._simple_set_pyx')
ext_modules.append(Extension('bzrlib._static_tuple_c',
['bzrlib/_static_tuple_c.c']))
add_pyrex_extension('bzrlib._btree_serializer_pyx')
More information about the bazaar-commits
mailing list