Rev 4708: Start work on implementing a Key.intern() function. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
John Arbash Meinel
john at arbash-meinel.com
Tue Sep 29 16:25:16 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
------------------------------------------------------------
revno: 4708
revision-id: john at arbash-meinel.com-20090929152512-md6x01vbcb9rl2ul
parent: john at arbash-meinel.com-20090928191408-7im66hra3wfcolhz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Tue 2009-09-29 10:25:12 -0500
message:
Start work on implementing a Key.intern() function.
As part of this, I'm upgrading the python type to be a fully fledged class.
It will have *more* overhead than just using tuples (given that we wrap a tuple)
but at least it means we can have an interesting class object that does
more than just a tuple interface.
-------------- next part --------------
=== modified file 'bzrlib/_keys_type_c.c'
--- a/bzrlib/_keys_type_c.c 2009-09-28 19:14:08 +0000
+++ b/bzrlib/_keys_type_c.c 2009-09-29 15:25:12 +0000
@@ -36,6 +36,9 @@
static PyObject *Keys_item(Keys *self, Py_ssize_t offset);
+static PyObject *key_intern = NULL;
+
+
#define Key_CheckExact(op) (Py_TYPE(op) == &Key_Type)
@@ -59,8 +62,42 @@
return tpl;
}
+
static char Key_as_tuple_doc[] = "as_tuple() => tuple";
+static Key *
+Key_intern(Key *self)
+{
+ PyObject *unique_key = NULL;
+
+ if (key_intern == NULL) {
+ return self;
+ }
+ unique_key = PyDict_GetItem((PyObject *)key_intern, (PyObject *)self);
+ if (unique_key) {
+ // An entry already existed, return it, instead of self
+ Py_INCREF(unique_key);
+ return (Key *)unique_key;
+ }
+ // An entry did not exist, make 'self' the unique item
+ if (PyDict_SetItem(key_intern, (PyObject *)self, (PyObject *)self) < 0) {
+ // Suppress an error
+ PyErr_Clear();
+ return self;
+ }
+ // self was added to the dict, return it.
+ Py_INCREF(self);
+ return self;
+}
+
+static char Key_intern_doc[] = "intern() => unique Key\n"
+ "Return a 'canonical' Key object.\n"
+ "Similar to intern() for strings, this makes sure there\n"
+ "is only one Key object for a given value\n."
+ "Common usage is:\n"
+ " key = Key('foo', 'bar').intern()\n";
+
+
static void
Key_dealloc(Key *self)
{
@@ -396,6 +433,7 @@
static PyMethodDef Key_methods[] = {
{"as_tuple", (PyCFunction)Key_as_tuple, METH_NOARGS, Key_as_tuple_doc},
+ {"intern", (PyCFunction)Key_intern, METH_NOARGS, Key_intern_doc},
{NULL, NULL} /* sentinel */
};
@@ -805,6 +843,73 @@
Keys_new, /* tp_new */
};
+
+static char KeyIntern_doc[] = "";
+
+static PyMethodDef KeyIntern_methods[] = {
+ // {"as_tuple", (PyCFunction)Keys_as_tuple, METH_NOARGS, Keys_as_tuple_doc},
+ {NULL, NULL} /* sentinel */
+};
+
+static PySequenceMethods KeyIntern_as_sequence = {
+ 0, //(lenfunc)Keys_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ 0, //(ssizeargfunc)Keys_item, /* sq_item */
+ 0, /* sq_slice */
+ 0, /* sq_ass_item */
+ 0, /* sq_ass_slice */
+ 0, /* sq_contains */
+};
+
+static PyTypeObject KeyIntern_Type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "KeyIntern", /* tp_name */
+ sizeof(KeyIntern) - sizeof(Key *), /* tp_basicsize */
+ sizeof(Key *), /* tp_itemsize */
+ 0, //(destructor)Keys_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ // TODO: implement repr() and possibly str()
+ 0, //(reprfunc)Keys_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ &KeyIntern_as_sequence, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, //(hashfunc)Keys_hash, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags*/
+ 0, // Keys_doc, /* tp_doc */
+ /* See Key_traverse for why we have this, even though we aren't GC */
+ 0, //(traverseproc)Keys_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ // TODO: implement richcompare, we should probably be able to compare vs an
+ // tuple, as well as versus another Keys object.
+ 0, //Keys_richcompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ // We could implement this as returning tuples of keys...
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ KeyIntern_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, //Keys_new, /* tp_new */
+};
+
+
static PyMethodDef keys_type_c_methods[] = {
// {"unique_lcs_c", py_unique_lcs, METH_VARARGS},
// {"recurse_matches_c", py_recurse_matches, METH_VARARGS},
@@ -821,6 +926,8 @@
return;
if (PyType_Ready(&Keys_Type) < 0)
return;
+ if (PyType_Ready(&KeyIntern_Type) < 0)
+ return;
m = Py_InitModule3("_keys_type_c", keys_type_c_methods,
"C implementation of a Keys structure");
@@ -831,4 +938,12 @@
PyModule_AddObject(m, "Key", (PyObject *)&Key_Type);
Py_INCREF(&Keys_Type);
PyModule_AddObject(m, "Keys", (PyObject *)&Keys_Type);
+ // Py_INCREF(&KeyIntern_Type);
+ // PyModule_AddObject(m, "KeyIntern", (PyObject *)&KeyIntern_Type);
+ // key_intern = PyObject_NewVar(KeyIntern, &KeyIntern_Type, 10);
+ key_intern = PyDict_New();
+ if (key_intern != NULL) {
+ Py_INCREF(key_intern);
+ PyModule_AddObject(m, "_intern", key_intern);
+ }
}
=== modified file 'bzrlib/_keys_type_c.h'
--- a/bzrlib/_keys_type_c.h 2009-09-28 19:14:08 +0000
+++ b/bzrlib/_keys_type_c.h 2009-09-29 15:25:12 +0000
@@ -25,7 +25,7 @@
# endif
#endif
-#define KEY_HAS_HASH 1
+#define KEY_HAS_HASH 0
/* This defines a single variable-width key.
* It is basically the same as a tuple, but
@@ -64,6 +64,13 @@
/* Forward declaration */
extern PyTypeObject Keys_Type;
+typedef struct {
+ PyObject_VAR_HEAD
+ Key *table[1];
+} KeyIntern;
+extern PyTypeObject Key_Type;
+
#define Key_SET_ITEM(key, offset, val) \
((((Key*)key)->key_bits[offset]) = (PyStringObject *)val)
#define Key_GET_ITEM(key, offset) (((Key*)key)->key_bits[offset])
+
=== modified file 'bzrlib/_keys_type_py.py'
--- a/bzrlib/_keys_type_py.py 2009-09-10 18:15:17 +0000
+++ b/bzrlib/_keys_type_py.py 2009-09-29 15:25:12 +0000
@@ -21,15 +21,46 @@
"""
-def Key(*args):
- """Implemented as just returning a tuple of the arguments supplied."""
- for bit in args:
- if not isinstance(bit, str):
- raise TypeError('key bits must be strings')
- num_keys = len(args)
- if num_keys <= 0 or num_keys > 256:
- raise ValueError('must have 1 => 256 key bits')
- return args
+class Key(object):
+ """A Key type, similar to a tuple of strings."""
+
+ __slots__ = ('_tuple',)
+
+ def __init__(self, *args):
+ """Create a new 'Key'"""
+ for bit in args:
+ if not isinstance(bit, str):
+ raise TypeError('key bits must be strings')
+ num_keys = len(args)
+ if num_keys <= 0 or num_keys > 256:
+ raise ValueError('must have 1 => 256 key bits')
+ self._tuple = args
+
+ def __repr__(self):
+ return repr(self._tuple)
+
+ def __hash__(self):
+ return hash(self._tuple)
+
+ def __eq__(self, other):
+ if isinstance(other, Key):
+ return self._tuple == other._tuple
+ if isinstance(other, tuple):
+ return other == self._tuple
+ return NotImplemented
+
+ def __len__(self):
+ return len(self._tuple)
+
+ def __cmp__(self, other):
+ return cmp(self._tuple, other)
+
+ def __getitem__(self, idx):
+ return self._tuple[idx]
+
+ def as_tuple(self):
+ return self._tuple
+
def Keys(width, *args):
@@ -51,3 +82,6 @@
raise TypeError('key bits must be strings')
result.append(key)
return tuple(result)
+
+
+_intern = {}
=== modified file 'bzrlib/tests/test__keys_type.py'
--- a/bzrlib/tests/test__keys_type.py 2009-09-11 21:48:56 +0000
+++ b/bzrlib/tests/test__keys_type.py 2009-09-29 15:25:12 +0000
@@ -22,6 +22,7 @@
from bzrlib import (
_keys_type_py,
errors,
+ osutils,
tests,
)
@@ -60,6 +61,21 @@
CompiledKeysType = _CompiledKeysType()
+class _Meliae(tests.Feature):
+
+ def _probe(self):
+ try:
+ from meliae import scanner
+ except ImportError:
+ return False
+ return True
+
+ def feature_name(self):
+ return "Meliae - python memory debugger"
+
+Meliae = _Meliae()
+
+
class TestKeyType(tests.TestCase):
def test_create(self):
@@ -76,16 +92,10 @@
def test_as_tuple(self):
k = self.module.Key('foo')
- if getattr(k, 'as_tuple', None) is None:
- t = k
- else:
- t = k.as_tuple()
+ t = k.as_tuple()
self.assertEqual(('foo',), t)
k = self.module.Key('foo', 'bar')
- if getattr(k, 'as_tuple', None) is None:
- t = k
- else:
- t = k.as_tuple()
+ t = k.as_tuple()
self.assertEqual(('foo', 'bar'), t)
def test_len(self):
@@ -208,13 +218,17 @@
# amount of referenced memory. Unfortunately gc.get_referents() first
# checks the IS_GC flag before it traverses anything. So there isn't a
# way to expose it that I can see.
- try:
- from meliae import scanner
- except ImportError:
- return
+ self.requireFeature(Meliae)
+ from meliae import scanner
strs = ['foo', 'bar', 'baz', 'bing']
k = self.module.Key(*strs)
- self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
+ if isinstance(k, _keys_type_py.Key):
+ # The python version references objects slightly different than the
+ # compiled version
+ self.assertEqual([k._tuple, _keys_type_py.Key],
+ scanner.get_referents(k))
+ else:
+ self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
class TestKeysType(tests.TestCase):
@@ -323,10 +337,8 @@
# amount of referenced memory. Unfortunately gc.get_referents() first
# checks the IS_GC flag before it traverses anything. So there isn't a
# way to expose it that I can see.
- try:
- from meliae import scanner
- except ImportError:
- return
+ self.requireFeature(Meliae)
+ from meliae import scanner
strs = ['foo', 'bar', 'baz', 'bing']
k = self.module.Keys(2, *strs)
if type(k) == tuple:
@@ -334,3 +346,14 @@
sorted(scanner.get_referents(k)))
else:
self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
+
+ def test_intern(self):
+ unique_str1 = 'unique str ' + osutils.rand_chars(20)
+ unique_str2 = 'unique str ' + osutils.rand_chars(20)
+ key = self.module.Key(unique_str1, unique_str2)
+ self.assertFalse(key in self.module._intern)
+ key2 = self.module.Key(unique_str1, unique_str2)
+ self.assertEqual(key, key2)
+ self.assertIsNot(key, key2)
+
+ refcount = sys.getrefcount(key)
More information about the bazaar-commits
mailing list