Rev 4712: Change Key away from being a PyVarObject. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
John Arbash Meinel
john at arbash-meinel.com
Wed Sep 30 05:45:59 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
------------------------------------------------------------
revno: 4712
revision-id: john at arbash-meinel.com-20090930044553-luo6udi4fmsmmw1m
parent: john at arbash-meinel.com-20090930041422-rm1v1u1yzfrabjtv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Tue 2009-09-29 23:45:53 -0500
message:
Change Key away from being a PyVarObject.
This will probably help a bit on 64-bit python, but mostly it is because
I plan on using the upper bits of the 'info' field to store stuff like
'is_interned' as a bit field.
With 32-bits and a max width of 256, it seems silly to waste 24 (or 56) bits
that will always be 0.
-------------- next part --------------
=== modified file 'bzrlib/_keys_type_c.c'
--- a/bzrlib/_keys_type_c.c 2009-09-29 15:25:12 +0000
+++ b/bzrlib/_keys_type_c.c 2009-09-30 04:45:53 +0000
@@ -39,6 +39,12 @@
static PyObject *key_intern = NULL;
+static inline int
+Key_size(Key *self)
+{
+ return self->info & KEY_SIZE_MASK;
+}
+
#define Key_CheckExact(op) (Py_TYPE(op) == &Key_Type)
@@ -47,14 +53,15 @@
Key_as_tuple(Key *self)
{
PyObject *tpl = NULL, *obj = NULL;
- Py_ssize_t i;
+ int i, len;
- tpl = PyTuple_New(self->ob_size);
+ len = Key_size(self);
+ tpl = PyTuple_New(len);
if (!tpl) {
/* Malloc failure */
return NULL;
}
- for (i = 0; i < self->ob_size; ++i) {
+ for (i = 0; i < len; ++i) {
obj = (PyObject *)self->key_bits[i];
Py_INCREF(obj);
PyTuple_SET_ITEM(tpl, i, obj);
@@ -101,9 +108,10 @@
static void
Key_dealloc(Key *self)
{
- Py_ssize_t i;
+ int i, len;
- for (i = 0; i < self->ob_size; ++i) {
+ len = Key_size(self);
+ for (i = 0; i < len; ++i) {
Py_XDECREF(self->key_bits[i]);
}
Py_TYPE(self)->tp_free((PyObject *)self);
@@ -120,10 +128,18 @@
return NULL;
}
+ /* Note that we use PyObject_NewVar because we want to allocate a variable
+ * width entry. However we *aren't* truly a PyVarObject because we don't
+ * use a long for ob_size. Instead we use a plain 'size' that is an int,
+ * and will be overloaded with flags in the future.
+ * As such we do the alloc, and then have to clean up anything it does
+ * incorrectly.
+ */
key = PyObject_NewVar(Key, &Key_Type, size);
if (key == NULL) {
return NULL;
}
+ key->info = size & KEY_SIZE_MASK;
memset(key->key_bits, 0, sizeof(PyStringObject *) * size);
#if KEY_HAS_HASH
key->hash = -1;
@@ -158,10 +174,10 @@
if (self == NULL) {
return NULL;
}
+ self->info = ((int)len) & KEY_SIZE_MASK;
#if KEY_HAS_HASH
self->hash = -1;
#endif
- self->ob_size = len;
for (i = 0; i < len; ++i) {
obj = PyTuple_GET_ITEM(args, i);
if (!PyString_CheckExact(obj)) {
@@ -198,7 +214,7 @@
* 'stable'?
*/
register long x, y;
- Py_ssize_t len = self->ob_size;
+ Py_ssize_t len = Key_size(self);
PyStringObject **p;
hashfunc string_hash;
long mult = 1000003L;
@@ -313,8 +329,8 @@
/* It will be rare that we compare tuples of different lengths, so we don't
* start by optimizing the length comparision, same as the tuple code
*/
- vlen = vk->ob_size;
- wlen = wk->ob_size;
+ vlen = Key_size(vk);
+ wlen = Key_size(wk);
min_len = (vlen < wlen) ? vlen : wlen;
string_richcompare = PyString_Type.tp_richcompare;
for (i = 0; i < min_len; i++) {
@@ -385,14 +401,14 @@
static Py_ssize_t
Key_length(Key *self)
{
- return self->ob_size;
+ return Key_size(self);
}
static PyObject *
Key_item(Key *self, Py_ssize_t offset)
{
PyObject *obj;
- if (offset < 0 || offset >= self->ob_size) {
+ if (offset < 0 || offset >= Key_size(self)) {
PyErr_SetString(PyExc_IndexError, "Key index out of range");
return NULL;
}
=== modified file 'bzrlib/_keys_type_c.h'
--- a/bzrlib/_keys_type_c.h 2009-09-29 21:51:31 +0000
+++ b/bzrlib/_keys_type_c.h 2009-09-30 04:45:53 +0000
@@ -47,8 +47,13 @@
* the Key objects so that you have 1 python object overhead for N Keys, rather
* than N objects.
*/
+
+#define KEY_SIZE_MASK 0x000000FF
+#define KEY_INTERNED_FLAG 0x00000100
typedef struct {
- PyObject_VAR_HEAD
+ PyObject_HEAD
+ int info; // size is in the lowest byte (0 -> 256)
+ // flags is next
#if KEY_HAS_HASH
long hash;
#endif
=== modified file 'bzrlib/tests/test__keys_type.py'
--- a/bzrlib/tests/test__keys_type.py 2009-09-29 15:36:59 +0000
+++ b/bzrlib/tests/test__keys_type.py 2009-09-30 04:45:53 +0000
@@ -355,17 +355,26 @@
key2 = self.module.Key(unique_str1, unique_str2)
self.assertEqual(key, key2)
self.assertIsNot(key, key2)
+ key3 = key.intern()
+ self.assertIs(key, key3)
+ self.assertTrue(key in self.module._intern)
+ self.assertEqual(key, self.module._intern[key])
+ key2 = key2.intern()
+ self.assertIs(key, key2)
+ def test__c_intern_handles_refcount(self):
+ if self.module is _keys_type_py:
+ return # Not applicable
+ unique_str1 = 'unique str ' + osutils.rand_chars(20)
+ unique_str2 = 'unique str ' + osutils.rand_chars(20)
+ key = self.module.Key(unique_str1, unique_str2)
+ self.assertFalse(key in self.module._intern)
+ key2 = self.module.Key(unique_str1, unique_str2)
+ self.assertEqual(key, key2)
+ self.assertIsNot(key, key2)
refcount = sys.getrefcount(key)
self.assertEqual(2, refcount)
- # TODO: Eventually the C version will diverge from the python version
- # here. Namely, it will follow the String route, and interning()
- # a string will not make it immortal. Instead it will remove
- # itself from the intern dict when all other references are
- # removed.
- # Further, the intern dict may become a custom type rather than a
- # pure 'dict'.
key3 = key.intern()
self.assertIs(key, key3)
self.assertTrue(key in self.module._intern)
More information about the bazaar-commits
mailing list