Rev 4712: Change Key away from being a PyVarObject. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

John Arbash Meinel john at arbash-meinel.com
Wed Sep 30 05:45:59 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

------------------------------------------------------------
revno: 4712
revision-id: john at arbash-meinel.com-20090930044553-luo6udi4fmsmmw1m
parent: john at arbash-meinel.com-20090930041422-rm1v1u1yzfrabjtv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Tue 2009-09-29 23:45:53 -0500
message:
  Change Key away from being a PyVarObject.
  This will probably help a bit on 64-bit python, but mostly it is because
  I plan on using the upper bits of the 'info' field to store stuff like
  'is_interned' as a bit field.
  With 32-bits and a max width of 256, it seems silly to waste 24 (or 56) bits
  that will always be 0.
-------------- next part --------------
=== modified file 'bzrlib/_keys_type_c.c'
--- a/bzrlib/_keys_type_c.c	2009-09-29 15:25:12 +0000
+++ b/bzrlib/_keys_type_c.c	2009-09-30 04:45:53 +0000
@@ -39,6 +39,12 @@
 static PyObject *key_intern = NULL;
 
 
+static inline int
+Key_size(Key *self)
+{
+    return self->info & KEY_SIZE_MASK;
+}
+
 
 
 #define Key_CheckExact(op) (Py_TYPE(op) == &Key_Type)
@@ -47,14 +53,15 @@
 Key_as_tuple(Key *self)
 {
     PyObject *tpl = NULL, *obj = NULL;
-    Py_ssize_t i;
+    int i, len;
 
-    tpl = PyTuple_New(self->ob_size);
+    len = Key_size(self);
+    tpl = PyTuple_New(len);
     if (!tpl) {
         /* Malloc failure */
         return NULL;
     }
-    for (i = 0; i < self->ob_size; ++i) {
+    for (i = 0; i < len; ++i) {
         obj = (PyObject *)self->key_bits[i];
         Py_INCREF(obj);
         PyTuple_SET_ITEM(tpl, i, obj);
@@ -101,9 +108,10 @@
 static void
 Key_dealloc(Key *self)
 {
-    Py_ssize_t i;
+    int i, len;
 
-    for (i = 0; i < self->ob_size; ++i) {
+    len = Key_size(self);
+    for (i = 0; i < len; ++i) {
         Py_XDECREF(self->key_bits[i]);
     }
     Py_TYPE(self)->tp_free((PyObject *)self);
@@ -120,10 +128,18 @@
         return NULL;
     }
 
+    /* Note that we use PyObject_NewVar because we want to allocate a variable
+     * width entry. However we *aren't* truly a PyVarObject because we don't
+     * use a long for ob_size. Instead we use a plain 'size' that is an int,
+     * and will be overloaded with flags in the future.
+     * As such we do the alloc, and then have to clean up anything it does
+     * incorrectly.
+     */
     key = PyObject_NewVar(Key, &Key_Type, size);
     if (key == NULL) {
         return NULL;
     }
+    key->info = size & KEY_SIZE_MASK;
     memset(key->key_bits, 0, sizeof(PyStringObject *) * size);
 #if KEY_HAS_HASH
     key->hash = -1;
@@ -158,10 +174,10 @@
     if (self == NULL) {
         return NULL;
     }
+    self->info = ((int)len) & KEY_SIZE_MASK;
 #if KEY_HAS_HASH
     self->hash = -1;
 #endif
-    self->ob_size = len;
     for (i = 0; i < len; ++i) {
         obj = PyTuple_GET_ITEM(args, i);
         if (!PyString_CheckExact(obj)) {
@@ -198,7 +214,7 @@
      * 'stable'?
      */
 	register long x, y;
-	Py_ssize_t len = self->ob_size;
+	Py_ssize_t len = Key_size(self);
 	PyStringObject **p;
     hashfunc string_hash;
 	long mult = 1000003L;
@@ -313,8 +329,8 @@
     /* It will be rare that we compare tuples of different lengths, so we don't
      * start by optimizing the length comparision, same as the tuple code
      */
-    vlen = vk->ob_size;
-    wlen = wk->ob_size;
+    vlen = Key_size(vk);
+    wlen = Key_size(wk);
 	min_len = (vlen < wlen) ? vlen : wlen;
     string_richcompare = PyString_Type.tp_richcompare;
     for (i = 0; i < min_len; i++) {
@@ -385,14 +401,14 @@
 static Py_ssize_t
 Key_length(Key *self)
 {
-    return self->ob_size;
+    return Key_size(self);
 }
 
 static PyObject *
 Key_item(Key *self, Py_ssize_t offset)
 {
     PyObject *obj;
-    if (offset < 0 || offset >= self->ob_size) {
+    if (offset < 0 || offset >= Key_size(self)) {
         PyErr_SetString(PyExc_IndexError, "Key index out of range");
         return NULL;
     }

=== modified file 'bzrlib/_keys_type_c.h'
--- a/bzrlib/_keys_type_c.h	2009-09-29 21:51:31 +0000
+++ b/bzrlib/_keys_type_c.h	2009-09-30 04:45:53 +0000
@@ -47,8 +47,13 @@
  * the Key objects so that you have 1 python object overhead for N Keys, rather
  * than N objects.
  */
+
+#define KEY_SIZE_MASK     0x000000FF
+#define KEY_INTERNED_FLAG 0x00000100
 typedef struct {
-    PyObject_VAR_HEAD
+    PyObject_HEAD
+    int info; // size is in the lowest byte (0 -> 256)
+              // flags is next
 #if KEY_HAS_HASH
     long hash;
 #endif

=== modified file 'bzrlib/tests/test__keys_type.py'
--- a/bzrlib/tests/test__keys_type.py	2009-09-29 15:36:59 +0000
+++ b/bzrlib/tests/test__keys_type.py	2009-09-30 04:45:53 +0000
@@ -355,17 +355,26 @@
         key2 = self.module.Key(unique_str1, unique_str2)
         self.assertEqual(key, key2)
         self.assertIsNot(key, key2)
+        key3 = key.intern()
+        self.assertIs(key, key3)
+        self.assertTrue(key in self.module._intern)
+        self.assertEqual(key, self.module._intern[key])
+        key2 = key2.intern()
+        self.assertIs(key, key2)
 
+    def test__c_intern_handles_refcount(self):
+        if self.module is _keys_type_py:
+            return # Not applicable
+        unique_str1 = 'unique str ' + osutils.rand_chars(20)
+        unique_str2 = 'unique str ' + osutils.rand_chars(20)
+        key = self.module.Key(unique_str1, unique_str2)
+        self.assertFalse(key in self.module._intern)
+        key2 = self.module.Key(unique_str1, unique_str2)
+        self.assertEqual(key, key2)
+        self.assertIsNot(key, key2)
         refcount = sys.getrefcount(key)
         self.assertEqual(2, refcount)
 
-        # TODO: Eventually the C version will diverge from the python version
-        #       here. Namely, it will follow the String route, and interning()
-        #       a string will not make it immortal. Instead it will remove
-        #       itself from the intern dict when all other references are
-        #       removed.
-        #       Further, the intern dict may become a custom type rather than a
-        #       pure 'dict'.
         key3 = key.intern()
         self.assertIs(key, key3)
         self.assertTrue(key in self.module._intern)



More information about the bazaar-commits mailing list