Rev 4708: Start work on implementing a Key.intern() function. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

John Arbash Meinel john at arbash-meinel.com
Tue Sep 29 16:25:16 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

------------------------------------------------------------
revno: 4708
revision-id: john at arbash-meinel.com-20090929152512-md6x01vbcb9rl2ul
parent: john at arbash-meinel.com-20090928191408-7im66hra3wfcolhz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Tue 2009-09-29 10:25:12 -0500
message:
  Start work on implementing a Key.intern() function.
  
  As part of this, I'm upgrading the python type to be a fully fledged class.
  It will have *more* overhead than just using tuples (given that we wrap a tuple)
  but at least it means we can have an interesting class object that does
  more than just a tuple interface.
-------------- next part --------------
=== modified file 'bzrlib/_keys_type_c.c'
--- a/bzrlib/_keys_type_c.c	2009-09-28 19:14:08 +0000
+++ b/bzrlib/_keys_type_c.c	2009-09-29 15:25:12 +0000
@@ -36,6 +36,9 @@
 
 
 static PyObject *Keys_item(Keys *self, Py_ssize_t offset);
+static PyObject *key_intern = NULL;
+
+
 
 
 #define Key_CheckExact(op) (Py_TYPE(op) == &Key_Type)
@@ -59,8 +62,42 @@
     return tpl;
 }
 
+
 static char Key_as_tuple_doc[] = "as_tuple() => tuple";
 
+static Key *
+Key_intern(Key *self)
+{
+    PyObject *unique_key = NULL;
+
+    if (key_intern == NULL) {
+        return self;
+    }
+    unique_key = PyDict_GetItem((PyObject *)key_intern, (PyObject *)self);
+    if (unique_key) {
+        // An entry already existed, return it, instead of self
+        Py_INCREF(unique_key);
+        return (Key *)unique_key;
+    }
+    // An entry did not exist, make 'self' the unique item
+    if (PyDict_SetItem(key_intern, (PyObject *)self, (PyObject *)self) < 0) {
+        // Suppress an error
+        PyErr_Clear();
+        return self;
+    }
+    // self was added to the dict, return it.
+    Py_INCREF(self);
+    return self;
+}
+
+static char Key_intern_doc[] = "intern() => unique Key\n"
+    "Return a 'canonical' Key object.\n"
+    "Similar to intern() for strings, this makes sure there\n"
+    "is only one Key object for a given value\n."
+    "Common usage is:\n"
+    "  key = Key('foo', 'bar').intern()\n";
+
+
 static void
 Key_dealloc(Key *self)
 {
@@ -396,6 +433,7 @@
 
 static PyMethodDef Key_methods[] = {
     {"as_tuple", (PyCFunction)Key_as_tuple, METH_NOARGS, Key_as_tuple_doc},
+    {"intern", (PyCFunction)Key_intern, METH_NOARGS, Key_intern_doc},
     {NULL, NULL} /* sentinel */
 };
 
@@ -805,6 +843,73 @@
     Keys_new,                                    /* tp_new */
 };
 
+
+static char KeyIntern_doc[] = "";
+
+static PyMethodDef KeyIntern_methods[] = {
+    // {"as_tuple", (PyCFunction)Keys_as_tuple, METH_NOARGS, Keys_as_tuple_doc},
+    {NULL, NULL} /* sentinel */
+};
+
+static PySequenceMethods KeyIntern_as_sequence = {
+    0, //(lenfunc)Keys_length,           /* sq_length */
+    0,                              /* sq_concat */
+    0,                              /* sq_repeat */
+    0, //(ssizeargfunc)Keys_item,        /* sq_item */
+    0,                              /* sq_slice */
+    0,                              /* sq_ass_item */
+    0,                              /* sq_ass_slice */
+    0,                              /* sq_contains */
+};
+
+static PyTypeObject KeyIntern_Type = {
+    PyObject_HEAD_INIT(NULL)
+    0,                                           /* ob_size */
+    "KeyIntern",                                 /* tp_name */
+    sizeof(KeyIntern) - sizeof(Key *),           /* tp_basicsize */
+    sizeof(Key *),                               /* tp_itemsize */
+    0, //(destructor)Keys_dealloc,               /* tp_dealloc */
+    0,                                           /* tp_print */
+    0,                                           /* tp_getattr */
+    0,                                           /* tp_setattr */
+    0,                                           /* tp_compare */
+    // TODO: implement repr() and possibly str()
+    0, //(reprfunc)Keys_repr,                         /* tp_repr */
+    0,                                           /* tp_as_number */
+    &KeyIntern_as_sequence,                      /* tp_as_sequence */
+    0,                                           /* tp_as_mapping */
+    0, //(hashfunc)Keys_hash,                         /* tp_hash */
+    0,                                           /* tp_call */
+    0,                                           /* tp_str */
+    PyObject_GenericGetAttr,                     /* tp_getattro */
+    0,                                           /* tp_setattro */
+    0,                                           /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                          /* tp_flags*/
+    0, // Keys_doc,                                    /* tp_doc */
+    /* See Key_traverse for why we have this, even though we aren't GC */
+    0, //(traverseproc)Keys_traverse,                 /* tp_traverse */
+    0,                                           /* tp_clear */
+    // TODO: implement richcompare, we should probably be able to compare vs an
+    //       tuple, as well as versus another Keys object.
+    0, //Keys_richcompare,                            /* tp_richcompare */
+    0,                                           /* tp_weaklistoffset */
+    // We could implement this as returning tuples of keys...
+    0,                                           /* tp_iter */
+    0,                                           /* tp_iternext */
+    KeyIntern_methods,                           /* tp_methods */
+    0,                                           /* tp_members */
+    0,                                           /* tp_getset */
+    0,                                           /* tp_base */
+    0,                                           /* tp_dict */
+    0,                                           /* tp_descr_get */
+    0,                                           /* tp_descr_set */
+    0,                                           /* tp_dictoffset */
+    0,                                           /* tp_init */
+    0,                                           /* tp_alloc */
+    0, //Keys_new,                                    /* tp_new */
+};
+
+
 static PyMethodDef keys_type_c_methods[] = {
 //    {"unique_lcs_c", py_unique_lcs, METH_VARARGS},
 //    {"recurse_matches_c", py_recurse_matches, METH_VARARGS},
@@ -821,6 +926,8 @@
         return;
     if (PyType_Ready(&Keys_Type) < 0)
         return;
+    if (PyType_Ready(&KeyIntern_Type) < 0)
+        return;
 
     m = Py_InitModule3("_keys_type_c", keys_type_c_methods,
                        "C implementation of a Keys structure");
@@ -831,4 +938,12 @@
     PyModule_AddObject(m, "Key", (PyObject *)&Key_Type);
     Py_INCREF(&Keys_Type);
     PyModule_AddObject(m, "Keys", (PyObject *)&Keys_Type);
+    // Py_INCREF(&KeyIntern_Type);
+    // PyModule_AddObject(m, "KeyIntern", (PyObject *)&KeyIntern_Type);
+    // key_intern = PyObject_NewVar(KeyIntern, &KeyIntern_Type, 10);
+    key_intern = PyDict_New();
+    if (key_intern != NULL) {
+        Py_INCREF(key_intern);
+        PyModule_AddObject(m, "_intern", key_intern);
+    }
 }

=== modified file 'bzrlib/_keys_type_c.h'
--- a/bzrlib/_keys_type_c.h	2009-09-28 19:14:08 +0000
+++ b/bzrlib/_keys_type_c.h	2009-09-29 15:25:12 +0000
@@ -25,7 +25,7 @@
 #  endif
 #endif
 
-#define KEY_HAS_HASH 1
+#define KEY_HAS_HASH 0
 
 /* This defines a single variable-width key.
  * It is basically the same as a tuple, but
@@ -64,6 +64,13 @@
 /* Forward declaration */
 extern PyTypeObject Keys_Type;
 
+typedef struct {
+    PyObject_VAR_HEAD
+    Key *table[1];
+} KeyIntern;
+extern PyTypeObject Key_Type;
+
 #define Key_SET_ITEM(key, offset, val) \
     ((((Key*)key)->key_bits[offset]) = (PyStringObject *)val)
 #define Key_GET_ITEM(key, offset) (((Key*)key)->key_bits[offset])
+

=== modified file 'bzrlib/_keys_type_py.py'
--- a/bzrlib/_keys_type_py.py	2009-09-10 18:15:17 +0000
+++ b/bzrlib/_keys_type_py.py	2009-09-29 15:25:12 +0000
@@ -21,15 +21,46 @@
 """
 
 
-def Key(*args):
-    """Implemented as just returning a tuple of the arguments supplied."""
-    for bit in args:
-        if not isinstance(bit, str):
-            raise TypeError('key bits must be strings')
-    num_keys = len(args)
-    if num_keys <= 0 or num_keys > 256:
-        raise ValueError('must have 1 => 256 key bits')
-    return args
+class Key(object):
+    """A Key type, similar to a tuple of strings."""
+
+    __slots__ = ('_tuple',)
+
+    def __init__(self, *args):
+        """Create a new 'Key'"""
+        for bit in args:
+            if not isinstance(bit, str):
+                raise TypeError('key bits must be strings')
+        num_keys = len(args)
+        if num_keys <= 0 or num_keys > 256:
+            raise ValueError('must have 1 => 256 key bits')
+        self._tuple = args
+
+    def __repr__(self):
+        return repr(self._tuple)
+
+    def __hash__(self):
+        return hash(self._tuple)
+
+    def __eq__(self, other):
+        if isinstance(other, Key):
+            return self._tuple == other._tuple
+        if isinstance(other, tuple):
+            return other == self._tuple
+        return NotImplemented
+
+    def __len__(self):
+        return len(self._tuple)
+
+    def __cmp__(self, other):
+        return cmp(self._tuple, other)
+
+    def __getitem__(self, idx):
+        return self._tuple[idx]
+
+    def as_tuple(self):
+        return self._tuple
+
 
 
 def Keys(width, *args):
@@ -51,3 +82,6 @@
                 raise TypeError('key bits must be strings')
         result.append(key)
     return tuple(result)
+
+
+_intern = {}

=== modified file 'bzrlib/tests/test__keys_type.py'
--- a/bzrlib/tests/test__keys_type.py	2009-09-11 21:48:56 +0000
+++ b/bzrlib/tests/test__keys_type.py	2009-09-29 15:25:12 +0000
@@ -22,6 +22,7 @@
 from bzrlib import (
     _keys_type_py,
     errors,
+    osutils,
     tests,
     )
 
@@ -60,6 +61,21 @@
 CompiledKeysType = _CompiledKeysType()
 
 
+class _Meliae(tests.Feature):
+
+    def _probe(self):
+        try:
+            from meliae import scanner
+        except ImportError:
+            return False
+        return True
+
+    def feature_name(self):
+        return "Meliae - python memory debugger"
+
+Meliae = _Meliae()
+
+
 class TestKeyType(tests.TestCase):
 
     def test_create(self):
@@ -76,16 +92,10 @@
         
     def test_as_tuple(self):
         k = self.module.Key('foo')
-        if getattr(k, 'as_tuple', None) is None:
-            t = k
-        else:
-            t = k.as_tuple()
+        t = k.as_tuple()
         self.assertEqual(('foo',), t)
         k = self.module.Key('foo', 'bar')
-        if getattr(k, 'as_tuple', None) is None:
-            t = k
-        else:
-            t = k.as_tuple()
+        t = k.as_tuple()
         self.assertEqual(('foo', 'bar'), t)
 
     def test_len(self):
@@ -208,13 +218,17 @@
         # amount of referenced memory. Unfortunately gc.get_referents() first
         # checks the IS_GC flag before it traverses anything. So there isn't a
         # way to expose it that I can see.
-        try:
-            from meliae import scanner
-        except ImportError:
-            return
+        self.requireFeature(Meliae)
+        from meliae import scanner
         strs = ['foo', 'bar', 'baz', 'bing']
         k = self.module.Key(*strs)
-        self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
+        if isinstance(k, _keys_type_py.Key):
+            # The python version references objects slightly different than the
+            # compiled version
+            self.assertEqual([k._tuple, _keys_type_py.Key],
+                             scanner.get_referents(k))
+        else:
+            self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
 
 
 class TestKeysType(tests.TestCase):
@@ -323,10 +337,8 @@
         # amount of referenced memory. Unfortunately gc.get_referents() first
         # checks the IS_GC flag before it traverses anything. So there isn't a
         # way to expose it that I can see.
-        try:
-            from meliae import scanner
-        except ImportError:
-            return
+        self.requireFeature(Meliae)
+        from meliae import scanner
         strs = ['foo', 'bar', 'baz', 'bing']
         k = self.module.Keys(2, *strs)
         if type(k) == tuple:
@@ -334,3 +346,14 @@
                              sorted(scanner.get_referents(k)))
         else:
             self.assertEqual(sorted(strs), sorted(scanner.get_referents(k)))
+
+    def test_intern(self):
+        unique_str1 = 'unique str ' + osutils.rand_chars(20)
+        unique_str2 = 'unique str ' + osutils.rand_chars(20)
+        key = self.module.Key(unique_str1, unique_str2)
+        self.assertFalse(key in self.module._intern)
+        key2 = self.module.Key(unique_str1, unique_str2)
+        self.assertEqual(key, key2)
+        self.assertIsNot(key, key2)
+
+        refcount = sys.getrefcount(key)



More information about the bazaar-commits mailing list