Rev 98: Add the ability to pull in the '__dict__' attribute of instances and modules. in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

John Arbash Meinel john at arbash-meinel.com
Sat Oct 17 04:02:17 BST 2009


At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

------------------------------------------------------------
revno: 98
revision-id: john at arbash-meinel.com-20091017030158-8pa7tzmyiwfk02bq
parent: john at arbash-meinel.com-20091017014149-yo14wvveuqc6vs6e
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Fri 2009-10-16 22:01:58 -0500
message:
  Add the ability to pull in the '__dict__' attribute of instances and modules.
  This saves a dereference when walking around trying to figure out what is going on.
  It also attributes memory consumption to the instances themselves, rather
  than a generic '__dict__' object.
  
  Also adds a basic implementation for turning a ref list back into a more
  understandable form.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2009-10-17 01:41:49 +0000
+++ b/meliae/_loader.pyx	2009-10-17 03:01:58 +0000
@@ -108,7 +108,7 @@
     cdef readonly long address
     cdef readonly object type_str # pointer to a PyString, this is expected to be shared
                                   # with many other instances, but longer than 4 bytes
-    cdef readonly long size
+    cdef public long size
     cdef long *_ref_list # An array of addresses that this object
                          # referenced. May be NULL if len() == 0
                          # If not null, the first item is the length of the

=== modified file 'meliae/loader.py'
--- a/meliae/loader.py	2009-10-17 01:41:49 +0000
+++ b/meliae/loader.py	2009-10-17 03:01:58 +0000
@@ -214,6 +214,9 @@
                 referrers.setdefault(ref, []).append(address)
         for obj in self.objs.itervalues():
             obj.referrers = referrers.get(obj.address, ())
+        if self.show_progress:
+            sys.stderr.write('compute referrers %8d / %8d        \n'
+                             % (idx, total))
 
     def remove_expensive_references(self):
         """Filter out references that are mere houskeeping links.
@@ -230,7 +233,7 @@
         source = lambda:self.objs.itervalues()
         total_objs = len(self.objs)
         for changed, obj in remove_expensive_references(source, total_objs,
-            self.show_progress):
+                                                        self.show_progress):
             if changed:
                 self.objs[obj.address] = obj
 
@@ -283,6 +286,9 @@
                 sys.stderr.write('compute size %8d / %8d        \r'
                                  % (idx, total))
             self._compute_total_size(obj)
+        if self.show_progress:
+            sys.stderr.write('compute size %8d / %8d        \n'
+                             % (idx, total))
 
     def summarize(self):
         summary = _ObjSummary()
@@ -292,7 +298,98 @@
 
     def get_all(self, type_str):
         """Return all objects that match a given type."""
-        return [o for o in self.objs.itervalues() if o.type_str == type_str]
+        all = [o for o in self.objs.itervalues() if o.type_str == type_str]
+        all.sort(key=lambda x:(x.size, x.num_refs, x.num_referrers),
+                 reverse=True)
+        return all
+
+    def collapse_instance_dicts(self):
+        """Hide the __dict__ member of instances.
+
+        When a class does not have __slots__ defined, all instances get a
+        separate '__dict__' attribute that actually holds their contents. This
+        adds a level of indirection that can make it harder than it needs to
+        be, to actually find what instance holds what objects.
+
+        So we collapse those references back into the object, and grow its
+        'size' at the same time.
+        """
+        # The instances I'm focusing on have a custom type name, and every
+        # instance has 2 pointers. The first is to __dict__, and the second is
+        # to the 'type' object whose name matches the type of the instance.
+        # Also __dict__ has only 1 referrer, and that is *this* object
+        collapsed = 0
+        total = len(self.objs)
+        for address, obj in self.objs.items():
+            if obj.type_str == 'module' and obj.num_refs == 1:
+                (dict_ref,) = obj.ref_list
+                extra_refs = []
+            else:
+                if obj.type_str in ('str', 'dict', 'tuple', 'list', 'type',
+                                    'function', 'wrapper_descriptor',
+                                    'code', 'classobj'):
+                    continue
+                if obj.num_refs != 2:
+                    continue
+                (dict_ref, type_ref) = obj.ref_list
+                type_obj = self.objs[type_ref]
+                if type_obj.type_str != 'type' or type_obj.name != obj.type_str:
+                    continue
+                extra_refs = [type_ref]
+            dict_obj = self.objs[dict_ref]
+            if dict_obj.type_str != 'dict':
+                continue
+            if (dict_obj.num_referrers != 1
+                or dict_obj.referrers[0] != address):
+                continue
+            collapsed += 1
+            if self.show_progress and collapsed & 0xff == 0:
+                sys.stderr.write('collapsed %8d / %8d        \r'
+                                 % (collapsed, total))
+            # We found an instance \o/
+            for ref in dict_obj.ref_list:
+                referenced_obj = self.objs[ref]
+                referrers = referenced_obj.referrers
+                for idx in xrange(len(referrers)):
+                    if referrers[idx] == dict_ref:
+                        referrers[idx] = address
+                referenced_obj.referrers = referrers
+            obj.ref_list = dict_obj.ref_list + extra_refs
+            obj.size = obj.size + dict_obj.size
+            obj.total_size = 0
+            # Now that all the data has been moved into the instance, remove
+            # the dict from the collection
+            del self.objs[dict_ref]
+        if self.show_progress:
+            sys.stderr.write('collapsed %8d / %8d => %8d   \n'
+                             % (collapsed, total, len(self.objs)))
+
+    def refs_as_dict(self, obj):
+        """Expand the ref list considering it to be a 'dict' structure.
+        
+        Often we have dicts that point to simple strings and ints, etc. This
+        tries to expand that as much as possible.
+
+        :param obj: Should be a MemObject representing an instance (that has
+            been collapsed) or a dict.
+        """
+        as_dict = {}
+        ref_list = obj.ref_list
+        if obj.type_str not in ('dict', 'module'):
+            # Instance dicts end with a 'type' reference
+            ref_list = ref_list[:-1]
+        for idx in xrange(0, len(ref_list), 2):
+            key = self.objs[ref_list[idx]]
+            val = self.objs[ref_list[idx+1]]
+            if key.value is not None:
+                key = key.value
+            # TODO: We should consider recursing if val is a 'known' type, such
+            #       a tuple/dict/etc
+            if val.value is not None:
+                val = val.value
+            as_dict[key] = val
+        return as_dict
+
 
 
 def load(source, using_json=None, show_prog=True):

=== modified file 'meliae/tests/test_loader.py'
--- a/meliae/tests/test_loader.py	2009-10-08 20:52:20 +0000
+++ b/meliae/tests/test_loader.py	2009-10-17 03:01:58 +0000
@@ -47,6 +47,31 @@
  ', "refs": []}',
 ]
 
+# Note that this doesn't have a complete copy of the references. Namely when
+# you subclass object you get a lot of references, and type instances also
+# reference other stuff that tends to chain to stuff like 'sys', which ends up
+# referencing everything.
+_instance_dump = [
+'{"address": 1, "type": "MyClass", "size": 32, "refs": [2, 3]}',
+'{"address": 3, "type": "type", "size": 452, "name": "MyClass", "refs": []}',
+'{"address": 2, "type": "dict", "size": 140, "len": 4'
+ ', "refs": [4, 5, 6, 7, 9, 10, 11, 12]}',
+'{"address": 4, "type": "str", "size": 25, "len": 1, "value": "a", "refs": []}',
+'{"address": 5, "type": "int", "size": 12, "value": 1, "refs": []}',
+'{"address": 6, "type": "str", "size": 25, "len": 1, "value": "c", "refs": []}',
+'{"address": 7, "type": "dict", "size": 140, "len": 1, "refs": [8, 6]}',
+'{"address": 8, "type": "str", "size": 25, "len": 1, "value": "s", "refs": []}',
+'{"address": 9, "type": "str", "size": 25, "len": 1, "value": "b", "refs": []}',
+'{"address": 10, "type": "str", "size": 30, "len": 6'
+ ', "value": "string", "refs": []}',
+'{"address": 11, "type": "str", "size": 25, "len": 1, "value": "d", "refs": []}',
+'{"address": 12, "type": "tuple", "size": 32, "len": 1, "refs": [13]}',
+'{"address": 13, "type": "int", "size": 12, "value": 2, "refs": []}',
+'{"address": 14, "type": "module", "size": 28, "name": "sys", "refs": [15]}',
+'{"address": 15, "type": "dict", "size": 140, "len": 2, "refs": [5, 6, 9, 6]}',
+]
+
+
 class TestLoad(tests.TestCase):
 
     def test_load_smoketest(self):
@@ -197,3 +222,43 @@
         self.assertEqual(0, null_obj.address)
         self.assertEqual('<ex-reference>', null_obj.type_str)
         self.assertEqual([11, 0], mymod_dict.ref_list)
+
+    def test_collapse_instance_dicts(self):
+        lines = list(_instance_dump)
+        manager = loader.load(lines, show_prog=False)
+        # This should collapse all of the references from the instance's dict
+        # @2 into the instance @1
+        instance = manager.objs[1]
+        self.assertEqual(32, instance.size)
+        self.assertEqual([2, 3], instance.ref_list)
+        inst_dict = manager.objs[2]
+        self.assertEqual(140, inst_dict.size)
+        self.assertEqual([4, 5, 6, 7, 9, 10, 11, 12], inst_dict.ref_list)
+        mod = manager.objs[14]
+        self.assertEqual([15], mod.ref_list)
+        mod_dict = manager.objs[15]
+        self.assertEqual([5, 6, 9, 6], mod_dict.ref_list)
+        manager.compute_referrers()
+        tpl = manager.objs[12]
+        self.assertEqual([2], tpl.referrers)
+        self.assertEqual([1], inst_dict.referrers)
+        self.assertEqual([14], mod_dict.referrers)
+        manager.collapse_instance_dicts()
+        # The instance dict has been removed
+        self.assertEqual([4, 5, 6, 7, 9, 10, 11, 12, 3], instance.ref_list)
+        self.assertEqual(172, instance.size)
+        self.assertFalse(2 in manager.objs)
+        self.assertEqual([1], tpl.referrers)
+        self.assertEqual([5, 6, 9, 6], mod.ref_list)
+        self.assertFalse(15 in manager.objs)
+
+    def test_expand_refs_as_dict(self):
+        lines = list(_instance_dump)
+        manager = loader.load(lines, show_prog=False)
+        as_dict = manager.refs_as_dict(manager[15])
+        self.assertEqual({1: 'c', 'b': 'c'}, as_dict)
+        manager.compute_referrers()
+        manager.collapse_instance_dicts()
+        self.assertEqual({1: 'c', 'b': 'c'}, manager.refs_as_dict(manager[14]))
+        self.assertEqual({'a': 1, 'c': manager[7], 'b': 'string',
+                          'd': manager[12]}, manager.refs_as_dict(manager[1]))



More information about the bazaar-commits mailing list