Rev 165: Add MemObjectProxy.iter_recursive_refs() which allows us to in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

John Arbash Meinel john at arbash-meinel.com
Thu Jul 29 16:24:18 BST 2010


At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

------------------------------------------------------------
revno: 165
revision-id: john at arbash-meinel.com-20100729152358-4p8h7ke9rwvxb5it
parent: john at arbash-meinel.com-20100729144035-tlibn0y4qhlg2qyz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Thu 2010-07-29 10:23:58 -0500
message:
  Add MemObjectProxy.iter_recursive_refs() which allows us to
  quickly get all of the recursively referenced objects from a given object.
  Implement it in Cython, seems to work pretty well.
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore	2009-09-08 17:19:16 +0000
+++ b/.bzrignore	2010-07-29 15:23:58 +0000
@@ -2,3 +2,4 @@
 ./meliae/_intset.c
 ./meliae/_loader.c
 ./meliae/_scanner.c
+./tags

=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2010-07-29 14:37:29 +0000
+++ b/meliae/_loader.pyx	2010-07-29 15:23:58 +0000
@@ -605,6 +605,12 @@
             as_dict[key] = val
         return as_dict
 
+    def iter_recursive_refs(self):
+        cdef _MOPReferencedIterator iterator
+        iterator = _MOPReferencedIterator(self)
+        return iterator
+
+
 
 cdef class MemObjectCollection:
     """Track a bunch of _MemObject instances."""
@@ -975,3 +981,56 @@
                 ' %d, %d %d' % (<int>cur, self.table_pos,
                                 self.collection._table_mask))
         return self.collection._proxy_for(<object>cur.address, cur)
+
+
+cdef class _MOPReferencedIterator:
+    """Iterate over all the children referenced from this object."""
+
+    cdef MemObjectCollection collection
+    cdef object seen_addresses
+    cdef list pending_addresses
+    cdef int pending_offset
+
+    def __init__(self, proxy):
+        cdef _MemObjectProxy c_proxy
+
+        from meliae import _intset
+        c_proxy = proxy
+        self.collection = c_proxy.collection
+        self.seen_addresses = _intset.IDSet()
+        self.seen_addresses.add(c_proxy.address)
+        self.pending_addresses = list(c_proxy.children)
+        self.pending_offset = len(self.pending_addresses) - 1
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        while self.pending_offset >= 0:
+            next_address = self.pending_addresses[self.pending_offset]
+            self.pending_offset -= 1
+            # Avoid letting the pending addresses queue remain at its largest
+            # size forever. If it has more that 50% waste, and it is 'big',
+            # shrink it. Leave it a little room to grow, though.
+            if (self.pending_offset > 50
+                and len(self.pending_addresses) > 2*self.pending_offset):
+                self.pending_addresses = self.pending_addresses[
+                    :self.pending_offset+10]
+            if next_address in self.seen_addresses:
+                continue
+            self.seen_addresses.add(next_address)
+            next_proxy = self.collection.get(next_address)
+            if next_proxy is None:
+                continue
+            # Queue up the children of this object
+            for c in next_proxy.children:
+                if c in self.seen_addresses:
+                    continue
+                self.pending_offset += 1
+                if self.pending_offset >= len(self.pending_addresses):
+                    self.pending_addresses.append(c)
+                else:
+                    self.pending_addresses[self.pending_offset] = c
+            return next_proxy
+        # if we got this far, then we don't have anything left:
+        raise StopIteration()

=== modified file 'meliae/loader.py'
--- a/meliae/loader.py	2010-07-29 14:37:29 +0000
+++ b/meliae/loader.py	2010-07-29 15:23:58 +0000
@@ -316,30 +316,8 @@
 
     def compute_total_size(self, obj):
         """Sum the size of all referenced objects (recursively)."""
-        pending_descendents = list(obj.children)
-        seen = _intset.IDSet()
-        seen.add(obj.address)
-        total_size = obj.size
-        while pending_descendents:
-            next_ref = pending_descendents.pop()
-            if next_ref in seen:
-                continue
-            seen.add(next_ref)
-            next_obj = self.objs.get(next_ref, None)
-            if next_obj is None:
-                continue
-            # type and frame types tend to cause us to recurse into
-            # everything. So for now, when we encounter them, don't add
-            # their references
-            total_size += next_obj.size
-            pending_descendents.extend([ref for ref in next_obj.children
-                                             if ref not in seen])
-        ## count = len(seen)
-        ## # This single object references more than 10% of all objects, and
-        ## # expands to more that 10x its direct references
-        ## if count > obj.num_refs * 10 and count > break_on:
-        ##     import pdb; pdb.set_trace()
-        obj.total_size = total_size
+        obj.total_size = (obj.size
+            + sum(c.size for c in obj.iter_recursive_refs()))
         return obj
 
     def summarize(self):

=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py	2010-07-28 21:13:06 +0000
+++ b/meliae/tests/test__loader.py	2010-07-29 15:23:58 +0000
@@ -512,3 +512,38 @@
         # 7: unsigned long total_size
         # 8: PyObject *proxy
         self.assertSizeOf(5+8, mop, has_gc=True)
+
+
+class Test_MemObjectProxyIterRecursiveRefs(tests.TestCase):
+
+    def setUp(self):
+        super(Test_MemObjectProxyIterRecursiveRefs, self).setUp()
+        self.moc = _loader.MemObjectCollection()
+        self.moc.add(1024, 'bar', 200)
+        self.moc.add(0, 'foo', 100)
+        self.moc.add(255, 'baz', 300)
+
+    def assertIterRecursiveRefs(self, addresses, obj):
+        self.assertEqual(addresses,
+                         [o.address for o in obj.iter_recursive_refs()])
+
+    def test_no_refs(self):
+        self.assertIterRecursiveRefs([], self.moc[1024])
+
+    def test_single_depth_refs(self):
+        obj = self.moc.add(1, 'test', 1234, children=[1024])
+        self.assertIterRecursiveRefs([1024], obj)
+
+    def test_deep_refs(self):
+        obj = self.moc.add(1, '1', 1234, children=[2])
+        self.moc.add(2, '2', 1234, children=[3])
+        self.moc.add(3, '3', 1234, children=[4])
+        self.moc.add(4, '4', 1234, children=[5])
+        self.moc.add(5, '5', 1234, children=[6])
+        self.moc.add(6, '6', 1234, children=[])
+        self.assertIterRecursiveRefs([2, 3, 4, 5, 6], obj)
+
+    def test_self_referenced(self):
+        self.moc.add(1, 'test', 1234, children=[1024, 2])
+        obj = self.moc.add(2, 'test2', 1234, children=[1])
+        self.assertIterRecursiveRefs([1, 1024], obj)



More information about the bazaar-commits mailing list