Rev 165: Add MemObjectProxy.iter_recursive_refs() which allows us to in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
John Arbash Meinel
john at arbash-meinel.com
Thu Jul 29 16:24:18 BST 2010
At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
------------------------------------------------------------
revno: 165
revision-id: john at arbash-meinel.com-20100729152358-4p8h7ke9rwvxb5it
parent: john at arbash-meinel.com-20100729144035-tlibn0y4qhlg2qyz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Thu 2010-07-29 10:23:58 -0500
message:
Add MemObjectProxy.iter_recursive_refs() which allows us to
quickly get all of the recursively referenced objects from a given object.
Implement it in Cython, seems to work pretty well.
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore 2009-09-08 17:19:16 +0000
+++ b/.bzrignore 2010-07-29 15:23:58 +0000
@@ -2,3 +2,4 @@
./meliae/_intset.c
./meliae/_loader.c
./meliae/_scanner.c
+./tags
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx 2010-07-29 14:37:29 +0000
+++ b/meliae/_loader.pyx 2010-07-29 15:23:58 +0000
@@ -605,6 +605,12 @@
as_dict[key] = val
return as_dict
+ def iter_recursive_refs(self):
+ cdef _MOPReferencedIterator iterator
+ iterator = _MOPReferencedIterator(self)
+ return iterator
+
+
cdef class MemObjectCollection:
"""Track a bunch of _MemObject instances."""
@@ -975,3 +981,56 @@
' %d, %d %d' % (<int>cur, self.table_pos,
self.collection._table_mask))
return self.collection._proxy_for(<object>cur.address, cur)
+
+
+cdef class _MOPReferencedIterator:
+ """Iterate over all the children referenced from this object."""
+
+ cdef MemObjectCollection collection
+ cdef object seen_addresses
+ cdef list pending_addresses
+ cdef int pending_offset
+
+ def __init__(self, proxy):
+ cdef _MemObjectProxy c_proxy
+
+ from meliae import _intset
+ c_proxy = proxy
+ self.collection = c_proxy.collection
+ self.seen_addresses = _intset.IDSet()
+ self.seen_addresses.add(c_proxy.address)
+ self.pending_addresses = list(c_proxy.children)
+ self.pending_offset = len(self.pending_addresses) - 1
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ while self.pending_offset >= 0:
+ next_address = self.pending_addresses[self.pending_offset]
+ self.pending_offset -= 1
+ # Avoid letting the pending addresses queue remain at its largest
+ # size forever. If it has more that 50% waste, and it is 'big',
+ # shrink it. Leave it a little room to grow, though.
+ if (self.pending_offset > 50
+ and len(self.pending_addresses) > 2*self.pending_offset):
+ self.pending_addresses = self.pending_addresses[
+ :self.pending_offset+10]
+ if next_address in self.seen_addresses:
+ continue
+ self.seen_addresses.add(next_address)
+ next_proxy = self.collection.get(next_address)
+ if next_proxy is None:
+ continue
+ # Queue up the children of this object
+ for c in next_proxy.children:
+ if c in self.seen_addresses:
+ continue
+ self.pending_offset += 1
+ if self.pending_offset >= len(self.pending_addresses):
+ self.pending_addresses.append(c)
+ else:
+ self.pending_addresses[self.pending_offset] = c
+ return next_proxy
+ # if we got this far, then we don't have anything left:
+ raise StopIteration()
=== modified file 'meliae/loader.py'
--- a/meliae/loader.py 2010-07-29 14:37:29 +0000
+++ b/meliae/loader.py 2010-07-29 15:23:58 +0000
@@ -316,30 +316,8 @@
def compute_total_size(self, obj):
"""Sum the size of all referenced objects (recursively)."""
- pending_descendents = list(obj.children)
- seen = _intset.IDSet()
- seen.add(obj.address)
- total_size = obj.size
- while pending_descendents:
- next_ref = pending_descendents.pop()
- if next_ref in seen:
- continue
- seen.add(next_ref)
- next_obj = self.objs.get(next_ref, None)
- if next_obj is None:
- continue
- # type and frame types tend to cause us to recurse into
- # everything. So for now, when we encounter them, don't add
- # their references
- total_size += next_obj.size
- pending_descendents.extend([ref for ref in next_obj.children
- if ref not in seen])
- ## count = len(seen)
- ## # This single object references more than 10% of all objects, and
- ## # expands to more that 10x its direct references
- ## if count > obj.num_refs * 10 and count > break_on:
- ## import pdb; pdb.set_trace()
- obj.total_size = total_size
+ obj.total_size = (obj.size
+ + sum(c.size for c in obj.iter_recursive_refs()))
return obj
def summarize(self):
=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py 2010-07-28 21:13:06 +0000
+++ b/meliae/tests/test__loader.py 2010-07-29 15:23:58 +0000
@@ -512,3 +512,38 @@
# 7: unsigned long total_size
# 8: PyObject *proxy
self.assertSizeOf(5+8, mop, has_gc=True)
+
+
+class Test_MemObjectProxyIterRecursiveRefs(tests.TestCase):
+
+ def setUp(self):
+ super(Test_MemObjectProxyIterRecursiveRefs, self).setUp()
+ self.moc = _loader.MemObjectCollection()
+ self.moc.add(1024, 'bar', 200)
+ self.moc.add(0, 'foo', 100)
+ self.moc.add(255, 'baz', 300)
+
+ def assertIterRecursiveRefs(self, addresses, obj):
+ self.assertEqual(addresses,
+ [o.address for o in obj.iter_recursive_refs()])
+
+ def test_no_refs(self):
+ self.assertIterRecursiveRefs([], self.moc[1024])
+
+ def test_single_depth_refs(self):
+ obj = self.moc.add(1, 'test', 1234, children=[1024])
+ self.assertIterRecursiveRefs([1024], obj)
+
+ def test_deep_refs(self):
+ obj = self.moc.add(1, '1', 1234, children=[2])
+ self.moc.add(2, '2', 1234, children=[3])
+ self.moc.add(3, '3', 1234, children=[4])
+ self.moc.add(4, '4', 1234, children=[5])
+ self.moc.add(5, '5', 1234, children=[6])
+ self.moc.add(6, '6', 1234, children=[])
+ self.assertIterRecursiveRefs([2, 3, 4, 5, 6], obj)
+
+ def test_self_referenced(self):
+ self.moc.add(1, 'test', 1234, children=[1024, 2])
+ obj = self.moc.add(2, 'test2', 1234, children=[1])
+ self.assertIterRecursiveRefs([1, 1024], obj)
More information about the bazaar-commits
mailing list