Rev 91: Add a dump_all_objects() implementation, in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 7 23:15:25 BST 2009
At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
------------------------------------------------------------
revno: 91
revision-id: john at arbash-meinel.com-20091007221506-jsn9uj5awx487p1b
parent: john at arbash-meinel.com-20091007213953-s7byaxh7adrfeaja
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-10-07 17:15:06 -0500
message:
Add a dump_all_objects() implementation.
This is the easiest way to get all referenced items, as long as you don't
mind using a bit of memory to track what items have been dumped.
The nice part is that the result stays 'small', and you don't need to
run strip_duplicates afterwards.
-------------- next part --------------
=== modified file 'meliae/scanner.py'
--- a/meliae/scanner.py 2009-09-30 04:01:59 +0000
+++ b/meliae/scanner.py 2009-10-07 22:15:06 +0000
@@ -29,12 +29,14 @@
def dump_all_referenced(outf, obj):
"""Recursively dump everything that is referenced from obj."""
- # if isinstance(outf, str):
- # outf = open(outf, 'wb')
+ if isinstance(outf, str):
+ outf = open(outf, 'wb')
pending = [obj]
+ last_offset = 0
seen = _intset.IDSet()
- while pending:
- next = pending.pop()
+ while last_offset >= 0:
+ next = pending[last_offset]
+ last_offset -= 1
id_next = id(next)
if id_next in seen:
continue
@@ -43,13 +45,15 @@
_scanner.dump_object_info(outf, next, recurse_depth=0)
for ref in get_referents(next):
if id(ref) not in seen:
- pending.append(ref)
+ last_offset += 1
+ if len(pending) > last_offset:
+ pending[last_offset] = ref
+ else:
+ pending.append(ref)
def dump_gc_objects(outf, recurse_depth=1):
- """Dump everything that is available via gc.objects().
-
- This does *not* do a recursive search.
+ """Dump everything that is available via gc.get_objects().
"""
if isinstance(outf, basestring):
outf = open(outf, 'wb')
@@ -84,6 +88,23 @@
recurse_depth=recurse_depth)
+def dump_all_objects(outf):
+ """Dump everything that is referenced from gc.get_objects()
+
+ This recurses, and tracks dumped objects in an IDSet. Which means it costs
+ memory, which is often about 10% of currently active memory. Otherwise,
+ this usually results in smaller dump files than dump_gc_objects().
+
+ This also can be faster, because it doesn't dump the same item multiple
+ times.
+ """
+ if isinstance(outf, basestring):
+ outf = open(outf, 'wb')
+ all_objs = gc.get_objects()
+ dump_all_referenced(outf, all_objs)
+
+
+
def get_recursive_size(obj):
"""Get the memory referenced from this object.
More information about the bazaar-commits mailing list