Rev 91: Add a dump_all_objects() implementation in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

John Arbash Meinel john at arbash-meinel.com
Wed Oct 7 23:15:25 BST 2009


At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

------------------------------------------------------------
revno: 91
revision-id: john at arbash-meinel.com-20091007221506-jsn9uj5awx487p1b
parent: john at arbash-meinel.com-20091007213953-s7byaxh7adrfeaja
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-10-07 17:15:06 -0500
message:
  Add a dump_all_objects() implementation.
  
  This is the easiest way to get all referenced items, as long as you don't
  mind using a bit of memory to track what items have been dumped.
  The nice part is that the result stays 'small', and you don't need to
  run strip_duplicates afterwards.
-------------- next part --------------
=== modified file 'meliae/scanner.py'
--- a/meliae/scanner.py	2009-09-30 04:01:59 +0000
+++ b/meliae/scanner.py	2009-10-07 22:15:06 +0000
@@ -29,12 +29,14 @@
 
 def dump_all_referenced(outf, obj):
     """Recursively dump everything that is referenced from obj."""
-    # if isinstance(outf, str):
-    #     outf = open(outf, 'wb')
+    if isinstance(outf, str):
+        outf = open(outf, 'wb')
     pending = [obj]
+    last_offset = 0
     seen = _intset.IDSet()
-    while pending:
-        next = pending.pop()
+    while last_offset >= 0:
+        next = pending[last_offset]
+        last_offset -= 1
         id_next = id(next)
         if id_next in seen:
             continue
@@ -43,13 +45,15 @@
         _scanner.dump_object_info(outf, next, recurse_depth=0)
         for ref in get_referents(next):
             if id(ref) not in seen:
-                pending.append(ref)
+                last_offset += 1
+                if len(pending) > last_offset:
+                    pending[last_offset] = ref
+                else:
+                    pending.append(ref)
 
 
 def dump_gc_objects(outf, recurse_depth=1):
-    """Dump everything that is available via gc.objects().
-
-    This does *not* do a recursive search.
+    """Dump everything that is available via gc.get_objects().
     """
     if isinstance(outf, basestring):
         outf = open(outf, 'wb')
@@ -84,6 +88,23 @@
                                   recurse_depth=recurse_depth)
 
 
+def dump_all_objects(outf):
+    """Dump everything that is referenced from gc.get_objects()
+
+    This recurses, and tracks dumped objects in an IDSet. Which means it costs
+    memory, which is often about 10% of currently active memory. Otherwise,
+    this usually results in smaller dump files than dump_gc_objects().
+
+    This also can be faster, because it doesn't dump the same item multiple
+    times.
+    """
+    if isinstance(outf, basestring):
+        outf = open(outf, 'wb')
+    all_objs = gc.get_objects()
+    dump_all_referenced(outf, all_objs)
+
+
+
 def get_recursive_size(obj):
     """Get the memory referenced from this object.
 



More information about the bazaar-commits mailing list