Rev 156: Remove MemObject entirely. in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

John Arbash Meinel john at arbash-meinel.com
Tue Dec 29 21:35:15 GMT 2009


At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

------------------------------------------------------------
revno: 156
revision-id: john at arbash-meinel.com-20091229213451-fqddkg3pmr0e23nn
parent: john at arbash-meinel.com-20091229203715-0lu6cshqg59ac54w
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Tue 2009-12-29 15:34:51 -0600
message:
  Remove MemObject entirely.
  
  I'm not 100% satisfied with the new api, but it does make it clear
  that the old way is no longer valid.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2009-12-29 15:59:46 +0000
+++ b/meliae/_loader.pyx	2009-12-29 21:34:51 +0000
@@ -180,6 +180,40 @@
     PyObject *proxy
 
 
+cdef _MemObject *_new_mem_object(address, type_str, size, ref_list,
+                             value, name, parent_list, total_size) except NULL:
+    cdef _MemObject *new_entry
+    cdef PyObject *addr
+
+    new_entry = <_MemObject *>PyMem_Malloc(sizeof(_MemObject))
+    if new_entry == NULL:
+        raise MemoryError('Failed to allocate %d bytes' % (sizeof(_MemObject),))
+    memset(new_entry, 0, sizeof(_MemObject))
+    addr = <PyObject *>address
+    Py_INCREF(addr)
+    new_entry.address = addr
+    new_entry.type_str = <PyObject *>type_str
+    Py_INCREF(new_entry.type_str)
+    new_entry.size = size
+    new_entry.ref_list = _list_to_ref_list(ref_list)
+    # TODO: Was found wanting and removed
+    # if length is None:
+    #     new_entry.length = -1
+    # else:
+    #     new_entry.length = length
+    if value is not None and name is not None:
+        raise RuntimeError("We currently only support one of value or name"
+                           " per object.")
+    if value is not None:
+        new_entry.value = <PyObject *>value
+    else:
+        new_entry.value = <PyObject *>name
+    Py_INCREF(new_entry.value)
+    new_entry.parent_list = _list_to_ref_list(parent_list)
+    new_entry.total_size = total_size
+    return new_entry
+
+
 cdef int _free_mem_object(_MemObject *cur) except -1:
     if cur == NULL: # Already cleared
         return 0
@@ -209,6 +243,27 @@
 
 
 cdef class MemObjectCollection
+cdef class _MemObjectProxy
+
+
+def _MemObjectProxy_from_args(address, type_str, size, ref_list=(), length=0,
+                              value=None, name=None, parent_list=(),
+                              total_size=0):
+    """Create a standalone _MemObjectProxy instance.
+
+    Note that things like '__getitem__' won't work, as they query the
+    collection for the actual data.
+    """
+    cdef _MemObject *new_entry
+    cdef _MemObjectProxy proxy
+
+    new_entry = _new_mem_object(address, type_str, size, ref_list,
+                                value, name, parent_list, total_size)
+    proxy = _MemObjectProxy(None)
+    proxy._obj = new_entry
+    proxy._managed_obj = new_entry
+    new_entry.proxy = <PyObject *>proxy
+    return proxy
 
 
 cdef class _MemObjectProxy:
@@ -420,6 +475,25 @@
             self.type_str, self.address, self.size,
             refs, parent_str, val, total_size_str)
 
+    def to_json(self):
+        """Convert this back into json."""
+        refs = []
+        for ref in sorted(self.ref_list):
+            refs.append(str(ref))
+        # Note: We've lost the info about whether this was a value or a name
+        #       We've also lost the 'length' field.
+        if self.value is not None:
+            if self.type_str == 'int':
+                value = '"value": %s, ' % self.value
+            else:
+                # TODO: This isn't perfect, as it doesn't do proper json
+                #       escaping
+                value = '"value": "%s", ' % self.value
+        else:
+            value = ''
+        return '{"address": %d, "type": "%s", "size": %d, %s"refs": [%s]}' % (
+            self.address, self.type_str, self.size, value, ', '.join(refs))
+
 
 cdef class MemObjectCollection:
     """Track a bunch of _MemObject instances."""
@@ -635,7 +709,6 @@
         """Add a new MemObject to this collection."""
         cdef _MemObject **slot, *new_entry
         cdef _MemObjectProxy proxy
-        cdef PyObject *addr
 
         slot = self._lookup(address)
         if slot[0] != NULL and slot[0] != _dummy:
@@ -644,41 +717,13 @@
             assert False, "We don't support overwrite yet."
         # TODO: These are fairy small and more subject to churn, maybe we
         #       should be using PyObj_Malloc instead...
-        new_entry = <_MemObject *>PyMem_Malloc(sizeof(_MemObject))
-        if new_entry == NULL:
-            # TODO: as we are running out of memory here, we might want to
-            #       pre-allocate this object. Since it is likely to take as
-            #       much mem to create this object as _MemObject
-            raise MemoryError('Failed to allocate %d bytes'
-                              % (sizeof(_MemObject),))
-        memset(new_entry, 0, sizeof(_MemObject))
-        addr = <PyObject *>address
+        new_entry = _new_mem_object(address, type_str, size, ref_list,
+                                    value, name, parent_list, total_size)
+
         if slot[0] == NULL:
             self._filled += 1
         self._active += 1
         slot[0] = new_entry
-        Py_INCREF(addr)
-        new_entry.address = addr
-        new_entry.type_str = <PyObject *>type_str
-        Py_INCREF(new_entry.type_str)
-        new_entry.size = size
-        new_entry.ref_list = _list_to_ref_list(ref_list)
-        # TODO: Scheduled for removal
-        # if length is None:
-        #     new_entry.length = -1
-        # else:
-        #     new_entry.length = length
-        if value is not None and name is not None:
-            raise RuntimeError("We currently only support one of value or name"
-                " per object.")
-        if value is not None:
-            new_entry.value = <PyObject *>value
-        else:
-            new_entry.value = <PyObject *>name
-        Py_INCREF(new_entry.value)
-        new_entry.parent_list = _list_to_ref_list(parent_list)
-        new_entry.total_size = total_size
-
         if self._filled * 3 > (self._table_mask + 1) * 2:
             # We need to grow
             self._resize(self._active * 2)
@@ -807,203 +852,3 @@
                 ' %d, %d %d' % (<int>cur, self.table_pos,
                                 self.collection._table_mask))
         return self.collection._proxy_for(<object>cur.address, cur)
-
-
-cdef class MemObject:
-    """This defines the information we know about the objects.
-
-    We use a Pyrex class, since in python each object is 40 bytes, but you also
-    have to include the size of all the objects referenced. (a 4-byte integer,
-    becomes a 12-byte PyInt.)
-
-    :ivar address: The address in memory of the original object. This is used
-        as the 'handle' to this object.
-    :ivar type_str: The type of this object
-    :ivar size: The number of bytes consumed for just this object. So for a
-        dict, this would be the basic_size + the size of the allocated array to
-        store the reference pointers
-    :ivar ref_list: A list of items referenced from this object
-    :ivar num_refs: Count of references
-    :ivar value: A PyObject representing the Value for this object. (For
-        strings, it is the first 100 bytes, it may be None if we have no value,
-        or it may be an integer, etc.)
-    :ivar name: Some objects have associated names, like modules, classes, etc.
-    """
-
-    cdef readonly object address  # We track the address by pointing to a PyInt
-                                  # This is valid, because we put these objects
-                                  # into a dict anyway, so we need a PyInt
-                                  # And we can just share it
-    cdef readonly object type_str # pointer to a PyString, this is expected to
-                                  # be shared with many other instances, but
-                                  # longer than 4 bytes
-    cdef public long size # Number of bytes consumed by this instance
-    # TODO: Right now this points to the integer offset, which we then look up
-    #       in the OM dict. However, if we are going to go with PyObject *, why
-    #       not just point to the final object anyway...
-    cdef RefList *_ref_list # An array of addresses that this object
-                            # referenced. May be NULL if len() == 0
-    # TODO: Scheduled for removal
-    cdef readonly int length # Object length (ob_size), aka len(object)
-    cdef public object value    # May be None, a PyString or a PyInt
-    cdef readonly object name     # Name of this object (only valid for
-                                  # modules, etc)
-    cdef RefList *_referrer_list # An array of addresses that refer to this,
-
-    cdef public unsigned long total_size # Size of everything referenced from
-                                         # this object
-
-    def __init__(self, address, type_str, size, ref_list, length=None,
-                 value=None, name=None):
-        self.address = address
-        self.type_str = type_str
-        self.size = size
-        self._ref_list = _list_to_ref_list(ref_list)
-        if length is None:
-            self.length = -1
-        else:
-            self.length = length
-        self.value = value
-        self.name = name
-        self._referrer_list = NULL
-        self.total_size = 0 # uncomputed yet
-
-    property ref_list:
-        """The list of objects referenced by this object."""
-        def __get__(self):
-            return _ref_list_to_list(self._ref_list)
-
-        def __set__(self, value):
-            _free_ref_list(self._ref_list)
-            self._ref_list = _list_to_ref_list(value)
-
-    property num_refs:
-        """The length of the ref_list."""
-        def __get__(self):
-            if self._ref_list == NULL:
-                return 0
-            return self._ref_list.size
-
-    def __len__(self):
-        if self._ref_list == NULL:
-            return 0
-        return self._ref_list.size
-
-    property referrers:
-        """The list of objects that reference this object.
-
-        Original set to None, can be computed on demand.
-        """
-        def __get__(self):
-            return _ref_list_to_list(self._referrer_list)
-
-        def __set__(self, value):
-            _free_ref_list(self._referrer_list)
-            self._referrer_list = _list_to_ref_list(value)
-
-    property num_referrers:
-        """The length of the referrers list."""
-        def __get__(self):
-            if self._referrer_list == NULL:
-                return 0
-            return self._referrer_list.size
-
-    def __dealloc__(self):
-        cdef long i
-        _free_ref_list(self._ref_list)
-        self._ref_list = NULL
-        _free_ref_list(self._referrer_list)
-        self._referrer_list = NULL
-
-    def __repr__(self):
-        cdef int i, max_refs
-        cdef double total_size
-        if self.name is not None:
-            name_str = ', %s' % (self.name,)
-        else:
-            name_str = ''
-        if self._ref_list == NULL:
-            num_refs = 0
-            ref_space = ''
-            ref_str = ''
-        else:
-            num_refs = self._ref_list.size
-            ref_str = _format_list(self._ref_list)
-            ref_space = ' '
-        if self._referrer_list == NULL:
-            referrer_str = ''
-        else:
-            referrer_str = ', %d referrers %s' % (
-                self._referrer_list.size,
-                _format_list(self._referrer_list))
-        if self.value is None:
-            value_str = ''
-        else:
-            r = repr(self.value)
-            if isinstance(self.value, basestring):
-                if len(r) > 21:
-                    r = r[:18] + "..."
-            value_str = ', %s' % (r,)
-        if self.total_size == 0:
-            total_size_str = ''
-        else:
-            total_size = self.total_size
-            order = 'B'
-            if total_size > 800.0:
-                total_size = total_size / 1024
-                order = 'KiB'
-            if total_size > 800.0:
-                total_size = total_size / 1024
-                order = 'MiB'
-            if total_size > 800.0:
-                total_size = total_size / 1024
-                order = 'GiB'
-            total_size_str = ', %.1f%s' % (total_size, order)
-
-
-        return ('%s(%d, %s%s, %d bytes, %d refs%s%s%s%s%s)'
-                % (self.__class__.__name__, self.address, self.type_str,
-                   name_str, self.size, num_refs, ref_space, ref_str,
-                   referrer_str, value_str, total_size_str))
-
-    def __getitem__(self, offset):
-        cdef long off
-        cdef PyObject *res
-
-        if self._ref_list == NULL:
-            raise IndexError('%s has no refs' % (self,))
-        off = offset
-        if off >= self._ref_list.size:
-            raise IndexError('%s has only %d refs'
-                             % (self, self._ref_list.size))
-        res = self._ref_list.refs[off]
-        return <object>res
-
-    def _intern_from_cache(self, cache):
-        self.address = _set_default(cache, self.address)
-        self.type_str = _set_default(cache, self.type_str)
-
-    def to_json(self):
-        """Convert this MemObject to json."""
-        refs = []
-        for ref in sorted(self.ref_list):
-            refs.append(str(ref))
-        if self.length != -1:
-            length = '"len": %d, ' % self.length
-        else:
-            length = ''
-        if self.value is not None:
-            if self.type_str == 'int':
-                value = '"value": %s, ' % self.value
-            else:
-                value = '"value": "%s", ' % self.value
-        else:
-            value = ''
-        if self.name:
-            name = '"name": "%s", ' % self.name
-        else:
-            name = ''
-        return '{"address": %d, "type": "%s", "size": %d, %s%s%s"refs": [%s]}' % (
-            self.address, self.type_str, self.size, name, length, value,
-            ', '.join(refs))
-

=== modified file 'meliae/loader.py'
--- a/meliae/loader.py	2009-12-29 20:37:15 +0000
+++ b/meliae/loader.py	2009-12-29 21:34:51 +0000
@@ -17,6 +17,7 @@
 Currently requires simplejson to parse.
 """
 
+import gc
 import math
 import os
 import re
@@ -209,6 +210,7 @@
         get_refs = referrers.get
         total = len(self.objs)
         tlast = timer()-20
+        gc.disable()
         for idx, obj in enumerate(self.objs.itervalues()):
             if self.show_progress and idx & 0x3f == 0:
                 tnow = timer()
@@ -267,6 +269,7 @@
                     obj.referrers = (refs,)
                 else:
                     obj.referrers = refs
+        gc.enable()
         if self.show_progress:
             sys.stderr.write('set referrers %8d / %8d        \n'
                              % (idx, total))
@@ -382,15 +385,19 @@
         #       and reference a 'classobj' with the actual type name
         collapsed = 0
         total = len(self.objs)
+        tlast = timer()-20
         for item_idx, (address, obj) in enumerate(self.objs.items()):
             if obj.type_str in ('str', 'dict', 'tuple', 'list', 'type',
                                 'function', 'wrapper_descriptor',
                                 'code', 'classobj', 'int',
                                 'weakref'):
                 continue
-            if self.show_progress and item_idx & 0x5ff:
-                sys.stderr.write('checked %8d / %8d collapsed %8d    \r'
-                                 % (item_idx, total, collapsed))
+            if self.show_progress and item_idx & 0x3f:
+                tnow = timer()
+                if tnow - tlast > 0.1:
+                    tlast = tnow
+                    sys.stderr.write('checked %8d / %8d collapsed %8d    \r'
+                                     % (item_idx, total, collapsed))
             if obj.type_str == 'module' and len(obj) == 1:
                 (dict_ref,) = obj.ref_list
                 extra_refs = []
@@ -418,14 +425,14 @@
             # the dict from the collection
             del self.objs[dict_ref]
         if self.show_progress:
-            sys.stderr.write('checked %8d / %8d collapsed %8d    \r'
+            sys.stderr.write('checked %8d / %8d collapsed %8d    \n'
                              % (item_idx, total, collapsed))
         if collapsed:
             self.compute_referrers()
 
     def refs_as_dict(self, obj):
         """Expand the ref list considering it to be a 'dict' structure.
-        
+
         Often we have dicts that point to simple strings and ints, etc. This
         tries to expand that as much as possible.
 
@@ -514,8 +521,8 @@
     :param objs: Either None or a dict containing objects by address. If not
         None, then duplicate objects will not be parsed or output.
     :param factory: Use this to create new instances, if None, use
-        _loader.MemObject
-    :return: A generator of MemObjects.
+        _loader._MemObjectProxy_from_args
+    :return: A generator of memory objects.
     """
     # TODO: cStringIO?
     tstart = timer()
@@ -532,7 +539,7 @@
     else:
         decoder = _from_line
     if factory is None:
-        factory = _loader.MemObject
+        factory = _loader._MemObjectProxy_from_args
     for line_num, line in enumerate(source):
         bytes_read += len(line)
         if line in ("[\n", "]\n"):
@@ -619,7 +626,7 @@
     # Second pass, any object which refers to something in noref_objs will
     # have that reference removed, and replaced with the null_memobj
     num_expensive = len(noref_objs)
-    null_memobj = _loader.MemObject(0, '<ex-reference>', 0, [])
+    null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
     if not seen_zero:
         yield (True, null_memobj)
     if show_progress and total_objs == 0:

=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py	2009-12-29 15:40:23 +0000
+++ b/meliae/tests/test__loader.py	2009-12-29 21:34:51 +0000
@@ -20,100 +20,6 @@
     )
 
 
-class TestMemObject(tests.TestCase):
-
-    def test_test_simple_attributes(self):
-        mem = _loader.MemObject(1234, 'int', 12, [])
-        self.assertEqual(1234, mem.address)
-        # Make sure we don't cast into PyLong
-        self.assertTrue(isinstance(mem.address, int))
-        self.assertEqual('int', mem.type_str)
-        self.assertEqual(12, mem.size)
-        self.assertTrue(isinstance(mem.size, int))
-        self.assertEqual((), mem.ref_list)
-        self.assertEqual(0, mem.total_size)
-
-    def test_ref_list(self):
-        mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
-        self.assertEqual([4567, 8901], mem.ref_list)
-        mem.ref_list = [999, 4567, 0]
-        self.assertEqual([999, 4567, 0], mem.ref_list)
-        self.assertEqual(3, mem.num_refs)
-
-    def test_num_refs(self):
-        mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
-        self.assertEqual(2, mem.num_refs)
-        mem = _loader.MemObject(1234, 'tuple', 12, [])
-        self.assertEqual(0, mem.num_refs)
-
-    def test__getitem__(self):
-        mem = _loader.MemObject(1234, 'tuple', 12, [])
-        def get(offset):
-            return mem[offset]
-        self.assertRaises(IndexError, get, 0)
-        self.assertRaises(IndexError, get, 1)
-        self.assertRaises(IndexError, get, -1)
-        mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
-        self.assertEqual(4567, mem[0])
-        self.assertEqual(8901, mem[1])
-
-    def test_num_referrers(self):
-        mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
-        mem.referrers = ()
-        self.assertEqual(0, mem.num_referrers)
-        self.assertEqual((), mem.referrers)
-        mem.referrers = [1, 2, 3]
-        self.assertEqual(3, mem.num_referrers)
-        self.assertEqual([1, 2, 3], mem.referrers)
-
-    def test_total_size(self):
-        mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
-        self.assertEqual(0, mem.total_size)
-        mem.total_size = 100
-        self.assertEqual(100, mem.total_size)
-
-    def test__repr__(self):
-        mem = _loader.MemObject(1234, 'str', 24, [])
-        self.assertEqual('MemObject(1234, str, 24 bytes'
-                         ', 0 refs)', repr(mem))
-        mem = _loader.MemObject(1234, 'tuple', 12, [4567, 8900])
-        self.assertEqual('MemObject(1234, tuple, 12 bytes'
-                         ', 2 refs [4567, 8900])', repr(mem))
-        mem = _loader.MemObject(1234, 'module', 12, [4567, 8900],
-                                name='named')
-        self.assertEqual('MemObject(1234, module, named, 12 bytes'
-                         ', 2 refs [4567, 8900])', repr(mem))
-        mem = _loader.MemObject(1234, 'module', 12, range(20))
-        self.assertEqual('MemObject(1234, module, 12 bytes'
-                         ', 20 refs [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ...])',
-                         repr(mem))
-        mem = _loader.MemObject(1234, 'foo', 12, [10])
-        mem.referrers = [20, 30]
-        self.assertEqual('MemObject(1234, foo, 12 bytes'
-                         ', 1 refs [10], 2 referrers [20, 30])',
-                         repr(mem))
-        mem = _loader.MemObject(1234, 'str', 24, [])
-        mem.value = 'teststr'
-        self.assertEqual('MemObject(1234, str, 24 bytes'
-                         ', 0 refs, \'teststr\')', repr(mem))
-        mem.value = 'averylongstringwithmorestuff'
-        self.assertEqual('MemObject(1234, str, 24 bytes'
-                         ', 0 refs, \'averylongstringwi...)', repr(mem))
-        mem = _loader.MemObject(1234, 'int', 12, [])
-        mem.value = 12345
-        self.assertEqual('MemObject(1234, int, 12 bytes'
-                         ', 0 refs, 12345)', repr(mem))
-        mem.total_size = 12
-        self.assertEqual('MemObject(1234, int, 12 bytes'
-                         ', 0 refs, 12345, 12.0B)', repr(mem))
-        mem.total_size = 1024
-        self.assertEqual('MemObject(1234, int, 12 bytes'
-                         ', 0 refs, 12345, 1.0KiB)', repr(mem))
-        mem.total_size = int(1024*1024*10.5)
-        self.assertEqual('MemObject(1234, int, 12 bytes'
-                         ', 0 refs, 12345, 10.5MiB)', repr(mem))
-
-
 class TestMemObjectCollection(tests.TestCase):
     
     def test__init__(self):

=== modified file 'meliae/tests/test_loader.py'
--- a/meliae/tests/test_loader.py	2009-12-29 20:37:15 +0000
+++ b/meliae/tests/test_loader.py	2009-12-29 21:34:51 +0000
@@ -166,9 +166,19 @@
 class TestMemObj(tests.TestCase):
 
     def test_to_json(self):
-        objs = list(loader.iter_objs(_example_dump))
+        manager = loader.load(_example_dump, show_prog=False)
+        objs = manager.objs.values()
         objs.sort(key=lambda x:x.address)
-        expected = sorted(_example_dump)
+        expected = [
+'{"address": 1, "type": "tuple", "size": 20, "refs": [2, 3]}',
+'{"address": 2, "type": "dict", "size": 124, "refs": [4, 5, 6, 7]}',
+'{"address": 3, "type": "list", "size": 44, "refs": [3, 4, 5]}',
+'{"address": 4, "type": "int", "size": 12, "value": 2, "refs": []}',
+'{"address": 5, "type": "int", "size": 12, "value": 1, "refs": []}',
+'{"address": 6, "type": "str", "size": 29, "value": "a str", "refs": []}',
+'{"address": 7, "type": "tuple", "size": 20, "refs": [4, 5]}',
+'{"address": 8, "type": "module", "size": 60, "value": "mymod", "refs": [2]}',
+        ]
         self.assertEqual(expected, [obj.to_json() for obj in objs])
 
 



More information about the bazaar-commits mailing list