Rev 123: Start working on a dict-like container for memory objects. in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

John Arbash Meinel john at arbash-meinel.com
Thu Dec 24 03:34:47 GMT 2009


At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

------------------------------------------------------------
revno: 123
revision-id: john at arbash-meinel.com-20091224033431-vbjptb3wsptqp49v
parent: john at arbash-meinel.com-20091223230144-benkylfi7yaof907
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Wed 2009-12-23 21:34:31 -0600
message:
  Start working on a dict-like container for memory objects.
  
  The main goal is that MemoryObjectCollection can be much more efficient if
  we combine how the objects are stored with how they are accessed.
  dicts waste a fair amount of memory storing a key,value,hash tuple,
  when we have the 'address' as our key and cheap hash, and
  the value can then be stored inline. Which also means we don't have
  the per-object memory overhead (no gc, no refcount, type pointer, etc.)
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2009-12-23 23:01:44 +0000
+++ b/meliae/_loader.pyx	2009-12-24 03:34:31 +0000
@@ -21,12 +21,18 @@
     void *PyMem_Malloc(size_t)
     void PyMem_Free(void *)
 
+    long PyObject_Hash(PyObject *) except -1
+
     PyObject *PyDict_GetItem(object d, object key)
     int PyDict_SetItem(object d, object key, object val) except -1
     void Py_INCREF(PyObject*)
+    void Py_XDECREF(PyObject*)
     void Py_DECREF(PyObject*)
     object PyTuple_New(Py_ssize_t)
     object PyTuple_SET_ITEM(object, Py_ssize_t, object)
+    int PyObject_RichCompareBool(PyObject *, PyObject *, int) except -1
+    int Py_EQ
+    void memset(void *, int, size_t)
 
 
 ctypedef struct RefList:
@@ -118,6 +124,107 @@
     return ''.join(ref_str)
 
 
+cdef struct _MemObject:
+    # """The raw C structure, used to minimize memory allocation size."""
+    PyObject *address
+    PyObject *type_str
+    long size
+    RefList *ref_list
+    int length
+    PyObject *value
+    PyObject *name
+    RefList *referrer_list
+    unsigned long total_size
+
+
+cdef _MemObject *_dummy
+_dummy = <_MemObject*>(-1)
+
+
+cdef class MemObjectCollection:
+    """Track a bunch of _MemObject instances."""
+
+    cdef readonly int _table_mask  # N slots = table_mask + 1
+    cdef readonly int _active      # How many slots have real data
+    cdef readonly int _filled      # How many slots have real or dummy
+    cdef _MemObject* _table # _MemObjects are stored inline
+
+    def __init__(self):
+        self._table_mask = 1024 - 1
+        self._table = <_MemObject*>PyMem_Malloc(sizeof(_MemObject)*1024)
+        memset(self._table, 0, sizeof(_MemObject)*1024)
+
+    cdef _MemObject* _lookup(self, PyObject *address) except NULL:
+        cdef long the_hash
+        cdef size_t i, n_lookup
+        cdef long mask
+        cdef _MemObject *table, *slot, *free_slot
+
+        the_hash = PyObject_Hash(address)
+        i = <size_t>the_hash
+        mask = self._table_mask
+        table = self._table
+        free_slot = NULL
+        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
+            slot = &table[i & mask]
+            if slot.address == NULL:
+                # Found a blank spot
+                if free_slot != NULL:
+                    # Did we find an earlier _dummy entry?
+                    return free_slot
+                else:
+                    return slot
+            if slot.address == address:
+                # Found an exact pointer to the key
+                return slot
+            if slot == _dummy:
+                if free_slot == NULL:
+                    free_slot = slot
+            elif PyObject_RichCompareBool(slot.address, address, Py_EQ):
+                # Both py_key and cur belong in this slot, return it
+                return slot
+            i = i + 1 + n_lookup
+        raise AssertionError('should never get here')
+
+    def _test_lookup(self, address):
+        cdef _MemObject *slot 
+
+        slot = self._lookup(<PyObject *>address)
+        return (slot - self._table)
+
+    # def __contains__(self, address):
+    #     pass
+
+    # def __getitem__(self, address):
+    #     pass
+
+    # def __delitem__(self, address):
+    #     pass
+
+    # def __setitem__(self, address, value):
+    #     pass
+
+    def __dealloc__(self):
+        cdef long i
+        cdef _MemObject *cur
+
+        for i from 0 <= i < self._table_mask:
+            cur = self._table + i
+            if cur.address != NULL:
+                Py_XDECREF(cur.type_str)
+                cur.type_str = NULL
+                _free_ref_list(cur.ref_list)
+                cur.ref_list = NULL
+                Py_XDECREF(cur.value)
+                cur.value = NULL
+                Py_XDECREF(cur.name)
+                cur.name = NULL
+                _free_ref_list(cur.referrer_list)
+                cur.referrer_list = NULL
+        PyMem_Free(self._table)
+        self._table = NULL
+
+
 cdef class MemObject:
     """This defines the information we know about the objects.
 

=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py	2009-12-23 22:55:29 +0000
+++ b/meliae/tests/test__loader.py	2009-12-24 03:34:31 +0000
@@ -112,3 +112,23 @@
         mem.total_size = int(1024*1024*10.5)
         self.assertEqual('MemObject(1234, int, 12 bytes'
                          ', 0 refs, 12345, 10.5MiB)', repr(mem))
+
+
+class TestMemObjectCollection(tests.TestCase):
+    
+    def test__init__(self):
+        moc = _loader.MemObjectCollection()
+        self.assertEqual(0, moc._active)
+        self.assertEqual(0, moc._filled)
+        self.assertEqual(1023, moc._table_mask)
+
+    def test__lookup_direct(self):
+        moc = _loader.MemObjectCollection()
+        self.assertEqual(1023, moc._table_mask)
+        self.assertEqual(0, moc._test_lookup(0))
+        self.assertEqual(0, moc._test_lookup(1024))
+        self.assertEqual(255, moc._test_lookup(255))
+        self.assertEqual(933, moc._test_lookup(933))
+        self.assertEqual(933, moc._test_lookup(933+1024))
+        self.assertEqual(933, moc._test_lookup(933L+1024L))
+        self.assertEqual(933, moc._test_lookup(933L+2**32-1))



More information about the bazaar-commits mailing list