Rev 154: Speed up collection.items() tremendously by disabling gc. Branch: http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

John Arbash Meinel john at arbash-meinel.com
Tue Dec 29 16:00:08 GMT 2009


At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

------------------------------------------------------------
revno: 154
revision-id: john at arbash-meinel.com-20091229155946-18hvabfyjvs39m59
parent: john at arbash-meinel.com-20091229154532-4drym9jwwkpy598i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Tue 2009-12-29 09:59:46 -0600
message:
  Speed up collection.items() tremendously by disabling gc.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2009-12-29 15:40:23 +0000
+++ b/meliae/_loader.pyx	2009-12-29 15:59:46 +0000
@@ -24,6 +24,8 @@
 
     long PyObject_Hash(PyObject *) except -1
 
+    object PyList_New(Py_ssize_t)
+    void PyList_SET_ITEM(object, Py_ssize_t, object)
     PyObject *PyDict_GetItem(object d, object key)
     PyObject *PyDict_GetItem_ptr "PyDict_GetItem" (object d, PyObject *key)
     int PyDict_SetItem(object d, object key, object val) except -1
@@ -41,6 +43,8 @@
     # void fprintf(void *, char *, ...)
     # void *stderr
 
+import gc
+
 
 ctypedef struct RefList:
     long size
@@ -716,20 +720,35 @@
 
     def items(self):
         """Iterate over (key, value) tuples."""
-        cdef long i
+        cdef long i, out_idx
         cdef _MemObject *cur
         cdef _MemObjectProxy proxy
 
-        # TODO: Pre-allocate the full size list
-        values = []
-        for i from 0 <= i < self._table_mask:
-            cur = self._table[i]
-            if cur == NULL or cur == _dummy:
-                continue
-            else:
-                address = <object>cur.address
-                proxy = self._proxy_for(address, cur)
-                values.append((address, proxy))
+        enabled = gc.isenabled()
+        if enabled:
+            # We are going to be creating a lot of objects here, but not with
+            # cycles, so we disable gc temporarily
+            # With an object list of ~3M items, this drops the .items() time
+            # from 25s down to 1.3s
+            gc.disable()
+        try:
+            values = PyList_New(self._active)
+            out_idx = 0
+            for i from 0 <= i < self._table_mask:
+                cur = self._table[i]
+                if cur == NULL or cur == _dummy:
+                    continue
+                else:
+                    address = <object>cur.address
+                    proxy = self._proxy_for(address, cur)
+                    item = (address, proxy)
+                    # SET_ITEM steals a reference
+                    Py_INCREF(<PyObject *>item)
+                    PyList_SET_ITEM(values, out_idx, item)
+                    out_idx += 1
+        finally:
+            if enabled:
+                gc.enable()
         return values
 
     def itervalues(self):

=== modified file 'meliae/loader.py'
--- a/meliae/loader.py	2009-12-29 15:45:32 +0000
+++ b/meliae/loader.py	2009-12-29 15:59:46 +0000
@@ -577,7 +577,7 @@
     # _fill_total_size(objs)
     return ObjManager(objs, show_progress=show_prog)
 
-#_load = _load_moc
+_load = _load_moc
 
 
 def remove_expensive_references(source, total_objs=0, show_progress=False):



More information about the bazaar-commits mailing list