Rev 151: Some tweaks for supporting _load_moc in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 29 04:59:31 GMT 2009
At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
------------------------------------------------------------
revno: 151
revision-id: john at arbash-meinel.com-20091229045910-5tjwea1ap86aizsp
parent: john at arbash-meinel.com-20091229041627-mdyt3j1qx0graikx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Mon 2009-12-28 22:59:10 -0600
message:
Some tweaks for supporting _load_moc
The big one: implement itervalues() as an actual iterator.
Pre-building the whole list is considerably expensive: not only
do we have to allocate and realloc the list many times, but
we also have to create all of those proxy objects ahead of
time and deal with all of that gc overhead.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx 2009-12-29 04:13:29 +0000
+++ b/meliae/_loader.pyx 2009-12-29 04:59:10 +0000
@@ -679,6 +679,7 @@
cdef _MemObject *cur
cdef _MemObjectProxy proxy
+ # TODO: Pre-allocate the full size list
values = []
for i from 0 <= i < self._table_mask:
cur = self._table[i]
@@ -698,6 +699,7 @@
cdef _MemObject *cur
cdef _MemObjectProxy proxy
+ # TODO: Pre-allocate the full size list
values = []
for i from 0 <= i < self._table_mask:
cur = self._table[i]
@@ -711,7 +713,7 @@
def itervalues(self):
"""Return an iterable of values stored in this map."""
- return self.values()
+ return _MOCValueIterator(self)
def values(self):
# This returns a list, but that is 'close enough' for what we need
@@ -730,6 +732,38 @@
return values
+cdef class _MOCValueIterator:
+ """A simple iterator over the values in a MOC."""
+
+ cdef MemObjectCollection collection
+ cdef int initial_active
+ cdef int table_pos
+
+ def __init__(self, collection):
+ self.collection = collection
+ self.initial_active = self.collection._active
+ self.table_pos = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ cdef _MemObject *cur
+
+ if self.collection._active != self.initial_active:
+ raise RuntimeError('MemObjectCollection changed size during'
+ ' iteration')
+ cur = NULL
+ while (cur == NULL or cur == _dummy
+ and self.table_pos <= self.collection._table_mask):
+ cur = self.collection._table[self.table_pos]
+ self.table_pos += 1
+ # self.table_pos points to the *next* entry, so make sure it is fully
+ # off the table
+ if self.table_pos > self.collection._table_mask + 1:
+ raise StopIteration()
+ return self.collection._proxy_for(<object>cur.address, cur)
+
cdef class MemObject:
"""This defines the information we know about the objects.
=== modified file 'meliae/loader.py'
--- a/meliae/loader.py 2009-12-29 04:16:27 +0000
+++ b/meliae/loader.py 2009-12-29 04:59:10 +0000
@@ -200,9 +200,8 @@
def compute_referrers(self):
"""For each object, figure out who is referencing it."""
- referrers = dict.fromkeys(self.objs, None)
- id_cache = dict((obj.address, obj.address) for obj in
- self.objs.itervalues())
+ addresses = self.objs.keys()
+ referrers = dict.fromkeys(addresses, None)
total = len(self.objs)
for idx, obj in enumerate(self.objs.itervalues()):
if self.show_progress and idx & 0x1ff == 0:
@@ -211,14 +210,13 @@
address = obj.address
for ref in obj.ref_list:
try:
- ref = id_cache[ref]
+ refs = referrers[ref]
except KeyError:
# Reference to something outside this set of objects.
# Doesn't matter what it is, we won't be updating it.
continue
- refs = referrers[ref]
# This is ugly, so it should be explained.
- # To save memory pressure, referrers will point to one of 3
+ # To save memory pressure, referrers will point to one of 4
# types.
# 1) A simple integer, representing a single referrer
# this saves the allocation of a separate structure
@@ -227,6 +225,7 @@
# requires creating a new tuple to 'add' an entry.
# 3) A list, as before, for things with lots of referrers, we
# use a regular list to let it grow.
+ # 4) None, no references from this object
t = type(refs)
if refs is None:
refs = address
@@ -244,7 +243,6 @@
raise TypeError('unknown refs type: %s\n'
% (t,))
referrers[ref] = refs
- del id_cache
for obj in self.objs.itervalues():
try:
refs = referrers.pop(obj.address)
=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py 2009-12-29 04:13:29 +0000
+++ b/meliae/tests/test__loader.py 2009-12-29 04:59:10 +0000
@@ -225,10 +225,13 @@
moc = _loader.MemObjectCollection()
moc.add(0, 'bar', 100)
moc.add(1024, 'baz', 102)
- moc.add(512, 'bing', 103)
- self.assertEqual([0, 1024, 512], [x.address for x in moc.itervalues()])
+ moc.add(1023, 'booze', 103)
+ moc.add(512, 'bing', 104)
+ self.assertEqual([0, 1024, 512, 1023],
+ [x.address for x in moc.itervalues()])
del moc[0]
- self.assertEqual([1024, 512], [x.address for x in moc.itervalues()])
+ self.assertEqual([1024, 512, 1023],
+ [x.address for x in moc.itervalues()])
def test_items(self):
moc = _loader.MemObjectCollection()
=== modified file 'meliae/tests/test_loader.py'
--- a/meliae/tests/test_loader.py 2009-10-22 22:05:00 +0000
+++ b/meliae/tests/test_loader.py 2009-12-29 04:59:10 +0000
@@ -43,8 +43,9 @@
'{"address": 4, "type": "int", "size": 12, "value": 2, "refs": []}',
'{"address": 2, "type": "dict", "size": 124, "len": 2, "refs": [4, 5, 6, 7]}',
'{"address": 7, "type": "tuple", "size": 20, "len": 2, "refs": [4, 5]}',
-'{"address": 6, "type": "str", "size": 29, "name": "bah", "len": 5, "value": "a str"'
+'{"address": 6, "type": "str", "size": 29, "len": 5, "value": "a str"'
', "refs": []}',
+'{"address": 8, "type": "module", "name": "mymod", "size": 60, "refs": [2]}',
]
# Note that this doesn't have a complete copy of the references. Namely when
@@ -141,6 +142,7 @@
def test_remove_expensive_references(self):
lines = list(_example_dump)
+ lines.pop(-1) # Remove the old module
lines.append('{"address": 8, "type": "module", "size": 12'
', "name": "mymod", "refs": [9]}')
lines.append('{"address": 9, "type": "dict", "size": 124'
@@ -177,12 +179,13 @@
manager.compute_referrers()
objs = manager.objs
self.assertEqual((), objs[1].referrers)
- self.assertEqual([1], objs[2].referrers)
+ self.assertEqual([1, 8], objs[2].referrers)
self.assertEqual([1, 3], objs[3].referrers)
self.assertEqual([2, 3, 7], objs[4].referrers)
self.assertEqual([2, 3, 7], objs[5].referrers)
self.assertEqual([2], objs[6].referrers)
self.assertEqual([2], objs[7].referrers)
+ self.assertEqual((), objs[8].referrers)
def test_compute_total_size(self):
manager = loader.load(_example_dump, show_prog=False)
@@ -195,6 +198,7 @@
self.assertEqual(12, objs[5].total_size)
self.assertEqual(29, objs[6].total_size)
self.assertEqual(44, objs[7].total_size)
+ self.assertEqual(257, objs[8].total_size)
def test_compute_total_size_missing_ref(self):
lines = list(_example_dump)
@@ -207,6 +211,7 @@
def test_remove_expensive_references(self):
lines = list(_example_dump)
+ lines.pop(-1) # Remove the old module
lines.append('{"address": 8, "type": "module", "size": 12'
', "name": "mymod", "refs": [9]}')
lines.append('{"address": 9, "type": "dict", "size": 124'
More information about the bazaar-commits
mailing list