Rev 4323: (jam) Change the _LRUNode implementation to avoid cyclic refs. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Fri May 1 23:03:50 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4323
revision-id: pqm at pqm.ubuntu.com-20090501220339-msx1b0ouap06xsp9
parent: pqm at pqm.ubuntu.com-20090501204351-ll10ketv3bu8plhx
parent: john at arbash-meinel.com-20090501202037-frhn2e9060142ldf
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2009-05-01 23:03:39 +0100
message:
(jam) Change the _LRUNode implementation to avoid cyclic refs.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
------------------------------------------------------------
revno: 4287.1.11
revision-id: john at arbash-meinel.com-20090501202037-frhn2e9060142ldf
parent: john at arbash-meinel.com-20090416220625-hejrugy3r5qwlut9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Fri 2009-05-01 15:20:37 -0500
message:
Small tweaks from Ian.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.10
revision-id: john at arbash-meinel.com-20090416220625-hejrugy3r5qwlut9
parent: john at arbash-meinel.com-20090416205818-arnw8c6tvnjrs3h5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 17:06:25 -0500
message:
Restore the ability to handle None as a key.
We now use _null_key instead of None to indicate the end-of-refs.
This means we now check that _null_key isn't used as an actual key.
This slows us down from 7.1 => 7.3s or so.
Interestingly, the globals lookup of _null_key was faster than
node is self._lru (7.5s+). I was a bit surprised at that.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
------------------------------------------------------------
revno: 4287.1.9
revision-id: john at arbash-meinel.com-20090416205818-arnw8c6tvnjrs3h5
parent: john at arbash-meinel.com-20090416205631-3utrx0gutoxove2f
parent: pqm at pqm.ubuntu.com-20090416152500-83w2bul0hwhhm7gd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 15:58:18 -0500
message:
Merge bzr.dev, resolve lru_cache.py
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/branchbuilder.py branchbuilder.py-20070427022007-zlxpqz2lannhk6y8-1
bzrlib/bzrdir.py bzrdir.py-20060131065624-156dfea39c4387cb
bzrlib/config.py config.py-20051011043216-070c74f4e9e338e8
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/foreign.py foreign.py-20081112170002-olsxmandkk8qyfuq-1
bzrlib/mail_client.py mail_client.py-20070809192806-vuxt3t19srtpjpdn-1
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/smart/branch.py branch.py-20061124031907-mzh3pla28r83r97f-1
bzrlib/smart/bzrdir.py bzrdir.py-20061122024551-ol0l0o0oofsu9b3t-1
bzrlib/smart/request.py request.py-20061108095550-gunadhxmzkdjfeek-1
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/blackbox/test_branch.py test_branch.py-20060524161337-noms9gmcwqqrfi8y-1
bzrlib/tests/blackbox/test_push.py test_push.py-20060329002750-929af230d5d22663
bzrlib/tests/branch_implementations/test_locking.py test_locking.py-20060707151933-tav3o2hpibwi53u4-4
bzrlib/tests/branch_implementations/test_parent.py test_parent.py-20050830052751-5e62766623c32222
bzrlib/tests/bzrdir_implementations/test_bzrdir.py test_bzrdir.py-20060131065642-0ebeca5e30e30866
bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
bzrlib/tests/interrepository_implementations/test_fetch.py test_fetch.py-20080425213627-j60cjh782ufm83ry-1
bzrlib/tests/lock_helpers.py LockHelpers.py-20060707151933-tav3o2hpibwi53u4-1
bzrlib/tests/test_config.py testconfig.py-20051011041908-742d0c15d8d8c8eb
bzrlib/tests/test_foreign.py test_foreign.py-20081125004048-ywb901edgp9lluxo-1
bzrlib/tests/test_mail_client.py test_mail_client.py-20070809192806-vuxt3t19srtpjpdn-2
bzrlib/tests/test_remote.py test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
bzrlib/tests/test_smart.py test_smart.py-20061122024551-ol0l0o0oofsu9b3t-2
bzrlib/tests/test_urlutils.py test_urlutils.py-20060502192900-46b1f9579987cf9c
bzrlib/transport/__init__.py transport.py-20050711165921-4978aa7ce1285ad5
bzrlib/urlutils.py urlutils.py-20060502195429-e8a161ecf8fac004
------------------------------------------------------------
revno: 4287.1.8
revision-id: john at arbash-meinel.com-20090416205631-3utrx0gutoxove2f
parent: john at arbash-meinel.com-20090416204541-gg2qtfggaxwzudvz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 15:56:31 -0500
message:
Because we now store 'key' references, and we use None there to indicate
the end-of-chain, we can no longer safely map the None object to real values.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.7
revision-id: john at arbash-meinel.com-20090416204541-gg2qtfggaxwzudvz
parent: john at arbash-meinel.com-20090416203230-ltw57cs1yp0yp5q9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 15:45:41 -0500
message:
Fairly significant savings... avoid looking at self._last_recently_used.
We can get the same information from node.next_key, which is a value we need anyway.
Somewhat surprisingly, this drops us from 7.6s => 7.1s on 2.8M lookups.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.6
revision-id: john at arbash-meinel.com-20090416203230-ltw57cs1yp0yp5q9
parent: john at arbash-meinel.com-20090416195528-bb69o9sepyh0fmk8
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 15:32:30 -0500
message:
Remove the double getattr() for self._cache.
The common case is that prev and next both exist, so tweak for that case.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.5
revision-id: john at arbash-meinel.com-20090416195528-bb69o9sepyh0fmk8
parent: john at arbash-meinel.com-20090415220144-ubls1xdzq9t04hao
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Thu 2009-04-16 14:55:28 -0500
message:
Switch to using prev as the object and next_key as the pointer.
This shouldn't really change the __getitem__ time, but it should make removing
the lru a tiny bit more straightforward.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.4
revision-id: john at arbash-meinel.com-20090415220144-ubls1xdzq9t04hao
parent: john at arbash-meinel.com-20090415181407-l2fjdkjrun9931k2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Wed 2009-04-15 17:01:44 -0500
message:
use indirection on both next and prev.
This was done because I thought we still had a cycle.
It turns out that we *actually* just had a frame referencing
my cache object, which caused 'del cache' to not actually
destroy it.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.3
revision-id: john at arbash-meinel.com-20090415181407-l2fjdkjrun9931k2
parent: john at arbash-meinel.com-20090415180816-gt70a2f8zdy9zkn9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Wed 2009-04-15 13:14:07 -0500
message:
(temporary) Revert back to double-linked list, just with the fix for actually removing nodes properly.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
------------------------------------------------------------
revno: 4287.1.2
revision-id: john at arbash-meinel.com-20090415180816-gt70a2f8zdy9zkn9
parent: john at arbash-meinel.com-20090414194349-0drrvwi7n9ju3e51
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Wed 2009-04-15 13:08:16 -0500
message:
Properly remove the nodes from the internal linked list in _remove_node.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
------------------------------------------------------------
revno: 4287.1.1
revision-id: john at arbash-meinel.com-20090414194349-0drrvwi7n9ju3e51
parent: pqm at pqm.ubuntu.com-20090411224546-82f5xlgs2r51k164
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.15-lru-gc
timestamp: Tue 2009-04-14 14:43:49 -0500
message:
Remove the refcycle for _LRUNode.
We access the 'next' node via a direct pointer, but we access the
prev node via the dict. Accessing it directly should be faster
than accessing via an indirection, but doing it this way means that
we can have LRUNode's clean themselves up automatically when you
remove the reference to the cache itself, rather than needing
an explicit cleanup.
modified:
bzrlib/lru_cache.py lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py 2009-04-16 02:08:21 +0000
+++ b/bzrlib/lru_cache.py 2009-05-01 20:20:37 +0000
@@ -21,15 +21,16 @@
trace,
)
+_null_key = object()
class _LRUNode(object):
"""This maintains the linked-list which is the lru internals."""
- __slots__ = ('prev', 'next', 'key', 'value', 'cleanup', 'size')
+ __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
def __init__(self, key, value, cleanup=None):
self.prev = None
- self.next = None
+ self.next_key = _null_key
self.key = key
self.value = value
self.cleanup = cleanup
@@ -39,16 +40,12 @@
self.size = None
def __repr__(self):
- if self.next is None:
- next = None
- else:
- next = self.next.key
if self.prev is None:
- prev = None
+ prev_key = None
else:
- prev = self.prev.key
+ prev_key = self.prev.key
return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
- next, prev)
+ self.next_key, prev_key)
def run_cleanup(self):
if self.cleanup is not None:
@@ -73,14 +70,15 @@
# The "HEAD" of the lru linked list
self._most_recently_used = None
# The "TAIL" of the lru linked list
- self._last_recently_used = None
+ self._least_recently_used = None
self._update_max_cache(max_cache, after_cleanup_count)
def __contains__(self, key):
return key in self._cache
def __getitem__(self, key):
- node = self._cache[key]
+ cache = self._cache
+ node = cache[key]
# Inlined from _record_access to decrease the overhead of __getitem__
# We also have more knowledge about structure if __getitem__ is
# succeeding, then we know that self._most_recently_used must not be
@@ -89,17 +87,21 @@
if node is mru:
# Nothing to do, this node is already at the head of the queue
return node.value
- elif node is self._last_recently_used:
- self._last_recently_used = node.prev
# Remove this node from the old location
node_prev = node.prev
- node_next = node.next
- if node_prev is not None:
- node_prev.next = node_next
- if node_next is not None:
+ next_key = node.next_key
+ # benchmarking shows that the lookup of _null_key in globals is faster
+ # than the attribute lookup for (node is self._least_recently_used)
+ if next_key is _null_key:
+ # 'node' is the _least_recently_used, because it doesn't have a
+ # 'next' item. So move the current lru to the previous node.
+ self._least_recently_used = node_prev
+ else:
+ node_next = cache[next_key]
node_next.prev = node_prev
- # Insert this node to the front of the list
- node.next = mru
+ node_prev.next_key = next_key
+ # Insert this node at the front of the list
+ node.next_key = mru.key
mru.prev = node
self._most_recently_used = node
node.prev = None
@@ -117,12 +119,14 @@
' supposed to have a previous entry'
' %s' % (node,))
while node is not None:
- if node.next is None:
- if node is not self._last_recently_used:
+ if node.next_key is _null_key:
+ if node is not self._least_recently_used:
raise AssertionError('only the last node should have'
' no next value: %s' % (node,))
+ node_next = None
else:
- if node.next.prev is not node:
+ node_next = self._cache[node.next_key]
+ if node_next.prev is not node:
raise AssertionError('inconsistency found, node.next.prev'
' != node: %s' % (node,))
if node.prev is None:
@@ -131,11 +135,11 @@
' not have a previous node: %s'
% (node,))
else:
- if node.prev.next is not node:
+ if node.prev.next_key != node.key:
raise AssertionError('inconsistency found, node.prev.next'
' != node: %s' % (node,))
yield node
- node = node.next
+ node = node_next
def add(self, key, value, cleanup=None):
"""Add a new value to the cache.
@@ -148,6 +152,8 @@
:param cleanup: None or a function taking (key, value) to indicate
'value' should be cleaned up.
"""
+ if key is _null_key:
+ raise ValueError('cannot use _null_key as a key')
if key in self._cache:
node = self._cache[key]
node.run_cleanup()
@@ -207,42 +213,44 @@
# Move 'node' to the front of the queue
if self._most_recently_used is None:
self._most_recently_used = node
- self._last_recently_used = node
+ self._least_recently_used = node
return
elif node is self._most_recently_used:
# Nothing to do, this node is already at the head of the queue
return
- elif node is self._last_recently_used:
- self._last_recently_used = node.prev
# We've taken care of the tail pointer, remove the node, and insert it
# at the front
# REMOVE
+ if node is self._least_recently_used:
+ self._least_recently_used = node.prev
if node.prev is not None:
- node.prev.next = node.next
- if node.next is not None:
- node.next.prev = node.prev
+ node.prev.next_key = node.next_key
+ if node.next_key is not _null_key:
+ node_next = self._cache[node.next_key]
+ node_next.prev = node.prev
# INSERT
- node.next = self._most_recently_used
+ node.next_key = self._most_recently_used.key
+ self._most_recently_used.prev = node
self._most_recently_used = node
- node.next.prev = node
node.prev = None
def _remove_node(self, node):
- if node is self._last_recently_used:
- self._last_recently_used = node.prev
+ if node is self._least_recently_used:
+ self._least_recently_used = node.prev
self._cache.pop(node.key)
# If we have removed all entries, remove the head pointer as well
- if self._last_recently_used is None:
+ if self._least_recently_used is None:
self._most_recently_used = None
node.run_cleanup()
# Now remove this node from the linked list
if node.prev is not None:
- node.prev.next = node.next
- if node.next is not None:
- node.next.prev = node.prev
+ node.prev.next_key = node.next_key
+ if node.next_key is not _null_key:
+ node_next = self._cache[node.next_key]
+ node_next.prev = node.prev
# And remove this node's pointers
node.prev = None
- node.next = None
+ node.next_key = _null_key
def _remove_lru(self):
"""Remove one entry from the lru, and handle consequences.
@@ -250,7 +258,7 @@
If there are no more references to the lru, then this entry should be
removed from the cache.
"""
- self._remove_node(self._last_recently_used)
+ self._remove_node(self._least_recently_used)
def clear(self):
"""Clear out all of the cache."""
@@ -316,6 +324,8 @@
:param cleanup: None or a function taking (key, value) to indicate
'value' should be cleaned up.
"""
+ if key is _null_key:
+ raise ValueError('cannot use _null_key as a key')
node = self._cache.get(key, None)
value_len = self._compute_size(value)
if value_len >= self._after_cleanup_size:
=== modified file 'bzrlib/tests/test_lru_cache.py'
--- a/bzrlib/tests/test_lru_cache.py 2009-04-16 02:08:21 +0000
+++ b/bzrlib/tests/test_lru_cache.py 2009-04-16 22:06:25 +0000
@@ -46,6 +46,27 @@
self.failUnless('foo' in cache)
self.failIf('bar' in cache)
+ def test_map_None(self):
+ # Make sure that we can properly map None as a key.
+ cache = lru_cache.LRUCache(max_cache=10)
+ self.failIf(None in cache)
+ cache[None] = 1
+ self.assertEqual(1, cache[None])
+ cache[None] = 2
+ self.assertEqual(2, cache[None])
+ # Test the various code paths of __getitem__, to make sure that we can
+ # handle when None is the key for the LRU and the MRU
+ cache[1] = 3
+ cache[None] = 1
+ cache[None]
+ cache[1]
+ cache[None]
+ self.assertEqual([None, 1], [n.key for n in cache._walk_lru()])
+
+ def test_add__null_key(self):
+ cache = lru_cache.LRUCache(max_cache=10)
+ self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
def test_overflow(self):
"""Adding extra entries will pop out old ones."""
cache = lru_cache.LRUCache(max_cache=1, after_cleanup_count=1)
@@ -279,6 +300,10 @@
self.assertEqual(int(cache._max_size*0.8), cache._after_cleanup_size)
self.assertEqual(0, cache._value_size)
+ def test_add__null_key(self):
+ cache = lru_cache.LRUSizeCache()
+ self.assertRaises(ValueError, cache.add, lru_cache._null_key, 1)
+
def test_add_tracks_size(self):
cache = lru_cache.LRUSizeCache()
self.assertEqual(0, cache._value_size)
More information about the bazaar-commits
mailing list