Rev 5194: Possible thread-safe LRUCache implementation, at http://bazaar.launchpad.net/~jameinel/bzr/lru_locking
John Arbash Meinel
john at arbash-meinel.com
Sat May 1 05:52:18 BST 2010
At http://bazaar.launchpad.net/~jameinel/bzr/lru_locking
------------------------------------------------------------
revno: 5194
revision-id: john at arbash-meinel.com-20100501045208-7ljni1paetl7s939
parent: pqm at pqm.ubuntu.com-20100429073011-5f4kzm2wpojq9we1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lru_locking
timestamp: Fri 2010-04-30 23:52:08 -0500
message:
Possible thread-safe LRUCache implementation.
You could still break stuff by calling .add() from multiple threads, but
at least the doubly-linked list structure should always stay correct —
and list corruption is the greater source of threading failures.
-------------- next part --------------
=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py 2009-07-08 15:43:51 +0000
+++ b/bzrlib/lru_cache.py 2010-05-01 04:52:08 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006, 2008, 2009 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -75,10 +75,19 @@
# The "TAIL" of the lru linked list
self._least_recently_used = None
self._update_max_cache(max_cache, after_cleanup_count)
+ self._lru_lock = None
def __contains__(self, key):
return key in self._cache
+ def set_lock(self, lock):
+ """Add a lock to the LRUCache to prevent cross-thread corruption.
+
+ Once set, updates to the LRU structure (prev_node/next_node) will be
+ protected behind the supplied lock.
+ """
+ self._lru_lock = lock
+
def __getitem__(self, key):
cache = self._cache
node = cache[key]
@@ -90,31 +99,42 @@
if node is mru:
# Nothing to do, this node is already at the head of the queue
return node.value
- # Remove this node from the old location
- node_prev = node.prev
- next_key = node.next_key
- # benchmarking shows that the lookup of _null_key in globals is faster
- # than the attribute lookup for (node is self._least_recently_used)
- if next_key is _null_key:
- # 'node' is the _least_recently_used, because it doesn't have a
- # 'next' item. So move the current lru to the previous node.
- self._least_recently_used = node_prev
- else:
- node_next = cache[next_key]
- node_next.prev = node_prev
- node_prev.next_key = next_key
- # Insert this node at the front of the list
- node.next_key = mru.key
- mru.prev = node
- self._most_recently_used = node
- node.prev = None
- return node.value
+ lru_lock = self._lru_lock
+ if lru_lock is not None:
+ lru_lock.acquire()
+ try:
+ # Remove this node from the old location
+ node_prev = node.prev
+ next_key = node.next_key
+ # benchmarking shows that the lookup of _null_key in globals is
+ # faster than the attribute lookup for (node is
+ # self._least_recently_used)
+ if next_key is _null_key:
+ # 'node' is the _least_recently_used, because it doesn't have a
+ # 'next' item. So move the current lru to the previous node.
+ self._least_recently_used = node_prev
+ else:
+ node_next = cache[next_key]
+ node_next.prev = node_prev
+ node_prev.next_key = next_key
+ # Insert this node at the front of the list
+ node.next_key = mru.key
+ mru.prev = node
+ self._most_recently_used = node
+ node.prev = None
+ return node.value
+ finally:
+ if lru_lock is not None:
+ lru_lock.release()
def __len__(self):
return len(self._cache)
def _walk_lru(self):
- """Walk the LRU list, only meant to be used in tests."""
+ """Walk the LRU list, only meant to be used in tests.
+
+ This is not thread-safe.
+ """
node = self._most_recently_used
if node is not None:
if node.prev is not None:
@@ -154,11 +174,18 @@
:param value: The object to store
:param cleanup: None or a function taking (key, value) to indicate
'value' should be cleaned up.
+ :return None:
+
+ Note: While the LRU aspect can be made thread-safe by calling
+ set_lock(), we don't make thread-safety guarantees if 2 threads are
+ adding values for the same key. However, as it is a *cache* you
+ really don't want to have the possibility of different values for the
+ same key anyway.
"""
if key is _null_key:
raise ValueError('cannot use _null_key as a key')
- if key in self._cache:
- node = self._cache[key]
+ node = self._cache.get(key, None)
+ if node is not None:
try:
node.run_cleanup()
finally:
@@ -219,49 +246,61 @@
def _record_access(self, node):
"""Record that key was accessed."""
# Move 'node' to the front of the queue
- if self._most_recently_used is None:
- self._most_recently_used = node
- self._least_recently_used = node
- return
- elif node is self._most_recently_used:
- # Nothing to do, this node is already at the head of the queue
- return
- # We've taken care of the tail pointer, remove the node, and insert it
- # at the front
- # REMOVE
- if node is self._least_recently_used:
- self._least_recently_used = node.prev
- if node.prev is not None:
- node.prev.next_key = node.next_key
- if node.next_key is not _null_key:
- node_next = self._cache[node.next_key]
- node_next.prev = node.prev
- # INSERT
- node.next_key = self._most_recently_used.key
- self._most_recently_used.prev = node
- self._most_recently_used = node
- node.prev = None
-
- def _remove_node(self, node):
- if node is self._least_recently_used:
- self._least_recently_used = node.prev
- self._cache.pop(node.key)
- # If we have removed all entries, remove the head pointer as well
- if self._least_recently_used is None:
- self._most_recently_used = None
+ if self._lru_lock is not None:
+ self._lru_lock.acquire()
try:
- node.run_cleanup()
- finally:
- # cleanup might raise an exception, but we want to make sure to
- # maintain the linked list
+ if self._most_recently_used is None:
+ self._most_recently_used = node
+ self._least_recently_used = node
+ return
+ elif node is self._most_recently_used:
+ # Nothing to do, this node is already at the head of the queue
+ return
+ # We've taken care of the tail pointer, remove the node, and insert
+ # it at the front
+ # REMOVE
+ if node is self._least_recently_used:
+ self._least_recently_used = node.prev
if node.prev is not None:
node.prev.next_key = node.next_key
if node.next_key is not _null_key:
node_next = self._cache[node.next_key]
node_next.prev = node.prev
- # And remove this node's pointers
+ # INSERT
+ node.next_key = self._most_recently_used.key
+ self._most_recently_used.prev = node
+ self._most_recently_used = node
node.prev = None
- node.next_key = _null_key
+ finally:
+ if self._lru_lock is not None:
+ self._lru_lock.release()
+
+ def _remove_node(self, node):
+ if self._lru_lock is not None:
+ self._lru_lock.acquire()
+ try:
+ if node is self._least_recently_used:
+ self._least_recently_used = node.prev
+ self._cache.pop(node.key)
+ # If we have removed all entries, remove the head pointer as well
+ if self._least_recently_used is None:
+ self._most_recently_used = None
+ try:
+ node.run_cleanup()
+ finally:
+ # cleanup might raise an exception, but we want to make sure to
+ # maintain the linked list
+ if node.prev is not None:
+ node.prev.next_key = node.next_key
+ if node.next_key is not _null_key:
+ node_next = self._cache[node.next_key]
+ node_next.prev = node.prev
+ # And remove this node's pointers
+ node.prev = None
+ node.next_key = _null_key
+ finally:
+ if self._lru_lock is not None:
+ self._lru_lock.release()
def _remove_lru(self):
"""Remove one entry from the lru, and handle consequences.
More information about the bazaar-commits
mailing list