Rev 5194: Possible thread-safe LRUCache implementation. in http://bazaar.launchpad.net/~jameinel/bzr/lru_locking

John Arbash Meinel john at arbash-meinel.com
Sat May 1 05:52:18 BST 2010


At http://bazaar.launchpad.net/~jameinel/bzr/lru_locking

------------------------------------------------------------
revno: 5194
revision-id: john at arbash-meinel.com-20100501045208-7ljni1paetl7s939
parent: pqm at pqm.ubuntu.com-20100429073011-5f4kzm2wpojq9we1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lru_locking
timestamp: Fri 2010-04-30 23:52:08 -0500
message:
  Possible thread-safe LRUCache implementation.
  
  You could still break stuff by calling .add() from multiple threads, but
  at least the doubly-linked list structure should always stay correct,
  and that structure is the greater source of threading failures.
-------------- next part --------------
=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py	2009-07-08 15:43:51 +0000
+++ b/bzrlib/lru_cache.py	2010-05-01 04:52:08 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006, 2008, 2009 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -75,10 +75,19 @@
         # The "TAIL" of the lru linked list
         self._least_recently_used = None
         self._update_max_cache(max_cache, after_cleanup_count)
+        self._lru_lock = None
 
     def __contains__(self, key):
         return key in self._cache
 
+    def set_lock(self, lock):
+        """Add a lock to the LRUCache to prevent cross-thread corruption.
+
+        Once set, updates to the LRU structure (prev_node/next_node) will be
+        protected behind the supplied lock.
+        """
+        self._lru_lock = lock
+
     def __getitem__(self, key):
         cache = self._cache
         node = cache[key]
@@ -90,31 +99,42 @@
         if node is mru:
             # Nothing to do, this node is already at the head of the queue
             return node.value
-        # Remove this node from the old location
-        node_prev = node.prev
-        next_key = node.next_key
-        # benchmarking shows that the lookup of _null_key in globals is faster
-        # than the attribute lookup for (node is self._least_recently_used)
-        if next_key is _null_key:
-            # 'node' is the _least_recently_used, because it doesn't have a
-            # 'next' item. So move the current lru to the previous node.
-            self._least_recently_used = node_prev
-        else:
-            node_next = cache[next_key]
-            node_next.prev = node_prev
-        node_prev.next_key = next_key
-        # Insert this node at the front of the list
-        node.next_key = mru.key
-        mru.prev = node
-        self._most_recently_used = node
-        node.prev = None
-        return node.value
+        lru_lock = self._lru_lock
+        if lru_lock is not None:
+            lru_lock.acquire()
+        try:
+            # Remove this node from the old location
+            node_prev = node.prev
+            next_key = node.next_key
+            # benchmarking shows that the lookup of _null_key in globals is
+            # faster than the attribute lookup for (node is
+            # self._least_recently_used)
+            if next_key is _null_key:
+                # 'node' is the _least_recently_used, because it doesn't have a
+                # 'next' item. So move the current lru to the previous node.
+                self._least_recently_used = node_prev
+            else:
+                node_next = cache[next_key]
+                node_next.prev = node_prev
+            node_prev.next_key = next_key
+            # Insert this node at the front of the list
+            node.next_key = mru.key
+            mru.prev = node
+            self._most_recently_used = node
+            node.prev = None
+            return node.value
+        finally:
+            if lru_lock is not None:
+                lru_lock.release()
 
     def __len__(self):
         return len(self._cache)
 
     def _walk_lru(self):
-        """Walk the LRU list, only meant to be used in tests."""
+        """Walk the LRU list, only meant to be used in tests.
+
+        This is not thread-safe.
+        """
         node = self._most_recently_used
         if node is not None:
             if node.prev is not None:
@@ -154,11 +174,18 @@
         :param value: The object to store
         :param cleanup: None or a function taking (key, value) to indicate
                         'value' should be cleaned up.
+        :return None:
+
+        Note: While the LRU aspect can be made thread-safe by calling
+        set_lock(), we don't make thread-safety guarantees if 2 threads are
+        adding values for the same key. However, as it is a *cache* you
+        really don't want to have the possibility of different values for the
+        same key anyway.
         """
         if key is _null_key:
             raise ValueError('cannot use _null_key as a key')
-        if key in self._cache:
-            node = self._cache[key]
+        node = self._cache.get(key, None)
+        if node is not None:
             try:
                 node.run_cleanup()
             finally:
@@ -219,49 +246,61 @@
     def _record_access(self, node):
         """Record that key was accessed."""
         # Move 'node' to the front of the queue
-        if self._most_recently_used is None:
-            self._most_recently_used = node
-            self._least_recently_used = node
-            return
-        elif node is self._most_recently_used:
-            # Nothing to do, this node is already at the head of the queue
-            return
-        # We've taken care of the tail pointer, remove the node, and insert it
-        # at the front
-        # REMOVE
-        if node is self._least_recently_used:
-            self._least_recently_used = node.prev
-        if node.prev is not None:
-            node.prev.next_key = node.next_key
-        if node.next_key is not _null_key:
-            node_next = self._cache[node.next_key]
-            node_next.prev = node.prev
-        # INSERT
-        node.next_key = self._most_recently_used.key
-        self._most_recently_used.prev = node
-        self._most_recently_used = node
-        node.prev = None
-
-    def _remove_node(self, node):
-        if node is self._least_recently_used:
-            self._least_recently_used = node.prev
-        self._cache.pop(node.key)
-        # If we have removed all entries, remove the head pointer as well
-        if self._least_recently_used is None:
-            self._most_recently_used = None
+        if self._lru_lock is not None:
+            self._lru_lock.acquire()
         try:
-            node.run_cleanup()
-        finally:
-            # cleanup might raise an exception, but we want to make sure to
-            # maintain the linked list
+            if self._most_recently_used is None:
+                self._most_recently_used = node
+                self._least_recently_used = node
+                return
+            elif node is self._most_recently_used:
+                # Nothing to do, this node is already at the head of the queue
+                return
+            # We've taken care of the tail pointer, remove the node, and insert
+            # it at the front
+            # REMOVE
+            if node is self._least_recently_used:
+                self._least_recently_used = node.prev
             if node.prev is not None:
                 node.prev.next_key = node.next_key
             if node.next_key is not _null_key:
                 node_next = self._cache[node.next_key]
                 node_next.prev = node.prev
-            # And remove this node's pointers
+            # INSERT
+            node.next_key = self._most_recently_used.key
+            self._most_recently_used.prev = node
+            self._most_recently_used = node
             node.prev = None
-            node.next_key = _null_key
+        finally:
+            if self._lru_lock is not None:
+                self._lru_lock.release()
+
+    def _remove_node(self, node):
+        if self._lru_lock is not None:
+            self._lru_lock.acquire()
+        try:
+            if node is self._least_recently_used:
+                self._least_recently_used = node.prev
+            self._cache.pop(node.key)
+            # If we have removed all entries, remove the head pointer as well
+            if self._least_recently_used is None:
+                self._most_recently_used = None
+            try:
+                node.run_cleanup()
+            finally:
+                # cleanup might raise an exception, but we want to make sure to
+                # maintain the linked list
+                if node.prev is not None:
+                    node.prev.next_key = node.next_key
+                if node.next_key is not _null_key:
+                    node_next = self._cache[node.next_key]
+                    node_next.prev = node.prev
+                # And remove this node's pointers
+                node.prev = None
+                node.next_key = _null_key
+        finally:
+            if self._lru_lock is not None:
+                self._lru_lock.release()
 
     def _remove_lru(self):
         """Remove one entry from the lru, and handle consequences.



More information about the bazaar-commits mailing list