Rev 3886: Start working on a FIFOCache. in http://bzr.arbash-meinel.com/branches/bzr/1.11/fifo_cache
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 9 21:36:12 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/fifo_cache
------------------------------------------------------------
revno: 3886
revision-id: john at arbash-meinel.com-20081209213549-yc1mqv3l5gun9c63
parent: pqm at pqm.ubuntu.com-20081209163533-fj6hx9l65sretbai
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: fifo_cache
timestamp: Tue 2008-12-09 15:35:49 -0600
message:
Start working on a FIFOCache.
This is designed to have less runtime cost than the LRUCache.
Mostly because we don't have to do any recording work on *access*
only on update or delete.
As such, we subclass dict directly, because experiments show that
it performed the best. Unfortunately, even though we don't have
a custom __getitem__ implementation, it is still approx 2x slower
than using a raw dict.
-------------- next part --------------
=== added file 'bzrlib/fifo_cache.py'
--- a/bzrlib/fifo_cache.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/fifo_cache.py 2008-12-09 21:35:49 +0000
@@ -0,0 +1,118 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""A simple first-in-first-out (FIFO) cache."""
+
+from collections import deque
+
+
+class FIFOCache(dict):
+ """A class which manages a cache of entries, removing old ones."""
+
+ def __init__(self, max_cache=100, after_cleanup_count=None):
+ dict.__init__(self)
+ self._max_cache = max_cache
+ if after_cleanup_count is None:
+ self._after_cleanup_count = self._max_cache * 8 / 10
+ else:
+ self._after_cleanup_count = min(after_cleanup_count,
+ self._max_cache)
+ self._cleanup = {} # map to cleanup functions when items are removed
+ self._queue = deque() # Track when things are accessed
+
+ def __setitem__(self, key, value):
+ """Add a value to the cache, there will be no cleanup function."""
+ self.add(key, value, cleanup=None)
+
+ def __delitem__(self, key):
+ self._queue.remove(key)
+ self._remove(key)
+
+ def add(self, key, value, cleanup=None):
+ """Add a new value to the cache.
+
+ Also, if the entry is ever removed from the queue, call cleanup.
+ Passing it the key and value being removed.
+
+ :param key: The key to store it under
+ :param value: The object to store
+ :param cleanup: None or a function taking (key, value) to indicate
+ 'value' should be cleaned up
+ """
+ if key in self:
+ # Remove the earlier reference to this key, adding it again bumps
+ # it to the end of the queue
+ del self[key]
+ self._queue.append(key)
+ dict.__setitem__(self, key, value)
+ if len(self) > self._max_cache:
+ self.cleanup()
+ if cleanup is not None:
+ self._cleanup[key] = cleanup
+
+ def cleanup(self):
+ """Clear the cache until it shrinks to the requested size.
+
+ This does not completely wipe the cache, just makes sure it is under
+ the after_cleanup_count.
+ """
+ # Make sure the cache is shrunk to the correct size
+ while len(self) > self._after_cleanup_count:
+ self._remove_oldest()
+
+ def clear(self):
+ """Clear out all of the cache."""
+ # Clean up in FIFO order
+ while self:
+ self._remove_oldest()
+
+ def _remove(self, key):
+ """Remove an entry, making sure to call any cleanup function."""
+ cleanup = self._cleanup.pop(key, None)
+ # We override self.pop() because it doesn't play well with cleanup
+ # functions.
+ val = dict.pop(self, key)
+ if cleanup is not None:
+ cleanup(key, val)
+ return val
+
+ def _remove_oldest(self):
+ """Remove the oldest entry."""
+ key = self._queue.popleft()
+ self._remove(key)
+
+ # raise NotImplementedError on dict functions that would mutate the cache
+ # which have not been properly implemented yet.
+ def copy(self):
+ raise NotImplementedError(self.copy)
+
+ def pop(self, key, default=None):
+ # If there is a cleanup() function, than it is unclear what pop()
+ # should do. Specifically, we would have to call the cleanup on the
+ # value before we return it, which should cause whatever resources were
+ # allocated to be removed, which makes the return value fairly useless.
+ # So instead, we just don't implement it.
+ raise NotImplementedError(self.pop)
+
+ def popitem(self):
+ # See pop()
+ raise NotImplementedError(self.popitem)
+
+ def setdefault(self):
+ raise NotImplementedError(self.setdefault)
+
+ def update(self, *args, **kwargs):
+ raise NotImplementedError(self.update)
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2008-11-25 03:05:14 +0000
+++ b/bzrlib/tests/__init__.py 2008-12-09 21:35:49 +0000
@@ -2804,6 +2804,7 @@
'bzrlib.tests.test_errors',
'bzrlib.tests.test_extract',
'bzrlib.tests.test_fetch',
+ 'bzrlib.tests.test_fifo_cache',
'bzrlib.tests.test_ftp_transport',
'bzrlib.tests.test_foreign',
'bzrlib.tests.test_generate_docs',
=== added file 'bzrlib/tests/test_fifo_cache.py'
--- a/bzrlib/tests/test_fifo_cache.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_fifo_cache.py 2008-12-09 21:35:49 +0000
@@ -0,0 +1,138 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Tests for the fifo_cache module."""
+
+from bzrlib import (
+ fifo_cache,
+ tests,
+ )
+
+
+class TestFIFOCache(tests.TestCase):
+ """Test that FIFO cache properly keeps track of entries."""
+
+ def test_add_is_present(self):
+ c = fifo_cache.FIFOCache()
+ c[1] = 2
+ self.assertTrue(1 in c)
+ self.assertEqual(1, len(c))
+ self.assertEqual(2, c[1])
+ self.assertEqual(2, c.get(1))
+ self.assertEqual(2, c.get(1, None))
+ self.assertEqual([1], c.keys())
+ self.assertEqual([1], list(c.iterkeys()))
+ self.assertEqual([(1, 2)], c.items())
+ self.assertEqual([(1, 2)], list(c.iteritems()))
+ self.assertEqual([2], c.values())
+ self.assertEqual([2], list(c.itervalues()))
+
+ def test_missing(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(KeyError, c.__getitem__, 1)
+ self.assertFalse(1 in c)
+ self.assertEqual(0, len(c))
+ self.assertEqual(None, c.get(1))
+ self.assertEqual(None, c.get(1, None))
+ self.assertEqual([], c.keys())
+ self.assertEqual([], list(c.iterkeys()))
+ self.assertEqual([], c.items())
+ self.assertEqual([], list(c.iteritems()))
+ self.assertEqual([], c.values())
+ self.assertEqual([], list(c.itervalues()))
+
+ def test_add_maintains_fifo(self):
+ c = fifo_cache.FIFOCache(4, 4)
+ c[1] = 2
+ c[2] = 3
+ c[3] = 4
+ c[4] = 5
+ self.assertEqual([1, 2, 3, 4], sorted(c.keys()))
+ c[5] = 6
+ # This should pop out the oldest entry
+ self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+ # Replacing an item doesn't change the stored keys
+ c[2] = 7
+ self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+ # But it does change the position in the FIFO
+ c[6] = 7
+ self.assertEqual([2, 4, 5, 6], sorted(c.keys()))
+ self.assertEqual([4, 5, 2, 6], list(c._queue))
+
+ def test_default_after_cleanup_count(self):
+ c = fifo_cache.FIFOCache(5)
+ self.assertEqual(4, c._after_cleanup_count)
+ c[1] = 2
+ c[2] = 3
+ c[3] = 4
+ c[4] = 5
+ c[5] = 6
+ # So far, everything fits
+ self.assertEqual([1, 2, 3, 4, 5], sorted(c.keys()))
+ c[6] = 7
+ # But adding one more should shrink down to after_cleanup_count
+ self.assertEqual([3, 4, 5, 6], sorted(c.keys()))
+
+ def test_clear(self):
+ c = fifo_cache.FIFOCache(5)
+ c[1] = 2
+ c[2] = 3
+ c[3] = 4
+ c[4] = 5
+ c[5] = 6
+ c.cleanup()
+ self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+ c.clear()
+ self.assertEqual([], c.keys())
+ self.assertEqual([], list(c._queue))
+
+ def test_copy_not_implemented(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(NotImplementedError, c.copy)
+
+ def test_pop_not_implemeted(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(NotImplementedError, c.pop, 'key')
+
+ def test_popitem_not_implemeted(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(NotImplementedError, c.popitem)
+
+ def test_setdefault_not_implemented(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(NotImplementedError, c.setdefault)
+
+ def test_update_not_implemented(self):
+ c = fifo_cache.FIFOCache()
+ self.assertRaises(NotImplementedError, c.update)
+
+ def test_cleanup_funcs(self):
+ log = []
+ def logging_cleanup(key, value):
+ log.append((key, value))
+ c = fifo_cache.FIFOCache(5, 4)
+ c.add(1, 2, cleanup=logging_cleanup)
+ c.add(2, 3, cleanup=logging_cleanup)
+ c.add(3, 4, cleanup=logging_cleanup)
+ c.add(4, 5, cleanup=None) # no cleanup for 4
+ c[5] = 6 # no cleanup for 5
+ self.assertEqual([], log)
+ # Adding another key should cleanup 1 & 2
+ c[6] = 7
+ self.assertEqual([(1, 2), (2, 3)], log)
+ del log[:]
+ c.clear()
+ self.assertEqual([(3, 4)], log)
More information about the bazaar-commits
mailing list