Rev 3886: Start working on a FIFOCache. in http://bzr.arbash-meinel.com/branches/bzr/1.11/fifo_cache

John Arbash Meinel john at arbash-meinel.com
Tue Dec 9 21:36:12 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/fifo_cache

------------------------------------------------------------
revno: 3886
revision-id: john at arbash-meinel.com-20081209213549-yc1mqv3l5gun9c63
parent: pqm at pqm.ubuntu.com-20081209163533-fj6hx9l65sretbai
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: fifo_cache
timestamp: Tue 2008-12-09 15:35:49 -0600
message:
  Start working on a FIFOCache.
  
  This is designed to have less runtime cost than the LRUCache.
  Mostly because we don't have to do any recording work on *access*
  only on update or delete.
  As such, we subclass dict directly, because experiments show that
  it performed the best. Unfortunately, even though we don't have
  a custom __getitem__ implementation, it is still approx 2x slower
  than using a raw dict.
-------------- next part --------------
=== added file 'bzrlib/fifo_cache.py'
--- a/bzrlib/fifo_cache.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/fifo_cache.py	2008-12-09 21:35:49 +0000
@@ -0,0 +1,118 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""A simple first-in-first-out (FIFO) cache."""
+
+from collections import deque
+
+
+class FIFOCache(dict):
+    """A class which manages a cache of entries, removing old ones."""
+
+    def __init__(self, max_cache=100, after_cleanup_count=None):
+        dict.__init__(self)
+        self._max_cache = max_cache
+        if after_cleanup_count is None:
+            self._after_cleanup_count = self._max_cache * 8 / 10
+        else:
+            self._after_cleanup_count = min(after_cleanup_count,
+                                            self._max_cache)
+        self._cleanup = {} # map to cleanup functions when items are removed
+        self._queue = deque() # Track when things are accessed
+
+    def __setitem__(self, key, value):
+        """Add a value to the cache, there will be no cleanup function."""
+        self.add(key, value, cleanup=None)
+
+    def __delitem__(self, key):
+        self._queue.remove(key)
+        self._remove(key)
+
+    def add(self, key, value, cleanup=None):
+        """Add a new value to the cache.
+
+        Also, if the entry is ever removed from the queue, call cleanup.
+        Passing it the key and value being removed.
+
+        :param key: The key to store it under
+        :param value: The object to store
+        :param cleanup: None or a function taking (key, value) to indicate
+                        'value' should be cleaned up
+        """
+        if key in self:
+            # Remove the earlier reference to this key, adding it again bumps
+            # it to the end of the queue
+            del self[key]
+        self._queue.append(key)
+        dict.__setitem__(self, key, value)
+        if len(self) > self._max_cache:
+            self.cleanup()
+        if cleanup is not None:
+            self._cleanup[key] = cleanup
+
+    def cleanup(self):
+        """Clear the cache until it shrinks to the requested size.
+
+        This does not completely wipe the cache, just makes sure it is under
+        the after_cleanup_count.
+        """
+        # Make sure the cache is shrunk to the correct size
+        while len(self) > self._after_cleanup_count:
+            self._remove_oldest()
+
+    def clear(self):
+        """Clear out all of the cache."""
+        # Clean up in FIFO order
+        while self:
+            self._remove_oldest()
+
+    def _remove(self, key):
+        """Remove an entry, making sure to call any cleanup function."""
+        cleanup = self._cleanup.pop(key, None)
+        # We override self.pop() because it doesn't play well with cleanup
+        # functions.
+        val = dict.pop(self, key)
+        if cleanup is not None:
+            cleanup(key, val)
+        return val
+
+    def _remove_oldest(self):
+        """Remove the oldest entry."""
+        key = self._queue.popleft()
+        self._remove(key)
+
+    # raise NotImplementedError on dict functions that would mutate the cache
+    # which have not been properly implemented yet.
+    def copy(self):
+        raise NotImplementedError(self.copy)
+
+    def pop(self, key, default=None):
+        # If there is a cleanup() function, than it is unclear what pop()
+        # should do. Specifically, we would have to call the cleanup on the
+        # value before we return it, which should cause whatever resources were
+        # allocated to be removed, which makes the return value fairly useless.
+        # So instead, we just don't implement it.
+        raise NotImplementedError(self.pop)
+
+    def popitem(self):
+        # See pop()
+        raise NotImplementedError(self.popitem)
+
+    def setdefault(self):
+        raise NotImplementedError(self.setdefault)
+
+    def update(self, *args, **kwargs):
+        raise NotImplementedError(self.update)

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2008-11-25 03:05:14 +0000
+++ b/bzrlib/tests/__init__.py	2008-12-09 21:35:49 +0000
@@ -2804,6 +2804,7 @@
                    'bzrlib.tests.test_errors',
                    'bzrlib.tests.test_extract',
                    'bzrlib.tests.test_fetch',
+                   'bzrlib.tests.test_fifo_cache',
                    'bzrlib.tests.test_ftp_transport',
                    'bzrlib.tests.test_foreign',
                    'bzrlib.tests.test_generate_docs',

=== added file 'bzrlib/tests/test_fifo_cache.py'
--- a/bzrlib/tests/test_fifo_cache.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_fifo_cache.py	2008-12-09 21:35:49 +0000
@@ -0,0 +1,138 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for the fifo_cache module."""
+
+from bzrlib import (
+    fifo_cache,
+    tests,
+    )
+
+
+class TestFIFOCache(tests.TestCase):
+    """Test that FIFO cache properly keeps track of entries."""
+
+    def test_add_is_present(self):
+        c = fifo_cache.FIFOCache()
+        c[1] = 2
+        self.assertTrue(1 in c)
+        self.assertEqual(1, len(c))
+        self.assertEqual(2, c[1])
+        self.assertEqual(2, c.get(1))
+        self.assertEqual(2, c.get(1, None))
+        self.assertEqual([1], c.keys())
+        self.assertEqual([1], list(c.iterkeys()))
+        self.assertEqual([(1, 2)], c.items())
+        self.assertEqual([(1, 2)], list(c.iteritems()))
+        self.assertEqual([2], c.values())
+        self.assertEqual([2], list(c.itervalues()))
+
+    def test_missing(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(KeyError, c.__getitem__, 1)
+        self.assertFalse(1 in c)
+        self.assertEqual(0, len(c))
+        self.assertEqual(None, c.get(1))
+        self.assertEqual(None, c.get(1, None))
+        self.assertEqual([], c.keys())
+        self.assertEqual([], list(c.iterkeys()))
+        self.assertEqual([], c.items())
+        self.assertEqual([], list(c.iteritems()))
+        self.assertEqual([], c.values())
+        self.assertEqual([], list(c.itervalues()))
+
+    def test_add_maintains_fifo(self):
+        c = fifo_cache.FIFOCache(4, 4)
+        c[1] = 2
+        c[2] = 3
+        c[3] = 4
+        c[4] = 5
+        self.assertEqual([1, 2, 3, 4], sorted(c.keys()))
+        c[5] = 6
+        # This should pop out the oldest entry
+        self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+        # Replacing an item doesn't change the stored keys
+        c[2] = 7
+        self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+        # But it does change the position in the FIFO
+        c[6] = 7
+        self.assertEqual([2, 4, 5, 6], sorted(c.keys()))
+        self.assertEqual([4, 5, 2, 6], list(c._queue))
+
+    def test_default_after_cleanup_count(self):
+        c = fifo_cache.FIFOCache(5)
+        self.assertEqual(4, c._after_cleanup_count)
+        c[1] = 2
+        c[2] = 3
+        c[3] = 4
+        c[4] = 5
+        c[5] = 6
+        # So far, everything fits
+        self.assertEqual([1, 2, 3, 4, 5], sorted(c.keys()))
+        c[6] = 7
+        # But adding one more should shrink down to after_cleanup_count
+        self.assertEqual([3, 4, 5, 6], sorted(c.keys()))
+
+    def test_clear(self):
+        c = fifo_cache.FIFOCache(5)
+        c[1] = 2
+        c[2] = 3
+        c[3] = 4
+        c[4] = 5
+        c[5] = 6
+        c.cleanup()
+        self.assertEqual([2, 3, 4, 5], sorted(c.keys()))
+        c.clear()
+        self.assertEqual([], c.keys())
+        self.assertEqual([], list(c._queue))
+
+    def test_copy_not_implemented(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(NotImplementedError, c.copy)
+
+    def test_pop_not_implemeted(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(NotImplementedError, c.pop, 'key')
+
+    def test_popitem_not_implemeted(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(NotImplementedError, c.popitem)
+
+    def test_setdefault_not_implemented(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(NotImplementedError, c.setdefault)
+
+    def test_update_not_implemented(self):
+        c = fifo_cache.FIFOCache()
+        self.assertRaises(NotImplementedError, c.update)
+
+    def test_cleanup_funcs(self):
+        log = []
+        def logging_cleanup(key, value):
+            log.append((key, value))
+        c = fifo_cache.FIFOCache(5, 4)
+        c.add(1, 2, cleanup=logging_cleanup)
+        c.add(2, 3, cleanup=logging_cleanup)
+        c.add(3, 4, cleanup=logging_cleanup)
+        c.add(4, 5, cleanup=None) # no cleanup for 4
+        c[5] = 6 # no cleanup for 5
+        self.assertEqual([], log)
+        # Adding another key should cleanup 1 & 2
+        c[6] = 7
+        self.assertEqual([(1, 2), (2, 3)], log)
+        del log[:]
+        c.clear()
+        self.assertEqual([(3, 4)], log)



More information about the bazaar-commits mailing list