Rev 3887: (jam) Tweaks to the LRUCache code, better memory use, in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Dec 10 00:41:20 GMT 2008


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 3887
revision-id: pqm at pqm.ubuntu.com-20081210004116-qqips4wfg800p4ku
parent: pqm at pqm.ubuntu.com-20081209205258-uop2lclksyc9whj8
parent: john at arbash-meinel.com-20081209223156-8usxe0ihbbg4cpjq
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2008-12-10 00:41:16 +0000
message:
  (jam) Tweaks to the LRUCache code, better memory use,
  	mostly code cleanup.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/lru_cache.py            lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
  bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
    ------------------------------------------------------------
    revno: 3882.3.2
    revision-id: john at arbash-meinel.com-20081209223156-8usxe0ihbbg4cpjq
    parent: john at arbash-meinel.com-20081208182300-u1qmnxafwt2rr5dz
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: lru_cache
    timestamp: Tue 2008-12-09 16:31:56 -0600
    message:
      Only cache cleanup functions if they aren't None.
    modified:
      bzrlib/lru_cache.py            lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
    ------------------------------------------------------------
    revno: 3882.3.1
    revision-id: john at arbash-meinel.com-20081208182300-u1qmnxafwt2rr5dz
    parent: pqm at pqm.ubuntu.com-20081205181554-ofrdnafloc43bxkh
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: lru_cache
    timestamp: Mon 2008-12-08 12:23:00 -0600
    message:
      Add LRUCache.resize(), and change the init arguments for LRUCache.
      
      The old name was a bit confusing, and caused LRUSizeCache to re-use variables in
      a confusing way with LRUCache.
      
      
      Also, this changes the default cleanup size to be 80% of max_size. This should
      be better, as it means we get a little bit of room when adding keys,
      rather than having to cleanup after every add, we can instead do it in
      batches.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/lru_cache.py            lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
      bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
=== modified file 'NEWS'
--- a/NEWS	2008-12-09 20:52:58 +0000
+++ b/NEWS	2008-12-10 00:41:16 +0000
@@ -9,6 +9,11 @@
 --------------
 
   CHANGES:
+  
+   * ``LRUCache(after_cleanup_size)`` was renamed to
+     ``after_cleanup_count`` and the old name deprecated. The new name is
+     used for clarity, and to avoid confusion with
+     ``LRUSizeCache(after_cleanup_size)``. (John Arbash Meinel)
 
   NEW FEATURES:
 

=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py	2008-10-14 20:19:06 +0000
+++ b/bzrlib/lru_cache.py	2008-12-09 22:31:56 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006 Canonical Ltd
+# Copyright (C) 2006, 2008 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -17,25 +17,26 @@
 """A simple least-recently-used (LRU) cache."""
 
 from collections import deque
-import gc
+
+from bzrlib import symbol_versioning
 
 
 class LRUCache(object):
     """A class which manages a cache of entries, removing unused ones."""
 
-    def __init__(self, max_cache=100, after_cleanup_size=None):
-        self._max_cache = max_cache
-        if after_cleanup_size is None:
-            self._after_cleanup_size = self._max_cache
-        else:
-            self._after_cleanup_size = min(after_cleanup_size, self._max_cache)
-
-        self._compact_queue_length = 4*self._max_cache
-
+    def __init__(self, max_cache=100, after_cleanup_count=None,
+                 after_cleanup_size=symbol_versioning.DEPRECATED_PARAMETER):
+        if symbol_versioning.deprecated_passed(after_cleanup_size):
+            symbol_versioning.warn('LRUCache.__init__(after_cleanup_size) was'
+                                   ' deprecated in 1.11. Use'
+                                   ' after_cleanup_count instead.',
+                                   DeprecationWarning)
+            after_cleanup_count = after_cleanup_size
         self._cache = {}
         self._cleanup = {}
         self._queue = deque() # Track when things are accessed
         self._refcount = {} # number of entries in self._queue for each key
+        self._update_max_cache(max_cache, after_cleanup_count)
 
     def __contains__(self, key):
         return key in self._cache
@@ -62,7 +63,8 @@
         if key in self._cache:
             self._remove(key)
         self._cache[key] = value
-        self._cleanup[key] = cleanup
+        if cleanup is not None:
+            self._cleanup[key] = cleanup
         self._record_access(key)
 
         if len(self._cache) > self._max_cache:
@@ -89,11 +91,13 @@
         """Clear the cache until it shrinks to the requested size.
 
         This does not completely wipe the cache, just makes sure it is under
-        the after_cleanup_size.
+        the after_cleanup_count.
         """
         # Make sure the cache is shrunk to the correct size
-        while len(self._cache) > self._after_cleanup_size:
+        while len(self._cache) > self._after_cleanup_count:
             self._remove_lru()
+        # No need to compact the queue at this point, because the code that
+        # calls this would have already triggered it based on queue length
 
     def __setitem__(self, key, value):
         """Add a value to the cache, there will be no cleanup function."""
@@ -126,7 +130,7 @@
 
     def _remove(self, key):
         """Remove an entry, making sure to maintain the invariants."""
-        cleanup = self._cleanup.pop(key)
+        cleanup = self._cleanup.pop(key, None)
         val = self._cache.pop(key)
         if cleanup is not None:
             cleanup(key, val)
@@ -150,6 +154,23 @@
         while self._cache:
             self._remove_lru()
 
+    def resize(self, max_cache, after_cleanup_count=None):
+        """Change the number of entries that will be cached."""
+        self._update_max_cache(max_cache,
+                               after_cleanup_count=after_cleanup_count)
+
+    def _update_max_cache(self, max_cache, after_cleanup_count=None):
+        self._max_cache = max_cache
+        if after_cleanup_count is None:
+            self._after_cleanup_count = self._max_cache * 8 / 10
+        else:
+            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
+
+        self._compact_queue_length = 4*self._max_cache
+        if len(self._queue) > self._compact_queue_length:
+            self._compact_queue()
+        self.cleanup()
+
 
 class LRUSizeCache(LRUCache):
     """An LRUCache that removes things based on the size of the values.
@@ -175,20 +196,15 @@
             The function should take the form "compute_size(value) => integer".
             If not supplied, it defaults to 'len()'
         """
-        # This approximates that texts are > 0.5k in size. It only really
-        # effects when we clean up the queue, so we don't want it to be too
-        # large.
-        LRUCache.__init__(self, max_cache=int(max_size/512))
-        self._max_size = max_size
-        if after_cleanup_size is None:
-            self._after_cleanup_size = self._max_size
-        else:
-            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
-
         self._value_size = 0
         self._compute_size = compute_size
         if compute_size is None:
             self._compute_size = len
+        # This approximates that texts are > 0.5k in size. It only really
+        # effects when we clean up the queue, so we don't want it to be too
+        # large.
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
 
     def add(self, key, value, cleanup=None):
         """Add a new value to the cache.
@@ -208,7 +224,8 @@
             return
         self._value_size += value_len
         self._cache[key] = value
-        self._cleanup[key] = cleanup
+        if cleanup is not None:
+            self._cleanup[key] = cleanup
         self._record_access(key)
 
         if self._value_size > self._max_size:
@@ -229,3 +246,16 @@
         """Remove an entry, making sure to maintain the invariants."""
         val = LRUCache._remove(self, key)
         self._value_size -= self._compute_size(val)
+
+    def resize(self, max_size, after_cleanup_size=None):
+        """Change the number of bytes that will be cached."""
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        max_cache = max(int(max_size/512), 1)
+        self._update_max_cache(max_cache)
+
+    def _update_max_size(self, max_size, after_cleanup_size=None):
+        self._max_size = max_size
+        if after_cleanup_size is None:
+            self._after_cleanup_size = self._max_size * 8 / 10
+        else:
+            self._after_cleanup_size = min(after_cleanup_size, self._max_size)

=== modified file 'bzrlib/tests/test_lru_cache.py'
--- a/bzrlib/tests/test_lru_cache.py	2008-10-14 20:19:06 +0000
+++ b/bzrlib/tests/test_lru_cache.py	2008-12-08 18:23:00 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006 Canonical Ltd
+# Copyright (C) 2006, 2008 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -38,7 +38,7 @@
 
     def test_overflow(self):
         """Adding extra entries will pop out old ones."""
-        cache = lru_cache.LRUCache(max_cache=1)
+        cache = lru_cache.LRUCache(max_cache=1, after_cleanup_count=1)
 
         cache['foo'] = 'bar'
         # With a max cache of 1, adding 'baz' should pop out 'foo'
@@ -113,7 +113,7 @@
         self.assertEqual([(2, 20), (2, 25)], cleanup_called)
 
     def test_len(self):
-        cache = lru_cache.LRUCache(max_cache=10)
+        cache = lru_cache.LRUCache(max_cache=10, after_cleanup_count=10)
 
         cache[1] = 10
         cache[2] = 20
@@ -140,8 +140,8 @@
         # We hit the max
         self.assertEqual(10, len(cache))
 
-    def test_cleanup_shrinks_to_after_clean_size(self):
-        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_size=3)
+    def test_cleanup_shrinks_to_after_clean_count(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=3)
 
         cache.add(1, 10)
         cache.add(2, 20)
@@ -156,15 +156,16 @@
         self.assertEqual(3, len(cache))
 
     def test_after_cleanup_larger_than_max(self):
-        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_size=10)
-        self.assertEqual(5, cache._after_cleanup_size)
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=10)
+        self.assertEqual(5, cache._after_cleanup_count)
 
     def test_after_cleanup_none(self):
-        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_size=None)
-        self.assertEqual(5, cache._after_cleanup_size)
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=None)
+        # By default _after_cleanup_size is 80% of the normal size
+        self.assertEqual(4, cache._after_cleanup_count)
 
     def test_cleanup(self):
-        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_size=2)
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=2)
 
         # Add these in order
         cache.add(1, 10)
@@ -214,7 +215,7 @@
         self.assertIs(obj, cache.get(3, obj))
 
     def test_keys(self):
-        cache = lru_cache.LRUCache(max_cache=5)
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=5)
 
         cache[1] = 2
         cache[2] = 3
@@ -225,6 +226,52 @@
         cache[6] = 7
         self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
 
+    def test_after_cleanup_size_deprecated(self):
+        obj = self.callDeprecated([
+            'LRUCache.__init__(after_cleanup_size) was deprecated in 1.11.'
+            ' Use after_cleanup_count instead.'],
+            lru_cache.LRUCache, 50, after_cleanup_size=25)
+        self.assertEqual(obj._after_cleanup_count, 25)
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        # Now resize to something smaller, which triggers a cleanup
+        cache.resize(max_cache=3, after_cleanup_count=2)
+        self.assertEqual([5, 6], sorted(cache.keys()))
+        # Adding something will use the new size
+        cache[7] = 8
+        self.assertEqual([5, 6, 7], sorted(cache.keys()))
+        cache[8] = 9
+        self.assertEqual([7, 8], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUCache(max_cache=5, after_cleanup_count=4)
+        cache[1] = 2
+        cache[2] = 3
+        cache[3] = 4
+        cache[4] = 5
+        cache[5] = 6
+        self.assertEqual([1, 2, 3, 4, 5], sorted(cache.keys()))
+        cache[6] = 7
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache.resize(max_cache=8, after_cleanup_count=6)
+        self.assertEqual([3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 8
+        cache[8] = 9
+        cache[9] = 10
+        cache[10] = 11
+        self.assertEqual([3, 4, 5, 6, 7, 8, 9, 10], sorted(cache.keys()))
+        cache[11] = 12 # triggers cleanup back to new after_cleanup_count
+        self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
+
 
 class TestLRUSizeCache(tests.TestCase):
 
@@ -232,7 +279,7 @@
         cache = lru_cache.LRUSizeCache()
         self.assertEqual(2048, cache._max_cache)
         self.assertEqual(4*2048, cache._compact_queue_length)
-        self.assertEqual(cache._max_size, cache._after_cleanup_size)
+        self.assertEqual(int(cache._max_size*0.8), cache._after_cleanup_size)
         self.assertEqual(0, cache._value_size)
 
     def test_add_tracks_size(self):
@@ -332,3 +379,37 @@
         cache[2] = 'b'
         cache[3] = 'cdef'
         self.assertEqual([1, 2, 3], sorted(cache.keys()))
+
+    def test_resize_smaller(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        # Resize should also cleanup again
+        cache.resize(max_size=6, after_cleanup_size=4)
+        self.assertEqual([4], sorted(cache.keys()))
+        # Adding should use the new max size
+        cache[5] = 'mno'
+        self.assertEqual([4, 5], sorted(cache.keys()))
+        cache[6] = 'pqr'
+        self.assertEqual([6], sorted(cache.keys()))
+
+    def test_resize_larger(self):
+        cache = lru_cache.LRUSizeCache(max_size=10, after_cleanup_size=9)
+        cache[1] = 'abc'
+        cache[2] = 'def'
+        cache[3] = 'ghi'
+        cache[4] = 'jkl'
+        # Triggers a cleanup
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache.resize(max_size=15, after_cleanup_size=12)
+        self.assertEqual([2, 3, 4], sorted(cache.keys()))
+        cache[5] = 'mno'
+        cache[6] = 'pqr'
+        self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
+        cache[7] = 'stu'
+        self.assertEqual([4, 5, 6, 7], sorted(cache.keys()))
+




More information about the bazaar-commits mailing list