Rev 3894: Use resize logic to ensure our inventory entry cache is at an optimal size. in http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache

John Arbash Meinel john at arbash-meinel.com
Wed Dec 10 22:40:09 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache

------------------------------------------------------------
revno: 3894
revision-id: john at arbash-meinel.com-20081210223943-aqedq91tf7e6tecs
parent: john at arbash-meinel.com-20081210222704-465gxu7k0wehug6o
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: xml_cache
timestamp: Wed 2008-12-10 16:39:43 -0600
message:
  Use resize logic to ensure our inventory entry cache is at an optimal size.
  
  We don't want to cache every entry across all inventories, but we are okay caching a
  bit more than a single complete inventory would hold.
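  
  In sketch form, the new check looks roughly like this (simplified
  stand-in code rather than the exact bzrlib methods; the real
  _entry_cache is a bzrlib.fifo_cache.FIFOCache, which does provide the
  cache_size() and resize() calls used here):
  
      from bzrlib import fifo_cache
  
      # Module-level cache shared by all serializer instances.
      _entry_cache = fifo_cache.FIFOCache(10*1000)
  
      def _check_cache_size(inv_size):
          # Grow (never shrink) the cache to ~2x the largest inventory
          # deserialized so far, so one full inventory plus its
          # recently-changed entries can stay cached together.
          recommended_cache_size = inv_size * 2
          if _entry_cache.cache_size() < recommended_cache_size:
              _entry_cache.resize(recommended_cache_size)
  
  Every inventory deserialization then calls this check just before
  returning, as in the diff below.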
-------------- next part --------------
=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py	2008-12-10 18:29:35 +0000
+++ b/bzrlib/xml5.py	2008-12-10 22:39:43 +0000
@@ -74,6 +74,7 @@
             byid[ie.file_id] = ie
         if revision_id is not None:
             inv.root.revision = revision_id
+        self._check_cache_size(len(inv))
         return inv
 
     def _check_revisions(self, inv):

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2008-12-10 22:27:04 +0000
+++ b/bzrlib/xml8.py	2008-12-10 22:39:43 +0000
@@ -41,7 +41,7 @@
     ">":"&gt;",
     }
 # A cache of InventoryEntry objects
-_entry_cache = fifo_cache.FIFOCache(10*1024)
+_entry_cache = fifo_cache.FIFOCache(10*1000)
 
 
 def _ensure_utf8_re():
@@ -170,6 +170,31 @@
         if inv.root.revision is None:
             raise AssertionError()
 
+    def _check_cache_size(self, inv_size):
+        """Check that the _entry_cache is large enough.
+
+        We want the cache to be roughly 2x the size of an inventory. The
+        reason is that we use a FIFO cache, combined with how Inventory
+        records tend to change: a small number of records change often,
+        and a large number of records do not change at all. If the cache
+        is only as big as one inventory, filling it flushes out many of
+        the records you are still interested in, and you have to recreate
+        them. An LRU cache would handle this better, but its overhead
+        negates the cache coherency benefit.
+
+        Put another way, only the portion of the cache beyond len(inv) is
+        your 'working' set, and it generally is not a problem to hold two
+        inventories in memory anyway.
+
+        :param inv_size: The number of entries in an inventory.
+        """
+        # 1.5 times might also be reasonable.
+        recommended_cache_size = inv_size * 2
+        if _entry_cache.cache_size() < recommended_cache_size:
+            trace.mutter('Resizing the inventory entry cache to %d',
+                         recommended_cache_size)
+            _entry_cache.resize(recommended_cache_size)
+
     def write_inventory_to_lines(self, inv):
         """Return a list of lines with the encoded inventory."""
         return self.write_inventory(inv, None)
@@ -356,6 +381,7 @@
         for e in elt:
             ie = self._unpack_entry(e)
             inv.add(ie)
+        self._check_cache_size(len(inv))
         return inv
 
     def _unpack_entry(self, elt):
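
To make the docstring's working-set argument a bit more concrete, here is
a toy simulation (illustrative only, not a measurement of bzr itself). It
models deserializing successive revisions of one inventory where only a
small fraction of entries change per revision, using a plain FIFO cache:

    import random

    def simulate(cache_size, inv_size=10000, revisions=20, churn=100):
        # Keys mimic (file_id, revision-last-changed): an unchanged entry
        # keeps its key across revisions and can be served from the cache.
        fifo = []                  # insertion order, oldest first
        cached = set()
        last_changed = dict((f, 0) for f in range(inv_size))
        hits = misses = 0
        for rev in range(1, revisions + 1):
            for f in random.sample(range(inv_size), churn):
                last_changed[f] = rev          # changed entries get new keys
            for f in range(inv_size):
                key = (f, last_changed[f])
                if key in cached:
                    hits += 1
                else:
                    misses += 1
                    fifo.append(key)
                    cached.add(key)
                    if len(fifo) > cache_size:
                        cached.discard(fifo.pop(0))
        return hits, misses

    print(simulate(8000))    # cache smaller than the inventory
    print(simulate(20000))   # cache ~2x the inventory

With the cache smaller than the inventory, the FIFO is cycled completely
on every pass and essentially every entry misses on every revision. At
roughly 2x the inventory size, only the first pass and the changed entries
miss, which is the behaviour _check_cache_size is aiming for.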


