Rev 3894: Use resize logic to ensure our inventory entry cache is at an optimal size. in http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache
John Arbash Meinel
john at arbash-meinel.com
Wed Dec 10 22:40:09 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache
------------------------------------------------------------
revno: 3894
revision-id: john at arbash-meinel.com-20081210223943-aqedq91tf7e6tecs
parent: john at arbash-meinel.com-20081210222704-465gxu7k0wehug6o
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: xml_cache
timestamp: Wed 2008-12-10 16:39:43 -0600
message:
Use resize logic to ensure our inventory entry cache is at an optimal size.
We don't want to cache all entries across all inventories, but we are okay
with caching a bit more than a single complete inventory would hold.
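
As a rough standalone sketch of the sizing rule described above (the function
name and the example numbers below are illustrative only, not part of the
patch; the real logic lives in _check_cache_size in bzrlib/xml8.py):

def recommended_entry_cache_size(inv_size, current_size):
    # Aim for roughly twice the entries of a single inventory. The extra
    # headroom keeps the frequently reused, unchanged entries from being
    # flushed every time a whole inventory passes through the FIFO cache.
    recommended = inv_size * 2
    if current_size < recommended:
        return recommended
    # Never shrink: a previously seen, larger inventory set the size.
    return current_size

# For example, a 50,000-entry tree bumps the default 10,000-entry cache up
# to 100,000, while a cache that is already larger is left alone.
print(recommended_entry_cache_size(50000, 10000))    # -> 100000
print(recommended_entry_cache_size(50000, 150000))   # -> 150000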
-------------- next part --------------
=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py 2008-12-10 18:29:35 +0000
+++ b/bzrlib/xml5.py 2008-12-10 22:39:43 +0000
@@ -74,6 +74,7 @@
             byid[ie.file_id] = ie
         if revision_id is not None:
             inv.root.revision = revision_id
+        self._check_cache_size(len(inv))
         return inv

     def _check_revisions(self, inv):
=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py 2008-12-10 22:27:04 +0000
+++ b/bzrlib/xml8.py 2008-12-10 22:39:43 +0000
@@ -41,7 +41,7 @@
     ">":"&gt;",
     }
 # A cache of InventoryEntry objects
-_entry_cache = fifo_cache.FIFOCache(10*1024)
+_entry_cache = fifo_cache.FIFOCache(10*1000)


 def _ensure_utf8_re():
@@ -170,6 +170,31 @@
         if inv.root.revision is None:
             raise AssertionError()

+    def _check_cache_size(self, inv_size):
+        """Check that the _entry_cache is large enough.
+
+        We want the cache to be ~2x the size of an inventory. The reason
+        is that we use a FIFO cache, and because of how Inventory records
+        tend to change: a small number of records change often, while most
+        records do not change at all. So when the cache fills up, it
+        flushes out a lot of the records you are still interested in,
+        which means you need to recreate all of those records. An LRU
+        cache would be better, but its overhead negates the cache
+        coherency benefit.
+
+        Another way to look at it: only the part of the cache beyond
+        len(inv) is your 'working' set. And in general, it shouldn't be a
+        problem to hold 2 inventories in memory anyway.
+
+        :param inv_size: The number of entries in an inventory.
+        """
+        # 1.5 times might also be reasonable.
+        recommended_cache_size = inv_size * 2
+        if _entry_cache.cache_size() < recommended_cache_size:
+            trace.mutter('Resizing the inventory entry cache to %d',
+                         recommended_cache_size)
+            _entry_cache.resize(recommended_cache_size)
+
     def write_inventory_to_lines(self, inv):
         """Return a list of lines with the encoded inventory."""
         return self.write_inventory(inv, None)
@@ -356,6 +381,7 @@
         for e in elt:
             ie = self._unpack_entry(e)
             inv.add(ie)
+        self._check_cache_size(len(inv))
         return inv

     def _unpack_entry(self, elt):
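
To make the FIFO reasoning in the new docstring concrete, here is a small
self-contained simulation (a toy OrderedDict-based FIFO and made-up entry
keys, not bzrlib's fifo_cache.FIFOCache). With a fixed cache smaller than
the tree, reading a second, nearly identical inventory gets no cache hits at
all, because by the time the scan reaches the entries that survived the first
read, the earlier misses have already evicted them; after resizing to twice
the inventory, almost every unchanged entry is reused:

from collections import OrderedDict


def hits_on_second_read(cache_capacity, inv_size=50000, changed=100):
    """Count cache hits while deserialising a second, nearly identical inventory."""
    cache = OrderedDict()

    def insert(key):
        cache[key] = True
        if len(cache) > cache_capacity:
            cache.popitem(last=False)  # FIFO eviction: oldest insertion first

    # First inventory: every entry is a miss and is inserted in file order.
    for i in range(inv_size):
        insert((i, 'old'))

    # Second inventory: identical except for `changed` modified entries.
    hits = 0
    for i in range(inv_size):
        key = (i, 'new') if i < changed else (i, 'old')
        if key in cache:
            hits += 1
        else:
            insert(key)
    return hits


print(hits_on_second_read(10 * 1000))   # cache smaller than the tree: 0 hits
print(hits_on_second_read(2 * 50000))   # cache at 2x the inventory: 49900 hits

In a FIFO cache a hit does not refresh an entry's position, so a sequential
scan larger than the cache keeps evicting entries before they can be reused;
sizing the cache past the inventory is what breaks that thrashing pattern.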