Rev 4750: Start exposing a GraphIndex.clear_cache() member. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-peak-mem-tweak

John Arbash Meinel john at arbash-meinel.com
Mon Oct 19 16:39:32 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-peak-mem-tweak

------------------------------------------------------------
revno: 4750
revision-id: john at arbash-meinel.com-20091019153925-pkvnaoho6a2aawj7
parent: john at arbash-meinel.com-20091019150658-tot6ofpswqytwzpj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-peak-mem-tweak
timestamp: Mon 2009-10-19 10:39:25 -0500
message:
  Start exposing a GraphIndex.clear_cache() member.
  This is exposed on GraphIndex, CombinedGraphIndex and BTreeGraphIndex.
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-10-15 18:18:44 +0000
+++ b/bzrlib/btree_index.py	2009-10-19 15:39:25 +0000
@@ -853,6 +853,19 @@
             new_tips = next_tips
         return final_offsets
 
+    def clear_cache(self):
+        """Clear out cached/memoized leaf nodes.
+
+        This can be called at any time, but generally it is used when we have
+        extracted some information and don't expect to request any more from
+        this index. Only self._leaf_node_cache is emptied.
+        """
+        # We deliberately leave self._root_node and self._internal_node_cache
+        # alone. Neither is expected to be big, and keeping them can save
+        # round-trips in the future. We may re-evaluate this if InternalNode
+        # memory starts to be an issue.
+        self._leaf_node_cache.clear()
+
     def external_references(self, ref_list_num):
         if self._root_node is None:
             self._get_root_node()

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-10-14 13:54:09 +0000
+++ b/bzrlib/index.py	2009-10-19 15:39:25 +0000
@@ -461,6 +461,14 @@
             # there must be one line - the empty trailer line.
             raise errors.BadIndexData(self)
 
+    def clear_cache(self):
+        """Clear out any cached/memoized values.
+
+        This can be called at any time, but generally it is used when we have
+        extracted some information, but don't expect to be requesting any more
+        from this index. This implementation currently does nothing.
+        """
+
     def external_references(self, ref_list_num):
         """Return references that are not present in this index.
         """
@@ -1226,6 +1234,11 @@
                 self.__class__.__name__,
                 ', '.join(map(repr, self._indices)))
 
+    def clear_cache(self):
+        """Call clear_cache() on every index in self._indices."""
+        for index in self._indices:
+            index.clear_cache()
+
     def get_parent_map(self, keys):
         """See graph.StackedParentsProvider.get_parent_map"""
         search_keys = set(keys)

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2009-10-09 15:02:19 +0000
+++ b/bzrlib/tests/test_btree_index.py	2009-10-19 15:39:25 +0000
@@ -639,6 +639,25 @@
         size = trans.put_file('index', stream)
         return btree_index.BTreeGraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        """clear_cache() empties the leaf cache but keeps the root node."""
+        nodes = self.make_nodes(160, 2, 2)
+        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
+        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
+        self.assertEqual([1, 4], index._row_lengths)
+        self.assertIsNot(None, index._root_node)
+        # NOTE: we don't want to affect the _internal_node_cache, as we expect
+        #       it will be small, and if we ever do touch this index again, it
+        #       will save round-trips. It takes a 3-level tree to test this...
+        # self.assertTrue(len(index._internal_node_cache) > 0)
+        self.assertTrue(len(index._leaf_node_cache) > 0)
+        index.clear_cache()
+        # We don't touch _root_node or _internal_node_cache, both should be
+        # small, and can save a round trip or two
+        self.assertIsNot(None, index._root_node)
+        # self.assertTrue(len(index._internal_node_cache) > 0)
+        self.assertEqual(0, len(index._leaf_node_cache))
+
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
         index = btree_index.BTreeGraphIndex(transport, 'index', None)

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2009-09-09 18:52:56 +0000
+++ b/bzrlib/tests/test_index.py	2009-10-19 15:39:25 +0000
@@ -383,6 +383,12 @@
         size = trans.put_file('index', stream)
         return GraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        """GraphIndex exposes clear_cache(); only availability is checked."""
+        index = self.make_index()
+        # This is old code, so we don't really worry if it *does* anything.
+        index.clear_cache()
+
     def test_open_bad_index_no_error(self):
         trans = self.get_transport()
         trans.put_bytes('name', "not an index\n")
@@ -1071,6 +1077,30 @@
         index.insert_index(0, index1)
         self.assertEqual([(index1, ('key', ), '')], list(index.iter_all_entries()))
 
+    def test_clear_cache(self):
+        """CombinedGraphIndex.clear_cache() is forwarded to every child."""
+        log = []
+
+        class ClearCacheProxy(object):
+            # Logs clear_cache() calls; delegates all other attributes.
+            def __init__(self, index):
+                self._index = index
+
+            def __getattr__(self, name):
+                return getattr(self._index, name)
+
+            def clear_cache(self):
+                log.append(self._index)
+                return self._index.clear_cache()
+
+        index = CombinedGraphIndex([])
+        index1 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+        index.insert_index(0, ClearCacheProxy(index1))
+        index2 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+        index.insert_index(1, ClearCacheProxy(index2))
+        index.clear_cache()
+        self.assertEqual(sorted([index1, index2]), sorted(log))
+
     def test_iter_all_entries_empty(self):
         index = CombinedGraphIndex([])
         self.assertEqual([], list(index.iter_all_entries()))



More information about the bazaar-commits mailing list