Rev 4758: (jam) In GCCHKStreamSource clear caches after we fetch from a VF in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Oct 20 06:12:11 BST 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4758 [merge]
revision-id: pqm at pqm.ubuntu.com-20091020051209-telne1xq2bq6cp7m
parent: pqm at pqm.ubuntu.com-20091019105916-6z2jo34eqr6s0008
parent: john at arbash-meinel.com-20091020042527-ehogm27th7o4htt6
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-10-20 06:12:09 +0100
message:
  (jam) In GCCHKStreamSource clear caches after we fetch from a VF
modified:
  bzrlib/btree_index.py          index.py-20080624222253-p0x5f92uyh5hw734-7
  bzrlib/groupcompress.py        groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
  bzrlib/tests/per_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
  bzrlib/tests/test_groupcompress.py test_groupcompress.p-20080705181503-ccbxd6xuy1bdnrpu-13
  bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-10-15 18:18:44 +0000
+++ b/bzrlib/btree_index.py	2009-10-19 15:39:25 +0000
@@ -853,6 +853,19 @@
             new_tips = next_tips
         return final_offsets
 
+    def clear_cache(self):
+        """Clear out any cached/memoized values.
+
+        This can be called at any time, but generally it is used when we have
+        extracted some information, but don't expect to be requesting any more
+        from this index.
+        """
+        # Note that we don't touch self._root_node or self._internal_node_cache
+        # We don't expect either of those to be big, and it can save
+        # round-trips in the future. We may re-evaluate this if InternalNode
+        # memory starts to be an issue.
+        self._leaf_node_cache.clear()
+
     def external_references(self, ref_list_num):
         if self._root_node is None:
             self._get_root_node()

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-10-17 04:43:14 +0000
+++ b/bzrlib/groupcompress.py	2009-10-19 15:45:10 +0000
@@ -1265,6 +1265,11 @@
         else:
             return self.get_record_stream(keys, 'unordered', True)
 
+    def clear_cache(self):
+        """See VersionedFiles.clear_cache()"""
+        self._group_cache.clear()
+        self._index._graph_index.clear_cache()
+
     def _check_add(self, key, lines, random_id, check_content):
         """check that version_id and lines are safe to add."""
         version_id = key[-1]

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-10-14 13:54:09 +0000
+++ b/bzrlib/index.py	2009-10-19 15:45:10 +0000
@@ -232,6 +232,13 @@
         if self._nodes_by_key is not None and self._key_length > 1:
             self._update_nodes_by_key(key, value, node_refs)
 
+    def clear_cache(self):
+        """See GraphIndex.clear_cache()
+
+        This is a no-op, but we need the api to conform to a generic 'Index'
+        abstraction.
+        """
+        
     def finish(self):
         lines = [_SIGNATURE]
         lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
@@ -461,6 +468,14 @@
             # there must be one line - the empty trailer line.
             raise errors.BadIndexData(self)
 
+    def clear_cache(self):
+        """Clear out any cached/memoized values.
+
+        This can be called at any time, but generally it is used when we have
+        extracted some information, but don't expect to be requesting any more
+        from this index.
+        """
+
     def external_references(self, ref_list_num):
         """Return references that are not present in this index.
         """
@@ -1226,6 +1241,11 @@
                 self.__class__.__name__,
                 ', '.join(map(repr, self._indices)))
 
+    def clear_cache(self):
+        """See GraphIndex.clear_cache()"""
+        for index in self._indices:
+            index.clear_cache()
+
     def get_parent_map(self, keys):
         """See graph.StackedParentsProvider.get_parent_map"""
         search_keys = set(keys)

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-09-25 21:24:21 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-10-19 16:21:20 +0000
@@ -1105,7 +1105,10 @@
         for stream_info in self._fetch_revision_texts(revision_ids):
             yield stream_info
         self._revision_keys = [(rev_id,) for rev_id in revision_ids]
+        self.from_repository.revisions.clear_cache()
+        self.from_repository.signatures.clear_cache()
         yield self._get_inventory_stream(self._revision_keys)
+        self.from_repository.inventories.clear_cache()
         # TODO: The keys to exclude might be part of the search recipe
         # For now, exclude all parents that are at the edge of ancestry, for
         # which we have inventories
@@ -1114,7 +1117,9 @@
                         self._revision_keys)
         for stream_info in self._get_filtered_chk_streams(parent_keys):
             yield stream_info
+        self.from_repository.chk_bytes.clear_cache()
         yield self._get_text_stream()
+        self.from_repository.texts.clear_cache()
 
     def get_stream_for_missing_keys(self, missing_keys):
         # missing keys can only occur when we are byte copying and not

=== modified file 'bzrlib/tests/per_versionedfile.py'
--- a/bzrlib/tests/per_versionedfile.py	2009-08-26 16:44:27 +0000
+++ b/bzrlib/tests/per_versionedfile.py	2009-10-19 15:06:58 +0000
@@ -1581,6 +1581,10 @@
         # All texts should be output.
         self.assertEqual(set(keys), seen)
 
+    def test_clear_cache(self):
+        files = self.get_versionedfiles()
+        files.clear_cache()
+
     def test_construct(self):
         """Each parameterised test can be constructed on a transport."""
         files = self.get_versionedfiles()

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2009-10-09 15:02:19 +0000
+++ b/bzrlib/tests/test_btree_index.py	2009-10-20 04:25:27 +0000
@@ -124,6 +124,12 @@
 
 class TestBTreeBuilder(BTreeTestCase):
 
+    def test_clear_cache(self):
+        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
+        # This is a no-op, but we need the api to be consistent with other
+        # BTreeGraphIndex apis.
+        builder.clear_cache()
+
     def test_empty_1_0(self):
         builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
         # NamedTemporaryFile dies on builder.finish().read(). weird.
@@ -639,6 +645,27 @@
         size = trans.put_file('index', stream)
         return btree_index.BTreeGraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        nodes = self.make_nodes(160, 2, 2)
+        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
+        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
+        self.assertEqual([1, 4], index._row_lengths)
+        self.assertIsNot(None, index._root_node)
+        internal_node_pre_clear = index._internal_node_cache.keys()
+        self.assertTrue(len(index._leaf_node_cache) > 0)
+        index.clear_cache()
+        # We don't touch _root_node or _internal_node_cache, both should be
+        # small, and can save a round trip or two
+        self.assertIsNot(None, index._root_node)
+        # NOTE: We don't want to affect the _internal_node_cache, as we expect
+        #       it will be small, and if we ever do touch this index again, it
+        #       will save round-trips.  This assertion isn't very strong,
+    #       becuase without a 3-level index, we don't have any internal
+        #       nodes cached.
+        self.assertEqual(internal_node_pre_clear,
+                         index._internal_node_cache.keys())
+        self.assertEqual(0, len(index._leaf_node_cache))
+
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
         index = btree_index.BTreeGraphIndex(transport, 'index', None)

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-10-17 04:43:14 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-10-19 15:06:58 +0000
@@ -459,7 +459,8 @@
                          ], block._dump())
 
 
-class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
+class TestCaseWithGroupCompressVersionedFiles(
+        tests.TestCaseWithMemoryTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                      dir='.', inconsistency_fatal=True):
@@ -732,6 +733,17 @@
                               " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                               " 0 8', \(\(\('a',\),\),\)\)")
 
+    def test_clear_cache(self):
+        vf = self.make_source_with_b(True, 'source')
+        vf.writer.end()
+        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+                                           True):
+            pass
+        self.assertTrue(len(vf._group_cache) > 0)
+        vf.clear_cache()
+        self.assertEqual(0, len(vf._group_cache))
+
+
 
 class StubGCVF(object):
     def __init__(self, canned_get_blocks=None):

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2009-09-09 18:52:56 +0000
+++ b/bzrlib/tests/test_index.py	2009-10-19 15:45:10 +0000
@@ -173,6 +173,11 @@
             "key\x00\x00\t\x00data\n"
             "\n", contents)
 
+    def test_clear_cache(self):
+        builder = GraphIndexBuilder(reference_lists=2)
+        # This is a no-op, but the api should exist
+        builder.clear_cache()
+
     def test_node_references_are_byte_offsets(self):
         builder = GraphIndexBuilder(reference_lists=1)
         builder.add_node(('reference', ), 'data', ([], ))
@@ -383,6 +388,12 @@
         size = trans.put_file('index', stream)
         return GraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        index = self.make_index()
+        # For now, we just want to make sure the api is available. As this is
+        # old code, we don't really worry if it *does* anything.
+        index.clear_cache()
+
     def test_open_bad_index_no_error(self):
         trans = self.get_transport()
         trans.put_bytes('name', "not an index\n")
@@ -1071,6 +1082,30 @@
         index.insert_index(0, index1)
         self.assertEqual([(index1, ('key', ), '')], list(index.iter_all_entries()))
 
+    def test_clear_cache(self):
+        log = []
+
+        class ClearCacheProxy(object):
+
+            def __init__(self, index):
+                self._index = index
+
+            def __getattr__(self, name):
+                return getattr(self._index)
+
+            def clear_cache(self):
+                log.append(self._index)
+                return self._index.clear_cache()
+
+        index = CombinedGraphIndex([])
+        index1 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+        index.insert_index(0, ClearCacheProxy(index1))
+        index2 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+        index.insert_index(1, ClearCacheProxy(index2))
+        # CombinedGraphIndex should call 'clear_cache()' on all children
+        index.clear_cache()
+        self.assertEqual(sorted([index1, index2]), sorted(log))
+
     def test_iter_all_entries_empty(self):
         index = CombinedGraphIndex([])
         self.assertEqual([], list(index.iter_all_entries()))

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2009-08-17 22:08:21 +0000
+++ b/bzrlib/versionedfile.py	2009-10-19 15:06:58 +0000
@@ -930,6 +930,13 @@
     def check_not_reserved_id(version_id):
         revision.check_not_reserved_id(version_id)
 
+    def clear_cache(self):
+        """Clear whatever caches this VersionedFile holds.
+
+        This is generally called after an operation has been performed, when we
+        don't expect to be using this versioned file again soon.
+        """
+
     def _check_lines_not_unicode(self, lines):
         """Check that lines being added to a versioned file are not unicode."""
         for line in lines:




More information about the bazaar-commits mailing list