Rev 4758: (jam) In GCCHKStreamSource clear caches after we fetch from a VF in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Oct 20 06:12:11 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4758 [merge]
revision-id: pqm at pqm.ubuntu.com-20091020051209-telne1xq2bq6cp7m
parent: pqm at pqm.ubuntu.com-20091019105916-6z2jo34eqr6s0008
parent: john at arbash-meinel.com-20091020042527-ehogm27th7o4htt6
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-10-20 06:12:09 +0100
message:
(jam) In GCCHKStreamSource clear caches after we fetch from a VF
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
bzrlib/tests/per_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_groupcompress.py test_groupcompress.p-20080705181503-ccbxd6xuy1bdnrpu-13
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-10-15 18:18:44 +0000
+++ b/bzrlib/btree_index.py 2009-10-19 15:39:25 +0000
@@ -853,6 +853,19 @@
new_tips = next_tips
return final_offsets
+ def clear_cache(self):
+ """Clear out any cached/memoized values.
+
+ This can be called at any time, but generally it is used when we have
+ extracted some information, but don't expect to be requesting any more
+ from this index.
+ """
+ # Note that we don't touch self._root_node or self._internal_node_cache
+ # We don't expect either of those to be big, and it can save
+ # round-trips in the future. We may re-evaluate this if InternalNode
+ # memory starts to be an issue.
+ self._leaf_node_cache.clear()
+
def external_references(self, ref_list_num):
if self._root_node is None:
self._get_root_node()
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-10-17 04:43:14 +0000
+++ b/bzrlib/groupcompress.py 2009-10-19 15:45:10 +0000
@@ -1265,6 +1265,11 @@
else:
return self.get_record_stream(keys, 'unordered', True)
+ def clear_cache(self):
+ """See VersionedFiles.clear_cache()"""
+ self._group_cache.clear()
+ self._index._graph_index.clear_cache()
+
def _check_add(self, key, lines, random_id, check_content):
"""check that version_id and lines are safe to add."""
version_id = key[-1]
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-10-14 13:54:09 +0000
+++ b/bzrlib/index.py 2009-10-19 15:45:10 +0000
@@ -232,6 +232,13 @@
if self._nodes_by_key is not None and self._key_length > 1:
self._update_nodes_by_key(key, value, node_refs)
+ def clear_cache(self):
+ """See GraphIndex.clear_cache()
+
+ This is a no-op, but we need the api to conform to a generic 'Index'
+ abstraction.
+ """
+
def finish(self):
lines = [_SIGNATURE]
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
@@ -461,6 +468,14 @@
# there must be one line - the empty trailer line.
raise errors.BadIndexData(self)
+ def clear_cache(self):
+ """Clear out any cached/memoized values.
+
+ This can be called at any time, but generally it is used when we have
+ extracted some information, but don't expect to be requesting any more
+ from this index.
+ """
+
def external_references(self, ref_list_num):
"""Return references that are not present in this index.
"""
@@ -1226,6 +1241,11 @@
self.__class__.__name__,
', '.join(map(repr, self._indices)))
+ def clear_cache(self):
+ """See GraphIndex.clear_cache()"""
+ for index in self._indices:
+ index.clear_cache()
+
def get_parent_map(self, keys):
"""See graph.StackedParentsProvider.get_parent_map"""
search_keys = set(keys)
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-09-25 21:24:21 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-10-19 16:21:20 +0000
@@ -1105,7 +1105,10 @@
for stream_info in self._fetch_revision_texts(revision_ids):
yield stream_info
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
+ self.from_repository.revisions.clear_cache()
+ self.from_repository.signatures.clear_cache()
yield self._get_inventory_stream(self._revision_keys)
+ self.from_repository.inventories.clear_cache()
# TODO: The keys to exclude might be part of the search recipe
# For now, exclude all parents that are at the edge of ancestry, for
# which we have inventories
@@ -1114,7 +1117,9 @@
self._revision_keys)
for stream_info in self._get_filtered_chk_streams(parent_keys):
yield stream_info
+ self.from_repository.chk_bytes.clear_cache()
yield self._get_text_stream()
+ self.from_repository.texts.clear_cache()
def get_stream_for_missing_keys(self, missing_keys):
# missing keys can only occur when we are byte copying and not
=== modified file 'bzrlib/tests/per_versionedfile.py'
--- a/bzrlib/tests/per_versionedfile.py 2009-08-26 16:44:27 +0000
+++ b/bzrlib/tests/per_versionedfile.py 2009-10-19 15:06:58 +0000
@@ -1581,6 +1581,10 @@
# All texts should be output.
self.assertEqual(set(keys), seen)
+ def test_clear_cache(self):
+ files = self.get_versionedfiles()
+ files.clear_cache()
+
def test_construct(self):
"""Each parameterised test can be constructed on a transport."""
files = self.get_versionedfiles()
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-10-09 15:02:19 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-10-20 04:25:27 +0000
@@ -124,6 +124,12 @@
class TestBTreeBuilder(BTreeTestCase):
+ def test_clear_cache(self):
+ builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
+ # This is a no-op, but we need the api to be consistent with other
+ # BTreeGraphIndex apis.
+ builder.clear_cache()
+
def test_empty_1_0(self):
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
# NamedTemporaryFile dies on builder.finish().read(). weird.
@@ -639,6 +645,27 @@
size = trans.put_file('index', stream)
return btree_index.BTreeGraphIndex(trans, 'index', size)
+ def test_clear_cache(self):
+ nodes = self.make_nodes(160, 2, 2)
+ index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
+ self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
+ self.assertEqual([1, 4], index._row_lengths)
+ self.assertIsNot(None, index._root_node)
+ internal_node_pre_clear = index._internal_node_cache.keys()
+ self.assertTrue(len(index._leaf_node_cache) > 0)
+ index.clear_cache()
+ # We don't touch _root_node or _internal_node_cache, both should be
+ # small, and can save a round trip or two
+ self.assertIsNot(None, index._root_node)
+ # NOTE: We don't want to affect the _internal_node_cache, as we expect
+ # it will be small, and if we ever do touch this index again, it
+ # will save round-trips. This assertion isn't very strong,
+ because without a 3-level index, we don't have any internal
+ # nodes cached.
+ self.assertEqual(internal_node_pre_clear,
+ index._internal_node_cache.keys())
+ self.assertEqual(0, len(index._leaf_node_cache))
+
def test_trivial_constructor(self):
transport = get_transport('trace+' + self.get_url(''))
index = btree_index.BTreeGraphIndex(transport, 'index', None)
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-10-17 04:43:14 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-10-19 15:06:58 +0000
@@ -459,7 +459,8 @@
], block._dump())
-class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
+class TestCaseWithGroupCompressVersionedFiles(
+ tests.TestCaseWithMemoryTransport):
def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
dir='.', inconsistency_fatal=True):
@@ -732,6 +733,17 @@
" \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
" 0 8', \(\(\('a',\),\),\)\)")
+ def test_clear_cache(self):
+ vf = self.make_source_with_b(True, 'source')
+ vf.writer.end()
+ for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+ True):
+ pass
+ self.assertTrue(len(vf._group_cache) > 0)
+ vf.clear_cache()
+ self.assertEqual(0, len(vf._group_cache))
+
+
class StubGCVF(object):
def __init__(self, canned_get_blocks=None):
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2009-09-09 18:52:56 +0000
+++ b/bzrlib/tests/test_index.py 2009-10-19 15:45:10 +0000
@@ -173,6 +173,11 @@
"key\x00\x00\t\x00data\n"
"\n", contents)
+ def test_clear_cache(self):
+ builder = GraphIndexBuilder(reference_lists=2)
+ # This is a no-op, but the api should exist
+ builder.clear_cache()
+
def test_node_references_are_byte_offsets(self):
builder = GraphIndexBuilder(reference_lists=1)
builder.add_node(('reference', ), 'data', ([], ))
@@ -383,6 +388,12 @@
size = trans.put_file('index', stream)
return GraphIndex(trans, 'index', size)
+ def test_clear_cache(self):
+ index = self.make_index()
+ # For now, we just want to make sure the api is available. As this is
+ # old code, we don't really worry if it *does* anything.
+ index.clear_cache()
+
def test_open_bad_index_no_error(self):
trans = self.get_transport()
trans.put_bytes('name', "not an index\n")
@@ -1071,6 +1082,30 @@
index.insert_index(0, index1)
self.assertEqual([(index1, ('key', ), '')], list(index.iter_all_entries()))
+ def test_clear_cache(self):
+ log = []
+
+ class ClearCacheProxy(object):
+
+ def __init__(self, index):
+ self._index = index
+
+ def __getattr__(self, name):
+ return getattr(self._index, name)
+
+ def clear_cache(self):
+ log.append(self._index)
+ return self._index.clear_cache()
+
+ index = CombinedGraphIndex([])
+ index1 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+ index.insert_index(0, ClearCacheProxy(index1))
+ index2 = self.make_index('name', 0, nodes=[(('key', ), '', ())])
+ index.insert_index(1, ClearCacheProxy(index2))
+ # CombinedGraphIndex should call 'clear_cache()' on all children
+ index.clear_cache()
+ self.assertEqual(sorted([index1, index2]), sorted(log))
+
def test_iter_all_entries_empty(self):
index = CombinedGraphIndex([])
self.assertEqual([], list(index.iter_all_entries()))
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2009-08-17 22:08:21 +0000
+++ b/bzrlib/versionedfile.py 2009-10-19 15:06:58 +0000
@@ -930,6 +930,13 @@
def check_not_reserved_id(version_id):
revision.check_not_reserved_id(version_id)
+ def clear_cache(self):
+ """Clear whatever caches this VersionedFile holds.
+
+ This is generally called after an operation has been performed, when we
+ don't expect to be using this versioned file again soon.
+ """
+
def _check_lines_not_unicode(self, lines):
"""Check that lines being added to a versioned file are not unicode."""
for line in lines:
More information about the bazaar-commits
mailing list