Rev 3775: Code cleanup. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer
John Arbash Meinel
john at arbash-meinel.com
Tue Oct 14 21:27:41 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer
------------------------------------------------------------
revno: 3775
revision-id: john at arbash-meinel.com-20081014202722-svyfvmyhcieabhyd
parent: john at arbash-meinel.com-20081014201953-gmbxn75fjj8vcie2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_buffer
timestamp: Tue 2008-10-14 15:27:22 -0500
message:
Code cleanup.
Move some functions to more appropriate places, and rename a few functions
so that their names make more sense.
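
(For context on how the renamed pieces fit together: _get_nodes() checks the
supplied LRU cache first, widens any misses via _expand_nodes(), and then reads
and files them via the renamed _get_and_cache_nodes(), which routes each page
into the internal-node or leaf-node cache based on its offset. Below is a
minimal standalone sketch of that flow; plain dicts stand in for the LRU
caches, _read_nodes() fabricates page objects instead of reading from a
transport, and only the method names mirror the real bzrlib code.)

# Hypothetical, simplified sketch of the read-and-cache flow.
class SketchIndex(object):

    def __init__(self, row_offsets):
        # Cumulative page counts per row, e.g. [1, 10, 100] for a tree
        # with rows of 1, 9 and 90 pages.
        self._row_offsets = row_offsets
        self._internal_node_cache = {}
        self._leaf_node_cache = {}
        self._root_node = None

    def _read_nodes(self, offsets):
        # Stand-in for the real transport read; yields (offset, node).
        for offset in offsets:
            yield offset, 'node-%d' % offset

    def _get_cached_keys(self):
        """Return the set of page offsets already held in either cache."""
        cached_keys = set(self._internal_node_cache)
        cached_keys.update(self._leaf_node_cache)
        if self._root_node is not None:
            cached_keys.add(0)
        return cached_keys

    def _get_and_cache_nodes(self, offsets):
        """Read the given pages, filing each into the matching cache."""
        start_of_leaves = self._row_offsets[-2]
        found = {}
        for offset, node in self._read_nodes(sorted(offsets)):
            if offset == 0:
                self._root_node = node
            if offset < start_of_leaves:
                self._internal_node_cache[offset] = node
            else:
                self._leaf_node_cache[offset] = node
            found[offset] = node
        return found

    def _get_nodes(self, cache, offsets):
        """Return {offset: node}, reading only pages missing from cache."""
        found = {}
        needed = []
        for offset in offsets:
            if offset in cache:
                found[offset] = cache[offset]
            else:
                needed.append(offset)
        if needed:
            # The real code widens 'needed' via _expand_nodes() first.
            found.update(self._get_and_cache_nodes(needed))
        return found

index = SketchIndex(row_offsets=[1, 10, 100])
nodes = index._get_nodes(index._leaf_node_cache, [15, 16])
assert sorted(nodes) == [15, 16]
assert index._get_cached_keys() == set([15, 16])
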
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-10-14 20:19:53 +0000
+++ b/bzrlib/btree_index.py 2008-10-14 20:27:22 +0000
@@ -347,12 +347,6 @@
# First key triggers the first row
rows.append(_LeafBuilderRow())
key_count += 1
- # TODO: Flattening the node into a string key and a line should
- # probably be put into a pyrex function. We can do a quick
- # iter over all the entries to determine the final length,
- # and then do a single malloc() rather than lots of
- # intermediate mallocs as we build everything up.
- # ATM 3 / 13s are spent flattening nodes (10s is compressing)
string_key, line = _btree_serializer._flatten_node(node,
self.reference_lists)
self._add_key(string_key, line, rows)
@@ -628,13 +622,7 @@
def __ne__(self, other):
return not self.__eq__(other)
- def _get_root_node(self):
- if self._root_node is None:
- # We may not have a root node yet
- self._get_internal_nodes([0])
- return self._root_node
-
- def _cache_nodes(self, nodes, cache):
+ def _get_and_cache_nodes(self, nodes):
"""Read nodes and cache them in the lru.
The nodes list supplied is sorted and then read from disk, each node
@@ -647,9 +635,6 @@
:return: A dict of {node_pos: node}
"""
- if len(nodes) > cache._max_cache:
- trace.mutter('Requesting %s > %s nodes, not all will be cached',
- len(nodes), cache._max_cache)
found = {}
start_of_leaves = None
for node_pos, node in self._read_nodes(sorted(nodes)):
@@ -662,7 +647,6 @@
self._internal_node_cache.add(node_pos, node)
else:
self._leaf_node_cache.add(node_pos, node)
- cache.add(node_pos, node)
found[node_pos] = node
return found
@@ -686,29 +670,6 @@
num_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
return num_pages
- def _get_cached_nodes(self):
- """Determine what nodes we already have cached."""
- cached_nodes = set(self._internal_node_cache.keys())
- cached_nodes.update(self._leaf_node_cache.keys())
- if self._root_node is not None:
- cached_nodes.add(0)
- return cached_nodes
-
- def _find_layer_first_and_end(self, offset):
- """Find the start/stop nodes for the layer corresponding to offset.
-
- :return: (first, end)
- first is the first node in this layer
- end is the first node of the next layer
- """
- first = end = 0
- for roffset in self._row_offsets:
- first = end
- end = roffset
- if offset < roffset:
- break
- return first, end
-
def _expand_nodes(self, node_indexes):
"""Find extra nodes to download.
@@ -734,23 +695,22 @@
if 'index' in debug.debug_flags:
trace.mutter(' not expanding without knowing index size')
return node_indexes
- # TODO: Should this be cached instead?
num_pages = self._compute_num_pages()
# The idea here is to get nodes "next to" the ones we are already
# getting.
- cached_nodes = self._get_cached_nodes()
+ cached_keys = self._get_cached_keys()
- # if len(cached_nodes) < 2:
+ # if len(cached_keys) < 2:
# # We haven't read enough to justify expansion
# return node_indexes
# If reading recommended_pages would read the rest of the index, just
# do so.
- if num_pages - len(cached_nodes) <= self._recommended_pages:
+ if num_pages - len(cached_keys) <= self._recommended_pages:
# Read whatever is left
- if cached_nodes:
+ if cached_keys:
expanded = [x for x in xrange(num_pages)
- if x not in cached_nodes]
+ if x not in cached_keys]
else:
expanded = range(num_pages)
if 'index' in debug.debug_flags:
@@ -783,13 +743,13 @@
first, end = self._find_layer_first_and_end(pos)
previous = pos - 1
if (previous > 0
- and previous not in cached_nodes
+ and previous not in cached_keys
and previous not in final_nodes
and previous >= first):
next_tips.add(previous)
after = pos + 1
if (after < num_pages
- and after not in cached_nodes
+ and after not in cached_keys
and after not in final_nodes
and after < end):
next_tips.add(after)
@@ -807,6 +767,35 @@
trace.mutter('expanded: %s', final_nodes)
return final_nodes
+ def _find_layer_first_and_end(self, offset):
+ """Find the start/stop nodes for the layer corresponding to offset.
+
+ :return: (first, end)
+ first is the first node in this layer
+ end is the first node of the next layer
+ """
+ first = end = 0
+ for roffset in self._row_offsets:
+ first = end
+ end = roffset
+ if offset < roffset:
+ break
+ return first, end
+
+ def _get_cached_keys(self):
+ """Determine what nodes we already have cached."""
+ cached_keys = set(self._internal_node_cache.keys())
+ cached_keys.update(self._leaf_node_cache.keys())
+ if self._root_node is not None:
+ cached_keys.add(0)
+ return cached_keys
+
+ def _get_root_node(self):
+ if self._root_node is None:
+ # We may not have a root node yet
+ self._get_internal_nodes([0])
+ return self._root_node
+
def _get_nodes(self, cache, node_indexes):
found = {}
needed = []
@@ -819,10 +808,9 @@
except KeyError:
needed.append(idx)
if not needed:
- # trace.note('cached: %s\tnodes: %s', self._name[-14:], node_indexes)
return found
needed = self._expand_nodes(needed)
- found.update(self._cache_nodes(needed, cache))
+ found.update(self._get_and_cache_nodes(needed))
return found
def _get_internal_nodes(self, node_indexes):
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-10-14 19:31:34 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-10-14 20:27:22 +0000
@@ -1012,15 +1012,15 @@
index._recommended_pages = recommended_pages
return index
- def set_cached_nodes(self, index, cached_nodes):
- """Modify the index to give a canned answer for _get_cached_nodes()."""
- def _get_cached_nodes():
- cached = set(cached_nodes)
+ def set_cached_keys(self, index, cached_keys):
+ """Modify the index to give a canned answer for _get_cached_keys()."""
+ def _get_cached_keys():
+ cached = set(cached_keys)
return cached
- index._get_cached_nodes = _get_cached_nodes
+ index._get_cached_keys = _get_cached_keys
def prepare_index(self, index, node_ref_lists, key_length, key_count,
- row_lengths, cached_nodes):
+ row_lengths, cached_keys):
"""Setup the BTreeGraphIndex with some pre-canned information."""
index.node_ref_lists = node_ref_lists
index._key_length = key_length
@@ -1028,20 +1028,20 @@
index._row_lengths = row_lengths
index._compute_row_offsets()
index._root_node = btree_index._InternalNode('internal\noffset=0\n')
- self.set_cached_nodes(index, cached_nodes)
+ self.set_cached_keys(index, cached_keys)
def make_100_node_index(self):
index = self.make_index(4096*100, 6)
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=1000, row_lengths=[1, 99],
- cached_nodes=[0])
+ cached_keys=[0])
return index
def make_1000_node_index(self):
index = self.make_index(4096*1000, 6)
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=90000, row_lengths=[1, 9, 990],
- cached_nodes=[0])
+ cached_keys=[0])
return index
def assertNumPages(self, expected_pages, index, size):
@@ -1094,7 +1094,7 @@
index = self.make_index(4096*10, 5)
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=1000, row_lengths=[1, 9],
- cached_nodes=[0, 1, 2, 5, 6])
+ cached_keys=[0, 1, 2, 5, 6])
# It should fill the remaining nodes, regardless of the one requested
self.assertExpandNodes([3, 4, 7, 8, 9], index, [3])
self.assertExpandNodes([3, 4, 7, 8, 9], index, [8])
@@ -1121,7 +1121,7 @@
def test_stop_at_cached(self):
index = self.make_100_node_index()
- self.set_cached_nodes(index, [0, 10, 19])
+ self.set_cached_keys(index, [0, 10, 19])
self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [11])
self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [12])
self.assertExpandNodes([12, 13, 14, 15, 16, 17, 18], index, [15])
@@ -1131,7 +1131,7 @@
def test_cannot_fully_expand(self):
index = self.make_100_node_index()
- self.set_cached_nodes(index, [0, 10, 12])
+ self.set_cached_keys(index, [0, 10, 12])
# We don't go into an endless loop if we are bound by cached nodes
self.assertExpandNodes([11], index, [11])
@@ -1149,6 +1149,6 @@
self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [10])
self.assertExpandNodes([10, 11, 12, 13, 14, 15, 16], index, [13])
- self.set_cached_nodes(index, [0, 4, 12])
+ self.set_cached_keys(index, [0, 4, 12])
self.assertExpandNodes([5, 6, 7, 8, 9], index, [7])
self.assertExpandNodes([10, 11], index, [11])
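
(A note on the relocated _find_layer_first_and_end: given a page offset, it
walks self._row_offsets to find the half-open [first, end) range of pages
making up the layer that contains the offset, which is what keeps
_expand_nodes from spilling a read across layer boundaries. Below is a
standalone reimplementation of that loop, using made-up offsets for a
1/9/90-page tree rather than a real index.)

def find_layer_first_and_end(row_offsets, offset):
    # row_offsets holds cumulative page counts, e.g. [1, 10, 100] for
    # rows of 1, 9 and 90 pages; the result is the [first, end) page
    # range of the row containing 'offset'.
    first = end = 0
    for roffset in row_offsets:
        first = end
        end = roffset
        if offset < roffset:
            break
    return first, end

# Page 0 is the root row, pages 1-9 the internal row, 10-99 the leaves.
assert find_layer_first_and_end([1, 10, 100], 0) == (0, 1)
assert find_layer_first_and_end([1, 10, 100], 5) == (1, 10)
assert find_layer_first_and_end([1, 10, 100], 42) == (10, 100)
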