Rev 3778: Review comments from Martin. Code clarity/variable name/docstring updates. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer
John Arbash Meinel
john at arbash-meinel.com
Tue Oct 28 19:38:01 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/btree_buffer
------------------------------------------------------------
revno: 3778
revision-id: john at arbash-meinel.com-20081028193747-0qmbzudogd1pjstk
parent: john at arbash-meinel.com-20081014213527-4j9uc93aq1qmn43b
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_buffer
timestamp: Tue 2008-10-28 14:37:47 -0500
message:
Review comments from Martin. Code clarity/variable name/docstring updates.
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-10-14 21:35:27 +0000
+++ b/bzrlib/btree_index.py 2008-10-28 19:37:47 +0000
@@ -651,64 +651,71 @@
return found
def _compute_recommended_pages(self):
- """Given the transport's recommended_page_size, determine num pages."""
+ """Convert transport's recommended_page_size into btree pages.
+
+ recommended_page_size is in bytes; we want to know how many _PAGE_SIZE
+ pages fit in that length.
+ """
recommended_read = self._transport.recommended_page_size()
recommended_pages = int(math.ceil(recommended_read /
float(_PAGE_SIZE)))
return recommended_pages
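As a standalone illustration of that ceil-division (a hedged sketch; the
transport figures here are examples, not part of the patch):

    import math

    _PAGE_SIZE = 4096  # btree pages are 4kB

    def compute_recommended_pages(recommended_read):
        # Round up: a trailing partial page still costs a whole page read.
        return int(math.ceil(recommended_read / float(_PAGE_SIZE)))

    assert compute_recommended_pages(4096) == 1        # local transport
    assert compute_recommended_pages(64 * 1024) == 16  # a 64kB http hint
    assert compute_recommended_pages(70000) == 18      # partial page rounds up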
- def _compute_num_pages(self):
- """Determine the offset to the last page in this index."""
+ def _compute_total_pages_in_index(self):
+ """How many pages are in the index.
+
+ If we have read the header we will use the value stored there.
+ Otherwise it will be computed based on the length of the index.
+ """
if self._size is None:
- raise AssertionError('_compute_num_pages should not be called'
- ' when self._size is None')
+ raise AssertionError('_compute_total_pages_in_index should not be'
+ ' called when self._size is None')
if self._root_node is not None:
# This is the number of pages as defined by the header
return self._row_offsets[-1]
# This is the number of pages as defined by the size of the index. They
# should be identical.
- num_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
- return num_pages
+ total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
+ return total_pages
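For example, with the row_lengths used by make_1000_node_index in the tests
below, both code paths agree (a sketch; the on-disk size is hypothetical):

    import math

    _PAGE_SIZE = 4096
    row_lengths = [1, 9, 990]          # root row, internal row, leaf row
    row_offsets = [0]
    for length in row_lengths:
        row_offsets.append(row_offsets[-1] + length)
    assert row_offsets[-1] == 1000     # pages as defined by the header

    size = 1000 * _PAGE_SIZE           # hypothetical on-disk length
    assert int(math.ceil(size / float(_PAGE_SIZE))) == 1000  # same answer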
- def _expand_nodes(self, node_indexes):
- """Find extra nodes to download.
+ def _expand_offsets(self, offsets):
+ """Find extra pages to download.
The idea is that we always want to make big-enough requests (like 64kB
for http), so that we don't waste round trips. So given the entries
- that we already have cached, and the new nodes being downloaded, figure
+ that we already have cached and the new pages being downloaded, figure
out what other pages we might want to read.
- :param node_indexes: The nodes that have been requested to read.
- :return: A new list of nodes to download
+ See also doc/developers/btree_index_prefetch.txt for more details.
+
+ :param offsets: The offsets to be read
+ :return: A list of offsets to download
"""
if 'index' in debug.debug_flags:
- trace.mutter('expanding: %s\tnodes: %s', self._name, node_indexes)
+ trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)
- if len(node_indexes) >= self._recommended_pages:
+ if len(offsets) >= self._recommended_pages:
# Don't add more, we are already requesting more than enough
if 'index' in debug.debug_flags:
trace.mutter(' not expanding large request (%s >= %s)',
- len(node_indexes), self._recommended_pages)
- return node_indexes
+ len(offsets), self._recommended_pages)
+ return offsets
if self._size is None:
# Don't try anything, because we don't know where the file ends
if 'index' in debug.debug_flags:
trace.mutter(' not expanding without knowing index size')
- return node_indexes
- num_pages = self._compute_num_pages()
- # The idea here is to get nodes "next to" the ones we are already
- # getting.
- cached_keys = self._get_cached_keys()
-
+ return offsets
+ total_pages = self._compute_total_pages_in_index()
+ cached_offsets = self._get_offsets_to_cached_pages()
# If reading recommended_pages would read the rest of the index, just
# do so.
- if num_pages - len(cached_keys) <= self._recommended_pages:
+ if total_pages - len(cached_offsets) <= self._recommended_pages:
# Read whatever is left
- if cached_keys:
- expanded = [x for x in xrange(num_pages)
- if x not in cached_keys]
+ if cached_offsets:
+ expanded = [x for x in xrange(total_pages)
+ if x not in cached_offsets]
else:
- expanded = range(num_pages)
+ expanded = range(total_pages)
if 'index' in debug.debug_flags:
trace.mutter(' reading all unread pages: %s', expanded)
return expanded
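A minimal sketch of this "read whatever is left" branch, using the same
numbers as test_read_all_when_cached below (10 pages, 5 recommended, pages
0, 1, 2, 5 and 6 already cached):

    total_pages = 10
    cached_offsets = set([0, 1, 2, 5, 6])
    recommended_pages = 5
    if total_pages - len(cached_offsets) <= recommended_pages:
        expanded = [x for x in xrange(total_pages)
                    if x not in cached_offsets]
    assert expanded == [3, 4, 7, 8, 9]  # whatever offset was requested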
@@ -720,10 +727,10 @@
# See doc/developers/btree_index_prefetch.txt for a discussion, and
# a possible implementation when we are guessing that the second
# layer index is small
- final_nodes = node_indexes
+ final_offsets = offsets
else:
tree_depth = len(self._row_lengths)
- if len(cached_keys) < tree_depth and len(node_indexes) == 1:
+ if len(cached_offsets) < tree_depth and len(offsets) == 1:
# We haven't read enough to justify expansion
# If we are only going to read the root node, and 1 leaf node,
# then it isn't worth expanding our request. Once we've read at
@@ -731,47 +738,58 @@
# start expanding our requests.
if 'index' in debug.debug_flags:
trace.mutter(' not expanding on first reads')
- return node_indexes
-
- # Expand requests to neighbors until we have at least
- # recommended_pages to request. We only want to expand requests
- # within a given layer. We cheat a little bit and assume all
- # requests will be in the same layer. This is true given the
- # current design, but if it changes this algorithm may perform
- # oddly.
- final_nodes = set(node_indexes)
- first = end = None
- new_tips = set(final_nodes)
- while len(final_nodes) < self._recommended_pages and new_tips:
- next_tips = set()
- for pos in new_tips:
- if first is None:
- first, end = self._find_layer_first_and_end(pos)
- previous = pos - 1
- if (previous > 0
- and previous not in cached_keys
- and previous not in final_nodes
- and previous >= first):
- next_tips.add(previous)
- after = pos + 1
- if (after < num_pages
- and after not in cached_keys
- and after not in final_nodes
- and after < end):
- next_tips.add(after)
- # This would keep us from going bigger than
- # recommended_pages by only expanding the first nodes.
- # However, if we are making a 'wide' request, it is
- # reasonable to expand all points equally.
- # if len(final_nodes) > recommended_pages:
- # break
- final_nodes.update(next_tips)
- new_tips = next_tips
-
- final_nodes = sorted(final_nodes)
+ return offsets
+ final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
+ total_pages)
+
+ final_offsets = sorted(final_offsets)
if 'index' in debug.debug_flags:
- trace.mutter('expanded: %s', final_nodes)
- return final_nodes
+ trace.mutter('expanded: %s', final_offsets)
+ return final_offsets
+
+ def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):
+ """Expand requests to neighbors until we have enough pages.
+
+ This is called from _expand_offsets after policy has determined that we
+ want to expand.
+ We only want to expand requests within a given layer. We cheat a little
+ bit and assume all requests will be in the same layer. This is true
+ given the current design, but if it changes this algorithm may perform
+ oddly.
+
+ :param offsets: requested offsets
+ :param cached_offsets: offsets for pages we currently have cached
+ :param total_pages: the total number of pages in the index
+ :return: A set() of offsets after expansion
+ """
+ final_offsets = set(offsets)
+ first = end = None
+ new_tips = set(final_offsets)
+ while len(final_offsets) < self._recommended_pages and new_tips:
+ next_tips = set()
+ for pos in new_tips:
+ if first is None:
+ first, end = self._find_layer_first_and_end(pos)
+ previous = pos - 1
+ if (previous > 0
+ and previous not in cached_offsets
+ and previous not in final_offsets
+ and previous >= first):
+ next_tips.add(previous)
+ after = pos + 1
+ if (after < total_pages
+ and after not in cached_offsets
+ and after not in final_offsets
+ and after < end):
+ next_tips.add(after)
+ # This would keep us from going bigger than
+ # recommended_pages by only expanding the first offsets.
+ # However, if we are making a 'wide' request, it is
+ # reasonable to expand all points equally.
+ # if len(final_offsets) > recommended_pages:
+ # break
+ final_offsets.update(next_tips)
+ new_tips = next_tips
+ return final_offsets
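A self-contained sketch of the expansion loop above, reproducing the first
test_include_neighbors case below. For simplicity, first/end are passed in
directly here rather than computed lazily via _find_layer_first_and_end:

    def expand_to_neighbors(offsets, cached_offsets, total_pages,
                            first, end, recommended_pages):
        final_offsets = set(offsets)
        new_tips = set(final_offsets)
        while len(final_offsets) < recommended_pages and new_tips:
            next_tips = set()
            for pos in new_tips:
                previous = pos - 1
                if (previous > 0
                    and previous not in cached_offsets
                    and previous not in final_offsets
                    and previous >= first):
                    next_tips.add(previous)
                after = pos + 1
                if (after < total_pages
                    and after not in cached_offsets
                    and after not in final_offsets
                    and after < end):
                    next_tips.add(after)
            final_offsets.update(next_tips)
            new_tips = next_tips
        return final_offsets

    # 100-page index, leaf layer spans offsets 1-99 (so end == 100),
    # recommended_pages == 6, root and page 50 cached, page 12 requested:
    result = expand_to_neighbors([12], set([0, 50]), 100, 1, 100, 6)
    assert sorted(result) == [9, 10, 11, 12, 13, 14, 15]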
def _find_layer_first_and_end(self, offset):
"""Find the start/stop nodes for the layer corresponding to offset.
@@ -788,13 +806,13 @@
break
return first, end
- def _get_cached_keys(self):
+ def _get_offsets_to_cached_pages(self):
"""Determine what nodes we already have cached."""
- cached_keys = set(self._internal_node_cache.keys())
- cached_keys.update(self._leaf_node_cache.keys())
+ cached_offsets = set(self._internal_node_cache.keys())
+ cached_offsets.update(self._leaf_node_cache.keys())
if self._root_node is not None:
- cached_keys.add(0)
- return cached_keys
+ cached_offsets.add(0)
+ return cached_offsets
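For instance (hypothetical cache contents), the result is the union of both
node caches plus the root page at offset 0:

    cached_offsets = set([1, 5])            # internal node cache keys
    cached_offsets.update(set([500, 501]))  # leaf node cache keys
    cached_offsets.add(0)                   # root page, if already read
    assert sorted(cached_offsets) == [0, 1, 5, 500, 501]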
def _get_root_node(self):
if self._root_node is None:
@@ -815,7 +833,7 @@
needed.append(idx)
if not needed:
return found
- needed = self._expand_nodes(needed)
+ needed = self._expand_offsets(needed)
found.update(self._get_and_cache_nodes(needed))
return found
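Putting the pieces together, the read path is roughly the following (a
hedged sketch; lookup_cache and read_pages are hypothetical stand-ins for
the cache probe and for _get_and_cache_nodes):

    def get_nodes(index, offsets):
        found, needed = {}, []
        for offset in offsets:
            node = lookup_cache(index, offset)    # hypothetical cache probe
            if node is not None:
                found[offset] = node
            else:
                needed.append(offset)
        if not needed:
            return found
        needed = index._expand_offsets(needed)    # widen the request
        found.update(read_pages(index, needed))   # hypothetical bulk readv
        return found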
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-10-14 21:35:27 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-10-28 19:37:47 +0000
@@ -998,7 +998,7 @@
['c', 'd', 'f', 'g'])
-class TestExpandNodes(tests.TestCase):
+class TestExpandOffsets(tests.TestCase):
def make_index(self, size, recommended_pages=None):
"""Make an index with a generic size.
@@ -1012,15 +1012,15 @@
index._recommended_pages = recommended_pages
return index
- def set_cached_keys(self, index, cached_keys):
- """Modify the index to give a canned answer for _get_cached_keys()."""
- def _get_cached_keys():
- cached = set(cached_keys)
+ def set_cached_offsets(self, index, cached_offsets):
+ """Monkeypatch to give a canned answer for _get_offsets_for...()."""
+ def _get_offsets_to_cached_pages():
+ cached = set(cached_offsets)
return cached
- index._get_cached_keys = _get_cached_keys
+ index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
def prepare_index(self, index, node_ref_lists, key_length, key_count,
- row_lengths, cached_keys):
+ row_lengths, cached_offsets):
"""Setup the BTreeGraphIndex with some pre-canned information."""
index.node_ref_lists = node_ref_lists
index._key_length = key_length
@@ -1028,14 +1028,14 @@
index._row_lengths = row_lengths
index._compute_row_offsets()
index._root_node = btree_index._InternalNode('internal\noffset=0\n')
- self.set_cached_keys(index, cached_keys)
+ self.set_cached_offsets(index, cached_offsets)
def make_100_node_index(self):
index = self.make_index(4096*100, 6)
# Consider we've already made a single request at the middle
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=1000, row_lengths=[1, 99],
- cached_keys=[0, 50])
+ cached_offsets=[0, 50])
return index
def make_1000_node_index(self):
@@ -1043,22 +1043,24 @@
# Pretend we've already made a single request in the middle
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=90000, row_lengths=[1, 9, 990],
- cached_keys=[0, 5, 500])
+ cached_offsets=[0, 5, 500])
return index
def assertNumPages(self, expected_pages, index, size):
index._size = size
- self.assertEqual(expected_pages, index._compute_num_pages())
+ self.assertEqual(expected_pages, index._compute_total_pages_in_index())
- def assertExpandNodes(self, expected, index, node_indexes):
- self.assertEqual(expected, index._expand_nodes(node_indexes))
+ def assertExpandOffsets(self, expected, index, offsets):
+ self.assertEqual(expected, index._expand_offsets(offsets),
+ 'We did not get the expected value after expanding'
+ ' %s' % (offsets,))
def test_default_recommended_pages(self):
index = self.make_index(None)
# local transport recommends 4096 byte reads, which is 1 page
self.assertEqual(1, index._recommended_pages)
- def test__compute_num_pages(self):
+ def test__compute_total_pages_in_index(self):
index = self.make_index(None)
self.assertNumPages(1, index, 1024)
self.assertNumPages(1, index, 4095)
@@ -1079,100 +1081,100 @@
def test_unknown_size(self):
# We should not expand if we don't know the file size
index = self.make_index(None, 10)
- self.assertExpandNodes([0], index, [0])
- self.assertExpandNodes([1, 4, 9], index, [1, 4, 9])
+ self.assertExpandOffsets([0], index, [0])
+ self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
def test_more_than_recommended(self):
index = self.make_index(4096*100, 2)
- self.assertExpandNodes([1, 10], index, [1, 10])
- self.assertExpandNodes([1, 10, 20], index, [1, 10, 20])
+ self.assertExpandOffsets([1, 10], index, [1, 10])
+ self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
def test_read_all_from_root(self):
index = self.make_index(4096*10, 20)
- self.assertExpandNodes(range(10), index, [0])
+ self.assertExpandOffsets(range(10), index, [0])
def test_read_all_when_cached(self):
# We've read enough that we can grab all the rest in a single request
index = self.make_index(4096*10, 5)
self.prepare_index(index, node_ref_lists=0, key_length=1,
key_count=1000, row_lengths=[1, 9],
- cached_keys=[0, 1, 2, 5, 6])
+ cached_offsets=[0, 1, 2, 5, 6])
# It should fill the remaining nodes, regardless of the one requested
- self.assertExpandNodes([3, 4, 7, 8, 9], index, [3])
- self.assertExpandNodes([3, 4, 7, 8, 9], index, [8])
- self.assertExpandNodes([3, 4, 7, 8, 9], index, [9])
+ self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
+ self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
+ self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
def test_no_root_node(self):
index = self.make_index(4096*10, 5)
- self.assertExpandNodes([0], index, [0])
+ self.assertExpandOffsets([0], index, [0])
def test_include_neighbors(self):
index = self.make_100_node_index()
# We expand in both directions, until we have at least 'recommended'
# pages
- self.assertExpandNodes([9, 10, 11, 12, 13, 14, 15], index, [12])
- self.assertExpandNodes([88, 89, 90, 91, 92, 93, 94], index, [91])
+ self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
+ self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
# If we hit an 'edge' we continue in the other direction
- self.assertExpandNodes([1, 2, 3, 4, 5, 6], index, [2])
- self.assertExpandNodes([94, 95, 96, 97, 98, 99], index, [98])
+ self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
+ self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
# Requesting many nodes will expand all locations equally
- self.assertExpandNodes([1, 2, 3, 80, 81, 82], index, [2, 81])
- self.assertExpandNodes([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
+ self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
+ self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
[2, 10, 81])
def test_stop_at_cached(self):
index = self.make_100_node_index()
- self.set_cached_keys(index, [0, 10, 19])
- self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [11])
- self.assertExpandNodes([11, 12, 13, 14, 15, 16], index, [12])
- self.assertExpandNodes([12, 13, 14, 15, 16, 17, 18], index, [15])
- self.assertExpandNodes([13, 14, 15, 16, 17, 18], index, [16])
- self.assertExpandNodes([13, 14, 15, 16, 17, 18], index, [17])
- self.assertExpandNodes([13, 14, 15, 16, 17, 18], index, [18])
+ self.set_cached_offsets(index, [0, 10, 19])
+ self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
+ self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
+ self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
+ self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
+ self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
+ self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
def test_cannot_fully_expand(self):
index = self.make_100_node_index()
- self.set_cached_keys(index, [0, 10, 12])
+ self.set_cached_offsets(index, [0, 10, 12])
# We don't go into an endless loop if we are bound by cached nodes
- self.assertExpandNodes([11], index, [11])
+ self.assertExpandOffsets([11], index, [11])
def test_overlap(self):
index = self.make_100_node_index()
- self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [12, 13])
- self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [11, 14])
+ self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
+ self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
def test_stay_within_layer(self):
index = self.make_1000_node_index()
# When expanding a request, we won't read nodes from the next layer
- self.assertExpandNodes([1, 2, 3, 4], index, [2])
- self.assertExpandNodes([6, 7, 8, 9], index, [6])
- self.assertExpandNodes([6, 7, 8, 9], index, [9])
- self.assertExpandNodes([10, 11, 12, 13, 14, 15], index, [10])
- self.assertExpandNodes([10, 11, 12, 13, 14, 15, 16], index, [13])
+ self.assertExpandOffsets([1, 2, 3, 4], index, [2])
+ self.assertExpandOffsets([6, 7, 8, 9], index, [6])
+ self.assertExpandOffsets([6, 7, 8, 9], index, [9])
+ self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
+ self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
- self.set_cached_keys(index, [0, 4, 12])
- self.assertExpandNodes([5, 6, 7, 8, 9], index, [7])
- self.assertExpandNodes([10, 11], index, [11])
+ self.set_cached_offsets(index, [0, 4, 12])
+ self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
+ self.assertExpandOffsets([10, 11], index, [11])
def test_small_requests_unexpanded(self):
index = self.make_100_node_index()
- self.set_cached_keys(index, [0])
- self.assertExpandNodes([1], index, [1])
- self.assertExpandNodes([50], index, [50])
+ self.set_cached_offsets(index, [0])
+ self.assertExpandOffsets([1], index, [1])
+ self.assertExpandOffsets([50], index, [50])
# If we request more than one node, then we'll expand
- self.assertExpandNodes([49, 50, 51, 59, 60, 61], index, [50, 60])
+ self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
# The first pass does not expand
index = self.make_1000_node_index()
- self.set_cached_keys(index, [0])
- self.assertExpandNodes([1], index, [1])
- self.set_cached_keys(index, [0, 1])
- self.assertExpandNodes([100], index, [100])
- self.set_cached_keys(index, [0, 1, 100])
+ self.set_cached_offsets(index, [0])
+ self.assertExpandOffsets([1], index, [1])
+ self.set_cached_offsets(index, [0, 1])
+ self.assertExpandOffsets([100], index, [100])
+ self.set_cached_offsets(index, [0, 1, 100])
# But after the first depth, we will expand
- self.assertExpandNodes([2, 3, 4, 5, 6, 7], index, [2])
- self.assertExpandNodes([2, 3, 4, 5, 6, 7], index, [4])
- self.set_cached_keys(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
- self.assertExpandNodes([102, 103, 104, 105, 106, 107, 108], index,
- [105])
+ self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
+ self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
+ self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
+ self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
+ [105])