Rev 3832: (jam) reading pack-names only issues a single read request. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Nov 12 05:49:05 GMT 2008
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3832
revision-id: pqm at pqm.ubuntu.com-20081112054902-n6p0qrvl4cb26ij8
parent: pqm at pqm.ubuntu.com-20081112012514-6y8u99lf11pk0rdm
parent: john at arbash-meinel.com-20081109184017-oze18v7ba0t1q9b8
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2008-11-12 05:49:02 +0000
message:
(jam) reading pack-names only issues a single read request.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 3824.1.2
revision-id: john at arbash-meinel.com-20081109184017-oze18v7ba0t1q9b8
parent: john at arbash-meinel.com-20081109183148-671s0f3miohmsyoy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_no_size
timestamp: Sun 2008-11-09 12:40:17 -0600
message:
iter_all_entries() shouldn't need to re-read the root page when the index consists of only a root node, as it has already been read.
Now when reading the pack-names file, we will issue a single request,
rather than three.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
------------------------------------------------------------
revno: 3824.1.1
revision-id: john at arbash-meinel.com-20081109183148-671s0f3miohmsyoy
parent: pqm at pqm.ubuntu.com-20081107151945-hwdojxj8yafpk350
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_no_size
timestamp: Sun 2008-11-09 12:31:48 -0600
message:
Fix _read_nodes() to only issue a single read if there is no known size.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-10-29 19:24:01 +0000
+++ b/bzrlib/btree_index.py 2008-11-09 18:40:17 +0000
@@ -881,15 +881,21 @@
return
start_of_leaves = self._row_offsets[-2]
end_of_leaves = self._row_offsets[-1]
- needed_nodes = range(start_of_leaves, end_of_leaves)
+ needed_offsets = range(start_of_leaves, end_of_leaves)
+ if needed_offsets == [0]:
+ # Special case when we only have a root node, as we have already
+ # read everything
+ nodes = [(0, self._root_node)]
+ else:
+ nodes = self._read_nodes(needed_offsets)
# We iterate strictly in-order so that we can use this function
# for spilling index builds to disk.
if self.node_ref_lists:
- for _, node in self._read_nodes(needed_nodes):
+ for _, node in nodes:
for key, (value, refs) in sorted(node.keys.items()):
yield (self, key, value, refs)
else:
- for _, node in self._read_nodes(needed_nodes):
+ for _, node in nodes:
for key, (value, refs) in sorted(node.keys.items()):
yield (self, key, value)
@@ -1246,6 +1252,7 @@
:param nodes: The nodes to read. 0 - first node, 1 - second node etc.
:return: None
"""
+ bytes = None
ranges = []
for index in nodes:
offset = index * _PAGE_SIZE
@@ -1255,11 +1262,12 @@
if self._size:
size = min(_PAGE_SIZE, self._size)
else:
- stream = self._transport.get(self._name)
- start = stream.read(_PAGE_SIZE)
- # Avoid doing this again
- self._size = len(start)
- size = min(_PAGE_SIZE, self._size)
+ # The only case where we don't know the size, is for very
+ # small indexes. So we read the whole thing
+ bytes = self._transport.get_bytes(self._name)
+ self._size = len(bytes)
+ ranges.append((0, len(bytes)))
+ break
else:
if offset > self._size:
raise AssertionError('tried to read past the end'
@@ -1269,7 +1277,10 @@
ranges.append((offset, size))
if not ranges:
return
- if self._file is None:
+ if bytes:
+ data_ranges = [(offset, bytes[offset:offset+_PAGE_SIZE])
+ for offset in xrange(0, len(bytes), _PAGE_SIZE)]
+ elif self._file is None:
data_ranges = self._transport.readv(self._name, ranges)
else:
data_ranges = []
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-10-28 19:39:57 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-11-09 18:40:17 +0000
@@ -579,9 +579,7 @@
# The entire index should have been requested (as we generally have the
# size available, and doing many small readvs is inappropriate).
# We can't tell how much was actually read here, but - check the code.
- self.assertEqual([('get', 'index'),
- ('readv', 'index', [(0, 72)], False, None)],
- transport._activity)
+ self.assertEqual([('get', 'index')], transport._activity)
def test_empty_key_count(self):
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
@@ -612,6 +610,28 @@
transport._activity)
self.assertEqual(1199, size)
+ def test__read_nodes_no_size_one_page_reads_once(self):
+ self.make_index(nodes=[(('key',), 'value', ())])
+ trans = get_transport('trace+' + self.get_url())
+ index = btree_index.BTreeGraphIndex(trans, 'index', None)
+ del trans._activity[:]
+ nodes = dict(index._read_nodes([0]))
+ self.assertEqual([0], nodes.keys())
+ node = nodes[0]
+ self.assertEqual([('key',)], node.keys.keys())
+ self.assertEqual([('get', 'index')], trans._activity)
+
+ def test__read_nodes_no_size_multiple_pages(self):
+ index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
+ index.key_count()
+ num_pages = index._row_offsets[-1]
+ # Reopen with a traced transport and no size
+ trans = get_transport('trace+' + self.get_url())
+ index = btree_index.BTreeGraphIndex(trans, 'index', None)
+ del trans._activity[:]
+ nodes = dict(index._read_nodes([0]))
+ self.assertEqual(range(num_pages), nodes.keys())
+
def test_2_levels_key_count_2_2(self):
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
nodes = self.make_nodes(160, 2, 2)
@@ -699,6 +719,15 @@
btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
btree_index.BTreeGraphIndex(transport1, 'index', 20))
+ def test_iter_all_only_root_no_size(self):
+ self.make_index(nodes=[(('key',), 'value', ())])
+ trans = get_transport('trace+' + self.get_url(''))
+ index = btree_index.BTreeGraphIndex(trans, 'index', None)
+ del trans._activity[:]
+ self.assertEqual([(('key',), 'value')],
+ [x[1:] for x in index.iter_all_entries()])
+ self.assertEqual([('get', 'index')], trans._activity)
+
def test_iter_all_entries_reads(self):
# iterating all entries reads the header, then does a linear
# read.
More information about the bazaar-commits
mailing list