Rev 3647: Dropping the number of nodes to 100k from 200k in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
John Arbash Meinel
john at arbash-meinel.com
Wed Aug 20 21:21:05 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
------------------------------------------------------------
revno: 3647
revision-id: john at arbash-meinel.com-20080820202100-hu678q27vjhcgrrl
parent: john at arbash-meinel.com-20080820193429-0v5jm5zd4gggejpx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Wed 2008-08-20 15:21:00 -0500
message:
Dropping the number of nodes to 100k from 200k
It is still enough to create a 3-level index, but all tests
run 2x faster.
We currently spend 1.635s creating the nodes, 3.733s adding them,
and 9.842s flushing in the three_level and iter_all tests.
The tests now take 15s, down from 68s, which is a lot better.
It would be nice to have sub-second tests, though.
For now, we'll live with this.
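
For reference, the three phases being timed map onto the test structure
in the patch below roughly like this (a sketch only: time_index_build is
a hypothetical helper, while make_nodes, BTreeBuilder, add_node and
finish are the names actually used in the patch):

import time

from bzrlib import btree_index

def time_index_build(test):
    # Build the same index as the tests. make_nodes(50000, 2, 2)
    # appears to yield 100k nodes (2 key prefixes x 50k positions),
    # matching the assertEqual(100000, len(found_nodes)) below.
    builder = btree_index.BTreeBuilder(key_elements=2,
                                       reference_lists=2,
                                       spill_at=100001)
    tstart = time.time()
    nodes = test.make_nodes(50000, 2, 2)
    delta_make = time.time() - tstart   # "creating": ~1.635s
    tstart = time.time()
    for node in nodes:
        builder.add_node(*node)
    delta_add = time.time() - tstart    # "adding": ~3.733s
    tstart = time.time()
    # finish() returns a file-like object (the tests pass it to
    # transport.put_file); reading it forces the serialisation.
    content = builder.finish().read()
    delta_flush = time.time() - tstart  # "flushing": ~9.842s
    return delta_make, delta_add, delta_flush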
modified:
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
-------------- next part --------------
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-08-20 19:34:29 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-08-20 20:21:00 +0000
@@ -16,8 +16,8 @@
"""Tests for btree indices."""
+import pprint
import time
-import pprint
import zlib
from bzrlib import (
@@ -85,7 +85,9 @@
prefix = (str(prefix_pos) * 40,)
else:
prefix = ()
- for pos in range(count):
+ for pos in xrange(count):
+ # TODO: This creates odd keys. When count == 100,000, it
+ # creates a 240 byte key
key = prefix + (str(pos) * 40,)
value = "value:%s" % pos
if reference_lists:
@@ -95,7 +97,7 @@
# as many keys in each list as its index + the key depth
# mod 2 - this generates both 0 length lists and
# ones slightly longer than the number of lists.
- # It alsu ensures we have non homogeneous lists.
+ # It also ensures we have non homogeneous lists.
refs.append([])
for ref_pos in range(list_pos + pos % 2):
if pos % 2:
@@ -276,13 +278,13 @@
# pointer to the second node that the internal node is for, _not_
# the first, otherwise the first node overlaps with the last node of
# the prior internal node on that row.
- # We will be adding 200,000 nodes, so spill at 200,001 to prevent
+ # We will be adding 100,000 nodes, so spill at 100,001 to prevent
# having to flush anything out to disk.
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
- spill_at=200001)
- # 200K nodes is enough to create a two internal nodes on the second level
+ spill_at=100001)
+ # 100K nodes is enough to create two internal nodes on the second level
tstart = time.time()
- nodes = self.make_nodes(100000, 2, 2)
+ nodes = self.make_nodes(50000, 2, 2)
delta_make = time.time() - tstart
tstart = time.time()
@@ -691,14 +693,15 @@
# iterating all entries reads the header, then does a linear
# read.
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
- spill_at=200001)
- # 200k nodes is enough to create a three-level index.
- nodes = self.make_nodes(100000, 2, 2)
+ spill_at=100001)
+ # 100k nodes is enough to create a three-level index, which shows that
+ # we skip the internal nodes and just read the leaf nodes.
+ nodes = self.make_nodes(50000, 2, 2)
for node in nodes:
builder.add_node(*node)
transport = get_transport('trace+' + self.get_url(''))
size = transport.put_file('index', builder.finish())
- self.assertEqual(4058469, size, 'number of expected bytes in the'
+ self.assertEqual(2026722, size, 'number of expected bytes in the'
' output changed')
del builder
index = btree_index.BTreeGraphIndex(transport, 'index', size)
@@ -712,7 +715,7 @@
self.assertEqual(3, len(index._row_lengths),
"Not enough rows: %r" % index._row_lengths)
# Should be as long as the nodes we supplied
- self.assertEqual(200000, len(found_nodes))
+ self.assertEqual(100000, len(found_nodes))
# Should have the same content
self.assertEqual(set(nodes), set(bare_nodes))
# Should have done linear scan IO up the index, ignoring
@@ -720,13 +723,14 @@
# The entire index should have been read
total_pages = sum(index._row_lengths)
self.assertEqual(total_pages, index._row_offsets[-1])
- self.assertEqual(4058469, size)
+ self.assertEqual(2026722, size)
# The start of the leaves
first_byte = index._row_offsets[-2] * btree_index._PAGE_SIZE
readv_request = []
for offset in range(first_byte, size, 4096):
readv_request.append((offset, 4096))
- readv_request[-1] = (readv_request[-1][0], 3429)
+ # The last page is truncated
+ readv_request[-1] = (readv_request[-1][0], 3298)
expected = [('readv', 'index', [(0, 4096)], False, None),
('readv', 'index', readv_request, False, None)]
if expected != transport._activity:
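
As a sanity check on the new magic numbers: 2026722 = 494 * 4096 + 3298,
so the final leaf page really is 3298 bytes (the old 4058469-byte index
likewise ended in a 3429-byte page: 4058469 = 990 * 4096 + 3429). A small
sketch of the readv list the test builds (leaf_readv_requests is a
hypothetical name; the logic mirrors the loop in the hunk above):

def leaf_readv_requests(first_byte, size, page_size=4096):
    # One request per leaf page from the start of the leaf row, then
    # truncate the final request to the bytes actually in the file.
    requests = [(offset, page_size)
                for offset in xrange(first_byte, size, page_size)]
    last_offset = requests[-1][0]
    requests[-1] = (last_offset, size - last_offset)
    return requests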