Rev 6: Implement BTreeGraphIndex reader logic, stage one - make it work. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
Robert Collins
robertc at robertcollins.net
Tue Jul 1 10:51:17 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
------------------------------------------------------------
revno: 6
revision-id: robertc at robertcollins.net-20080701095116-ge9gvqt09zie8x42
parent: robertc at robertcollins.net-20080701023304-a01ibps9ycjkhzqj
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Tue 2008-07-01 19:51:16 +1000
message:
Implement BTreeGraphIndex reader logic, stage one - make it work.
modified:
btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-01 02:33:04 +0000
+++ b/btree_index.py 2008-07-01 09:51:16 +0000
@@ -17,10 +17,12 @@
"""B+Tree indices"""
+from bisect import bisect_right
import tempfile
import zlib
-from bzrlib import index, osutils
+from bzrlib import debug, index, lru_cache, osutils
+from bzrlib import errors as bzrerrors
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
from bzrlib.plugins.index2 import errors, chunk_writer
@@ -125,7 +127,7 @@
length)
internal_row.writer.write(_INTERNAL_FLAG)
internal_row.writer.write(_INTERNAL_OFFSET +
- str(self.rows[pos + 1].nodes - 1) + "\n")
+ str(self.rows[pos + 1].nodes) + "\n")
# add a new leaf
length = 4096
if self.rows[-1].nodes == 0:
@@ -138,9 +140,9 @@
key_line = string_key + "\n"
new_row = True
for row in reversed(self.rows[:-1]):
- # Mark the end of this node in the node above. If it
- # doesn't fit then flush the node out and mark the end
- # point of that internal node in the node above.
+ # Mark the start of the next node in the node above. If it
+ # doesn't fit then propogate upwards until we find one that
+ # it does fit into.
if row.writer.write(key_line):
finish_node(row)
else:
@@ -203,3 +205,324 @@
result.flush()
result.seek(0)
return result
+
+
+class _LeafNode(object):
+ """A leaf node for a serialised B+Tree index."""
+
+ def __init__(self, bytes, key_length, ref_list_length):
+ """Parse bytes to create a leaf node object."""
+ # splitlines mangles the \r delimiters.. don't use it.
+ self.keys = dict(self._parse_lines(bytes.split('\n'), key_length,
+ ref_list_length))
+
+ def _parse_lines(self, lines, key_length, ref_list_length):
+ nodes = []
+ for line in lines[1:]:
+ if line == '':
+ return nodes
+ elements = line.split('\0', key_length)
+ # keys are tuples
+ key = tuple(elements[:key_length])
+ line = elements[-1]
+ references, value = line.rsplit('\0', 1)
+ if ref_list_length:
+ ref_lists = []
+ for ref_string in references.split('\t'):
+ ref_lists.append(tuple([
+ tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref
+ ]))
+ ref_lists = tuple(ref_lists)
+ node_value = (value, ref_lists)
+ else:
+ node_value = (value, ())
+ nodes.append((key, node_value))
+ return nodes
+
+
+class _InternalNode(object):
+ """An internal node for a serialised B+Tree index."""
+
+ def __init__(self, bytes):
+ """Parse bytes to create an internal node object."""
+ # splitlines mangles the \r delimiters.. don't use it.
+ self.keys = self._parse_lines(bytes.split('\n'))
+
+ def _parse_lines(self, lines):
+ nodes = []
+ self.offset = int(lines[1][7:])
+ for line in lines[2:]:
+ if line == '':
+ return nodes
+ nodes.append(tuple(line.split('\0')))
+ return nodes
+
+
+class BTreeGraphIndex(object):
+ """Access to nodes via the standard GraphIndex interface for B+Tree's.
+
+ Individual nodes are held in a LRU cache. This holds the root node in
+ memory except when very large walks are done.
+ """
+
+ def __init__(self, transport, name, size=None):
+ """Create a B+Tree index object on the index name.
+
+ :param transport: The transport to read data for the index from.
+ :param name: The file name of the index on transport.
+ :param size: Optional size of the index in bytes. This allows
+ compatibility with the GraphIndex API, as well as ensuring that
+ the initial read (to read the root node header) can be done
+ without over-reading even on empty indices, and on small indices
+ allows single-IO to read the entire index.
+ """
+ self._transport = transport
+ self._name = name
+ self._size = size
+ self._page_size = transport.recommended_page_size()
+ self._node_cache = lru_cache.LRUCache()
+ self._key_count = None
+
+ def __eq__(self, other):
+ """Equal when self and other were created with the same parameters."""
+ return (
+ type(self) == type(other) and
+ self._transport == other._transport and
+ self._name == other._name and
+ self._size == other._size)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def _cache_nodes(self, nodes):
+ """Read nodes and cache them in the lru.
+
+ The nodes list supplied is sorted and then read from disk, each node
+ being inserted it into the _node_cache.
+
+ Note: Asking for more nodes than the _node_cache can contain will
+ result in some of the results being immediately discarded, to prevent
+ this an assertion is raised if more nodes are asked for than are
+ cachable.
+ """
+ if len(nodes) > self._node_cache._max_cache:
+ raise AssertionError("Asked for more nodes than can cache.")
+ for node_pos, node in self._read_nodes(sorted(nodes)):
+ self._node_cache.add(node_pos, node)
+
+ def _get_node(self, node_index):
+ """Get a node, from cache or disk.
+
+ After getting it, the node will be cached.
+ """
+ try:
+ return self._node_cache[node_index]
+ except KeyError:
+ self._cache_nodes([node_index])
+ return self._node_cache[node_index]
+
+ def iter_all_entries(self):
+ """Iterate over all keys within the index.
+
+ :return: An iterable of (index, key, value) or (index, key, value, reference_lists).
+ The former tuple is used when there are no reference lists in the
+ index, making the API compatible with simple key:value index types.
+ There is no defined order for the result iteration - it will be in
+ the most efficient order for the index.
+ """
+ if 'evil' in debug.debug_flags:
+ trace.mutter_callsite(3,
+ "iter_all_entries scales with size of history.")
+ if self._key_count is None:
+ self._cache_nodes([0])
+ if not self._key_count:
+ return
+ internal_nodes = sum(self._row_lengths[:-1])
+ start_node = internal_nodes
+ end_node = start_node + self._row_lengths[-1]
+ needed_nodes = range(start_node, end_node)
+ if self.node_ref_lists:
+ for _, node in self._read_nodes(needed_nodes):
+ for key, (value, refs) in node.keys.items():
+ yield (self, key, value, refs)
+ else:
+ for _, node in self._read_nodes(needed_nodes):
+ for key, (value, refs) in node.keys.items():
+ yield (self, key, value)
+
+ def iter_entries(self, keys):
+ """Iterate over keys within the index.
+
+ :param keys: An iterable providing the keys to be retrieved.
+ :return: An iterable as per iter_all_entries, but restricted to the
+ keys supplied. No additional keys will be returned, and every
+ key supplied that is in the index will be returned.
+ """
+ keys = sorted(keys)
+ if not keys:
+ return
+ if self._key_count is None:
+ self._cache_nodes([0])
+ for key in keys:
+ # repeated single-bisection : 'make it work'
+ # FUTURE: look within the current node for the next key,
+ # consider sequential scans
+ # consider block reads
+ # gather stats on hit rate etc
+ # start at the root:
+ node = self._get_node(0)
+ # what row are we looking at
+ row = 0
+ # how many nodes have the previous rows eaten up
+ offset = self._row_lengths[row]
+ while type(node) != _LeafNode:
+ pos = bisect_right(node.keys, key)
+ # There are len(node.keys) pointers dividing len(node.keys) + 1
+ # child nodes. The first pointer is the first key in the second
+ # child node, and so on.
+ # pos indexes into the list of child nodes, any key equal to a
+ # key in node.keys indicates that the key is present to the right
+ # of that divider.
+ node_index = offset + node.offset + pos
+ row += 1
+ offset = offset + self._row_lengths[row]
+ node = self._get_node(node_index)
+ if key in node.keys:
+ value, refs = node.keys[key]
+ if self.node_ref_lists:
+ yield (self, key, value, refs)
+ else:
+ yield (self, key, value)
+
+ def iter_entries_prefix(self, keys):
+ """Iterate over keys within the index using prefix matching.
+
+ Prefix matching is applied within the tuple of a key, not to within
+ the bytestring of each key element. e.g. if you have the keys ('foo',
+ 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
+ only the former key is returned.
+
+ WARNING: Note that this method currently causes a full index parse
+ unconditionally (which is reasonably appropriate as it is a means for
+ thunking many small indices into one larger one and still supplies
+ iter_all_entries at the thunk layer).
+
+ :param keys: An iterable providing the key prefixes to be retrieved.
+ Each key prefix takes the form of a tuple the length of a key, but
+ with the last N elements 'None' rather than a regular bytestring.
+ The first element cannot be 'None'.
+ :return: An iterable as per iter_all_entries, but restricted to the
+ keys with a matching prefix to those supplied. No additional keys
+ will be returned, and every match that is in the index will be
+ returned.
+ """
+ raise NotImplementedError(self.iter_entries_prefix)
+
+ def key_count(self):
+ """Return an estimate of the number of keys in this index.
+
+ For BTreeGraphIndex the estimate is exact as it is contained in the
+ header.
+ """
+ if self._key_count is None:
+ self._cache_nodes([0])
+ return self._key_count
+
+ def _parse_header_from_bytes(self, bytes):
+ """Parse the header from a region of bytes.
+
+ :param bytes: The data to parse.
+ :return: An offset, data tuple such as readv yields, for the unparsed
+ data. (which may be of length 0).
+ """
+ signature = bytes[0:len(self._signature())]
+ if not signature == self._signature():
+ raise bzrerrors.BadIndexFormatSignature(self._name, GraphIndex)
+ lines = bytes[len(self._signature()):].splitlines()
+ options_line = lines[0]
+ if not options_line.startswith(_OPTION_NODE_REFS):
+ raise bzrerrors.BadIndexOptions(self)
+ try:
+ self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
+ except ValueError:
+ raise bzrerrors.BadIndexOptions(self)
+ options_line = lines[1]
+ if not options_line.startswith(_OPTION_KEY_ELEMENTS):
+ raise bzrerrors.BadIndexOptions(self)
+ try:
+ self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
+ except ValueError:
+ raise bzrerrors.BadIndexOptions(self)
+ options_line = lines[2]
+ if not options_line.startswith(_OPTION_LEN):
+ raise bzrerrors.BadIndexOptions(self)
+ try:
+ self._key_count = int(options_line[len(_OPTION_LEN):])
+ except ValueError:
+ raise bzrerrors.BadIndexOptions(self)
+ options_line = lines[3]
+ if not options_line.startswith(_OPTION_ROW_LENGTHS):
+ raise bzrerrors.BadIndexOptions(self)
+ try:
+ self._row_lengths = map(int, [length for length in
+ options_line[len(_OPTION_ROW_LENGTHS):].split(',')
+ if len(length)])
+ except ValueError:
+ raise bzrerrors.BadIndexOptions(self)
+ # calculate the bytes we have processed
+ header_end = (len(signature) + sum(map(len, lines[0:4])) + 4)
+ return header_end, bytes[header_end:]
+
+ def _read_nodes(self, nodes):
+ """Read some nodes from disk into the LRU cache.
+
+ This performs a readv to get the node data into memory, and parses each
+ node, the yields it to the caller. The nodes are requested in the
+ supplied order. If possible doing sort() on the list before requesting
+ a read may improve performance.
+
+ :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
+ :return: None
+ """
+ ranges = []
+ for index in nodes:
+ offset = index * 4096
+ size = 4096
+ if index == 0:
+ # Root node - special case
+ if self._size:
+ size = min(4096, self._size)
+ else:
+ stream = self._transport.get(self._name)
+ start = stream.read(4096)
+ # Avoid doing this again
+ self._size = len(start)
+ size = min(4096, self._size)
+ ranges.append((offset, size))
+ for offset, data in self._transport.readv(self._name, ranges):
+ if offset == 0:
+ # extract the header
+ offset, data = self._parse_header_from_bytes(data)
+ if len(data) == 0:
+ continue
+ bytes = zlib.decompress(data)
+ if bytes.startswith(_LEAF_FLAG):
+ node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
+ elif bytes.startswith(_INTERNAL_FLAG):
+ node = _InternalNode(bytes)
+ else:
+ raise AssertionError("Unknown node type for %r" % bytes)
+ yield offset / 4096, node
+
+ def _signature(self):
+ """The file signature for this index type."""
+ return _BTSIGNATURE
+
+ def validate(self):
+ """Validate that everything in the index can be accessed."""
+ # just read and parse every node.
+ if self._key_count is None:
+ self._read_nodes([0]).next()
+ node_count = sum(self._row_lengths) - 1
+ for node in self._read_nodes(range(1, node_count + 1)):
+ pass
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-01 02:33:04 +0000
+++ b/tests/test_btree_index.py 2008-07-01 09:51:16 +0000
@@ -22,9 +22,10 @@
from bzrlib.plugins import index2
from bzrlib.plugins.index2 import errors, btree_index
from bzrlib.tests import TestCaseWithTransport
-
-
-class TestBTreeBuilder(TestCaseWithTransport):
+from bzrlib.transport import get_transport
+
+
+class BTreeTestCase(TestCaseWithTransport):
# test names here are suffixed by the key length and reference list count
# that they test.
@@ -55,11 +56,16 @@
else:
# serial of this ref in the ref list
refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
+ refs[-1] = tuple(refs[-1])
+ refs = tuple(refs)
else:
refs = ()
keys.append((key, value, refs))
return keys
+
+class TestBTreeBuilder(BTreeTestCase):
+
def test_empty_1_0(self):
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
content = builder.finish().read()
@@ -658,6 +664,34 @@
)
self.assertEqual(expected_leaf2, leaf2_bytes)
+ def test_second_internal_node_pointer(self):
+ # The left most pointer in the second internal node in a row should
+ # pointer to the second node that the internal node is for, _not_
+ # the first, otherwise the first node overlaps with the last node of
+ # the prior internal node on that row.
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ # 25K nodes is enough to create a two internal nodes on the second level
+ nodes = self.make_nodes(25000, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ del builder
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ # Seed the metadata, we're using internal calls now.
+ index.key_count()
+ internal_node1 = index._get_node(1)
+ internal_node2 = index._get_node(2)
+ # The left most node node2 points at should be one after the right most node pointed at by
+ # node1.
+ self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
+ # The left most key of the second node pointed at by internal_node2
+ # should be its first key. We can check this by looking for its first key
+ # in the second node it points at
+ pos = index._row_lengths[0] + index._row_lengths[1] + internal_node2.offset + 1
+ leaf = index._get_node(pos)
+ self.assertTrue(internal_node2.keys[0] in leaf.keys)
+
def test_2_leaves_2_2(self):
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
nodes = self.make_nodes(100, 2, 2)
@@ -680,3 +714,405 @@
)
self.assertEqual(expected_root, root_bytes)
# We assume the other leaf nodes have been written correctly - layering FTW.
+
+
+class TestBTreeIndex(BTreeTestCase):
+
+ def test_trivial_constructor(self):
+ transport = get_transport('trace+' + self.get_url(''))
+ index = btree_index.BTreeGraphIndex(transport, 'index')
+ # Checks the page size at load, but that isn't logged yet.
+ self.assertEqual([], transport._activity)
+
+ def test_with_size_constructor(self):
+ transport = get_transport('trace+' + self.get_url(''))
+ index = btree_index.BTreeGraphIndex(transport, 'index', 1)
+ # Checks the page size at load, but that isn't logged yet.
+ self.assertEqual([], transport._activity)
+
+ def test_empty_key_count_no_size(self):
+ builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
+ transport = get_transport('trace+' + self.get_url(''))
+ transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index')
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ self.assertEqual(0, index.key_count())
+ # The entire index should have been requested (as we generally have the
+ # size available, and doing many small readvs is inappropriate).
+ # We can't tell how much was actually read here, but - check the code.
+ self.assertEqual([('get', 'index'),
+ ('readv', 'index', [(0, 72)], False, None)],
+ transport._activity)
+
+ def test_empty_key_count(self):
+ builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ self.assertEqual(0, index.key_count())
+ # The entire index should have been read, as 4K > size
+ self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
+ transport._activity)
+ self.assertEqual(72, size)
+
+ def test_non_empty_key_count_2_2(self):
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ nodes = self.make_nodes(35, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ self.assertEqual(70, index.key_count())
+ # The entire index should have been read, as it is one page long.
+ self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
+ transport._activity)
+ self.assertEqual(4096, size)
+
+ def test_2_levels_key_count_2_2(self):
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ nodes = self.make_nodes(80, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ self.assertEqual(160, index.key_count())
+ # The entire index should not have been read.
+ self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
+ transport._activity)
+ self.assertEqual(4096*3, size)
+
+ def test_validate_trivial(self):
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ nodes = self.make_nodes(80, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ index.validate()
+ # The entire index should have been read linearly.
+ self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
+ ('readv', 'index', [(4096, 4096), (8192, 4096)], False, None)],
+ transport._activity)
+ self.assertEqual(4096*3, size)
+ # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
+ # node and make validate find them.
+
+ def test_eq_ne(self):
+ # two indices are equal when constructed with the same parameters:
+ transport1 = get_transport('trace+' + self.get_url(''))
+ transport2 = get_transport(self.get_url(''))
+ self.assertTrue(
+ btree_index.BTreeGraphIndex(transport1, 'index') ==
+ btree_index.BTreeGraphIndex(transport1, 'index'))
+ self.assertTrue(
+ btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
+ btree_index.BTreeGraphIndex(transport1, 'index', 20))
+ self.assertFalse(
+ btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
+ btree_index.BTreeGraphIndex(transport2, 'index', 20))
+ self.assertFalse(
+ btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
+ btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
+ self.assertFalse(
+ btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
+ btree_index.BTreeGraphIndex(transport1, 'index', 20))
+ self.assertFalse(
+ btree_index.BTreeGraphIndex(transport1, 'index') !=
+ btree_index.BTreeGraphIndex(transport1, 'index'))
+ self.assertFalse(
+ btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
+ btree_index.BTreeGraphIndex(transport1, 'index', 20))
+ self.assertTrue(
+ btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
+ btree_index.BTreeGraphIndex(transport2, 'index', 20))
+ self.assertTrue(
+ btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
+ btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
+ self.assertTrue(
+ btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
+ btree_index.BTreeGraphIndex(transport1, 'index', 20))
+
+ def test_iter_all_entries_reads(self):
+ # iterating all entries reads the header, then does a linear
+ # read.
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ # 25K nodes is enough to create a three-level index.
+ nodes = self.make_nodes(25000, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ del builder
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ found_nodes = list(index.iter_all_entries())
+ bare_nodes = []
+ for node in found_nodes:
+ self.assertTrue(node[0] is index)
+ bare_nodes.append(node[1:])
+ # Should be as long as the nodes we supplied
+ self.assertEqual(50000, len(found_nodes))
+ # Should have the same content
+ self.assertEqual(set(nodes), set(bare_nodes))
+ # Should have done linear scan IO up the index, ignoring
+ # the internal nodes:
+ self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
+ ('readv', 'index',
+ [(12288, 4096), (16384, 4096), (20480, 4096), (24576, 4096),
+ (28672, 4096), (32768, 4096), (36864, 4096), (40960, 4096),
+ (45056, 4096), (49152, 4096), (53248, 4096), (57344, 4096),
+ (61440, 4096), (65536, 4096), (69632, 4096), (73728, 4096),
+ (77824, 4096), (81920, 4096), (86016, 4096), (90112, 4096),
+ (94208, 4096), (98304, 4096), (102400, 4096), (106496, 4096),
+ (110592, 4096), (114688, 4096), (118784, 4096), (122880, 4096),
+ (126976, 4096), (131072, 4096), (135168, 4096), (139264, 4096),
+ (143360, 4096), (147456, 4096), (151552, 4096), (155648, 4096),
+ (159744, 4096), (163840, 4096), (167936, 4096), (172032, 4096),
+ (176128, 4096), (180224, 4096), (184320, 4096), (188416, 4096),
+ (192512, 4096), (196608, 4096), (200704, 4096), (204800, 4096),
+ (208896, 4096), (212992, 4096), (217088, 4096), (221184, 4096),
+ (225280, 4096), (229376, 4096), (233472, 4096), (237568, 4096),
+ (241664, 4096), (245760, 4096), (249856, 4096), (253952, 4096),
+ (258048, 4096), (262144, 4096), (266240, 4096), (270336, 4096),
+ (274432, 4096), (278528, 4096), (282624, 4096), (286720, 4096),
+ (290816, 4096), (294912, 4096), (299008, 4096), (303104, 4096),
+ (307200, 4096), (311296, 4096), (315392, 4096), (319488, 4096),
+ (323584, 4096), (327680, 4096), (331776, 4096), (335872, 4096),
+ (339968, 4096), (344064, 4096), (348160, 4096), (352256, 4096),
+ (356352, 4096), (360448, 4096), (364544, 4096), (368640, 4096),
+ (372736, 4096), (376832, 4096), (380928, 4096), (385024, 4096),
+ (389120, 4096), (393216, 4096), (397312, 4096), (401408, 4096),
+ (405504, 4096), (409600, 4096), (413696, 4096), (417792, 4096),
+ (421888, 4096), (425984, 4096), (430080, 4096), (434176, 4096),
+ (438272, 4096), (442368, 4096), (446464, 4096), (450560, 4096),
+ (454656, 4096), (458752, 4096), (462848, 4096), (466944, 4096),
+ (471040, 4096), (475136, 4096), (479232, 4096), (483328, 4096),
+ (487424, 4096), (491520, 4096), (495616, 4096), (499712, 4096),
+ (503808, 4096), (507904, 4096), (512000, 4096), (516096, 4096),
+ (520192, 4096), (524288, 4096), (528384, 4096), (532480, 4096),
+ (536576, 4096), (540672, 4096), (544768, 4096), (548864, 4096),
+ (552960, 4096), (557056, 4096), (561152, 4096), (565248, 4096),
+ (569344, 4096), (573440, 4096), (577536, 4096), (581632, 4096),
+ (585728, 4096), (589824, 4096), (593920, 4096), (598016, 4096),
+ (602112, 4096), (606208, 4096), (610304, 4096), (614400, 4096),
+ (618496, 4096), (622592, 4096), (626688, 4096), (630784, 4096),
+ (634880, 4096), (638976, 4096), (643072, 4096), (647168, 4096),
+ (651264, 4096), (655360, 4096), (659456, 4096), (663552, 4096),
+ (667648, 4096), (671744, 4096), (675840, 4096), (679936, 4096),
+ (684032, 4096), (688128, 4096), (692224, 4096), (696320, 4096),
+ (700416, 4096), (704512, 4096), (708608, 4096), (712704, 4096),
+ (716800, 4096), (720896, 4096), (724992, 4096), (729088, 4096),
+ (733184, 4096), (737280, 4096), (741376, 4096), (745472, 4096),
+ (749568, 4096), (753664, 4096), (757760, 4096), (761856, 4096),
+ (765952, 4096), (770048, 4096), (774144, 4096), (778240, 4096),
+ (782336, 4096), (786432, 4096), (790528, 4096), (794624, 4096),
+ (798720, 4096), (802816, 4096), (806912, 4096), (811008, 4096),
+ (815104, 4096), (819200, 4096), (823296, 4096), (827392, 4096),
+ (831488, 4096), (835584, 4096), (839680, 4096), (843776, 4096),
+ (847872, 4096), (851968, 4096), (856064, 4096), (860160, 4096),
+ (864256, 4096), (868352, 4096), (872448, 4096), (876544, 4096),
+ (880640, 4096), (884736, 4096), (888832, 4096), (892928, 4096),
+ (897024, 4096), (901120, 4096), (905216, 4096), (909312, 4096),
+ (913408, 4096), (917504, 4096), (921600, 4096), (925696, 4096),
+ (929792, 4096), (933888, 4096), (937984, 4096), (942080, 4096),
+ (946176, 4096), (950272, 4096), (954368, 4096), (958464, 4096),
+ (962560, 4096), (966656, 4096), (970752, 4096), (974848, 4096),
+ (978944, 4096), (983040, 4096), (987136, 4096), (991232, 4096),
+ (995328, 4096), (999424, 4096), (1003520, 4096), (1007616, 4096),
+ (1011712, 4096), (1015808, 4096), (1019904, 4096), (1024000,
+ 4096), (1028096, 4096), (1032192, 4096), (1036288, 4096),
+ (1040384, 4096), (1044480, 4096), (1048576, 4096), (1052672,
+ 4096), (1056768, 4096), (1060864, 4096), (1064960, 4096),
+ (1069056, 4096), (1073152, 4096), (1077248, 4096), (1081344,
+ 4096), (1085440, 4096), (1089536, 4096), (1093632, 4096),
+ (1097728, 4096), (1101824, 4096), (1105920, 4096), (1110016,
+ 4096), (1114112, 4096), (1118208, 4096), (1122304, 4096),
+ (1126400, 4096), (1130496, 4096), (1134592, 4096), (1138688,
+ 4096), (1142784, 4096), (1146880, 4096), (1150976, 4096),
+ (1155072, 4096), (1159168, 4096), (1163264, 4096), (1167360,
+ 4096), (1171456, 4096), (1175552, 4096), (1179648, 4096),
+ (1183744, 4096), (1187840, 4096), (1191936, 4096), (1196032,
+ 4096), (1200128, 4096), (1204224, 4096), (1208320, 4096),
+ (1212416, 4096), (1216512, 4096), (1220608, 4096), (1224704,
+ 4096), (1228800, 4096), (1232896, 4096), (1236992, 4096),
+ (1241088, 4096), (1245184, 4096), (1249280, 4096), (1253376,
+ 4096), (1257472, 4096), (1261568, 4096), (1265664, 4096),
+ (1269760, 4096), (1273856, 4096), (1277952, 4096), (1282048,
+ 4096), (1286144, 4096), (1290240, 4096), (1294336, 4096),
+ (1298432, 4096), (1302528, 4096), (1306624, 4096), (1310720,
+ 4096), (1314816, 4096), (1318912, 4096), (1323008, 4096),
+ (1327104, 4096), (1331200, 4096), (1335296, 4096), (1339392,
+ 4096), (1343488, 4096), (1347584, 4096), (1351680, 4096),
+ (1355776, 4096), (1359872, 4096), (1363968, 4096), (1368064,
+ 4096), (1372160, 4096), (1376256, 4096), (1380352, 4096),
+ (1384448, 4096), (1388544, 4096), (1392640, 4096), (1396736,
+ 4096), (1400832, 4096), (1404928, 4096), (1409024, 4096),
+ (1413120, 4096), (1417216, 4096), (1421312, 4096), (1425408,
+ 4096), (1429504, 4096), (1433600, 4096), (1437696, 4096),
+ (1441792, 4096), (1445888, 4096), (1449984, 4096), (1454080,
+ 4096), (1458176, 4096), (1462272, 4096), (1466368, 4096),
+ (1470464, 4096), (1474560, 4096), (1478656, 4096), (1482752,
+ 4096), (1486848, 4096), (1490944, 4096), (1495040, 4096),
+ (1499136, 4096), (1503232, 4096), (1507328, 4096), (1511424,
+ 4096), (1515520, 4096), (1519616, 4096), (1523712, 4096),
+ (1527808, 4096), (1531904, 4096), (1536000, 4096), (1540096,
+ 4096), (1544192, 4096), (1548288, 4096), (1552384, 4096),
+ (1556480, 4096), (1560576, 4096), (1564672, 4096), (1568768,
+ 4096), (1572864, 4096), (1576960, 4096), (1581056, 4096),
+ (1585152, 4096), (1589248, 4096), (1593344, 4096), (1597440,
+ 4096), (1601536, 4096), (1605632, 4096), (1609728, 4096),
+ (1613824, 4096), (1617920, 4096), (1622016, 4096), (1626112,
+ 4096), (1630208, 4096), (1634304, 4096), (1638400, 4096),
+ (1642496, 4096), (1646592, 4096), (1650688, 4096), (1654784,
+ 4096), (1658880, 4096), (1662976, 4096), (1667072, 4096),
+ (1671168, 4096), (1675264, 4096), (1679360, 4096), (1683456,
+ 4096), (1687552, 4096), (1691648, 4096), (1695744, 4096),
+ (1699840, 4096), (1703936, 4096), (1708032, 4096), (1712128,
+ 4096), (1716224, 4096), (1720320, 4096), (1724416, 4096),
+ (1728512, 4096), (1732608, 4096), (1736704, 4096), (1740800,
+ 4096), (1744896, 4096), (1748992, 4096), (1753088, 4096),
+ (1757184, 4096), (1761280, 4096), (1765376, 4096), (1769472,
+ 4096), (1773568, 4096), (1777664, 4096), (1781760, 4096),
+ (1785856, 4096), (1789952, 4096), (1794048, 4096), (1798144,
+ 4096), (1802240, 4096), (1806336, 4096), (1810432, 4096),
+ (1814528, 4096), (1818624, 4096), (1822720, 4096), (1826816,
+ 4096), (1830912, 4096), (1835008, 4096), (1839104, 4096),
+ (1843200, 4096), (1847296, 4096), (1851392, 4096), (1855488,
+ 4096), (1859584, 4096), (1863680, 4096), (1867776, 4096),
+ (1871872, 4096), (1875968, 4096), (1880064, 4096)], False, None)],
+ transport._activity)
+ # The entire index should have been read, as it is one page long.
+ self.assertEqual(1884160, size)
+ self.assertEqual(3, len(index._row_lengths))
+
+ def _test_iter_entries_references_resolved(self):
+ index = self.make_index(1, nodes=[
+ (('name', ), 'data', ([('ref', ), ('ref', )], )),
+ (('ref', ), 'refdata', ([], ))])
+ self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
+ (index, ('ref', ), 'refdata', ((), ))]),
+ set(index.iter_entries([('name',), ('ref',)])))
+
+ def test_iter_entries_references_2_refs_resolved(self):
+ # iterating some entries reads just the pages needed. For now, to
+ # get it working and start measuring, only 4K pages are read.
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ # 80 nodes is enough to create a two-level index.
+ nodes = self.make_nodes(80, 2, 2)
+ for node in nodes:
+ builder.add_node(*node)
+ transport = get_transport('trace+' + self.get_url(''))
+ size = transport.put_file('index', builder.finish())
+ del builder
+ index = btree_index.BTreeGraphIndex(transport, 'index', size)
+ del transport._activity[:]
+ self.assertEqual([], transport._activity)
+ # search for one key
+ found_nodes = list(index.iter_entries([nodes[30][0]]))
+ bare_nodes = []
+ for node in found_nodes:
+ self.assertTrue(node[0] is index)
+ bare_nodes.append(node[1:])
+ # Should be as long as the nodes we supplied
+ self.assertEqual(1, len(found_nodes))
+ # Should have the same content
+ self.assertEqual(nodes[30], bare_nodes[0])
+ # Should have read the root node, then one page:
+ self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
+ ('readv', 'index', [(4096, 4096), ], False, None)],
+ transport._activity)
+
+
+class TestBTreeNodes(BTreeTestCase):
+
+ def test_LeafNode_1_0(self):
+ node_bytes = ("type=leaf\n"
+ "0000000000000000000000000000000000000000\x00\x00value:0\n"
+ "1111111111111111111111111111111111111111\x00\x00value:1\n"
+ "2222222222222222222222222222222222222222\x00\x00value:2\n"
+ "3333333333333333333333333333333333333333\x00\x00value:3\n"
+ "4444444444444444444444444444444444444444\x00\x00value:4\n")
+ node = btree_index._LeafNode(node_bytes, 1, 0)
+ # We do direct access, or don't care about order, to leaf nodes most of
+ # the time, so a dict is useful:
+ self.assertEqual({
+ ("0000000000000000000000000000000000000000",): ("value:0", ()),
+ ("1111111111111111111111111111111111111111",): ("value:1", ()),
+ ("2222222222222222222222222222222222222222",): ("value:2", ()),
+ ("3333333333333333333333333333333333333333",): ("value:3", ()),
+ ("4444444444444444444444444444444444444444",): ("value:4", ()),
+ }, node.keys)
+
+ def test_LeafNode_2_2(self):
+ node_bytes = ("type=leaf\n"
+ "00\x0000\x00\t00\x00ref00\x00value:0\n"
+ "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
+ "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
+ "11\x0044\x00\t11\x00ref00\x00value:4\n"
+ ""
+ )
+ node = btree_index._LeafNode(node_bytes, 2, 2)
+ # We do direct access, or don't care about order, to leaf nodes most of
+ # the time, so a dict is useful:
+ self.assertEqual({
+ ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
+ ('00', '11'): ('value:1',
+ ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
+ ('11', '33'): ('value:3',
+ ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
+ ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
+ }, node.keys)
+
+ def test_InternalNode_1(self):
+ node_bytes = ("type=internal\n"
+ "offset=1\n"
+ "0000000000000000000000000000000000000000\n"
+ "1111111111111111111111111111111111111111\n"
+ "2222222222222222222222222222222222222222\n"
+ "3333333333333333333333333333333333333333\n"
+ "4444444444444444444444444444444444444444\n")
+ node = btree_index._InternalNode(node_bytes)
+ # We want to bisect to find the right children from this node, so a
+ # vector is most useful.
+ self.assertEqual([
+ ("0000000000000000000000000000000000000000",),
+ ("1111111111111111111111111111111111111111",),
+ ("2222222222222222222222222222222222222222",),
+ ("3333333333333333333333333333333333333333",),
+ ("4444444444444444444444444444444444444444",),
+ ], node.keys)
+ self.assertEqual(1, node.offset)
+
+ def test_LeafNode_2_2(self):
+ node_bytes = ("type=leaf\n"
+ "00\x0000\x00\t00\x00ref00\x00value:0\n"
+ "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
+ "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
+ "11\x0044\x00\t11\x00ref00\x00value:4\n"
+ ""
+ )
+ node = btree_index._LeafNode(node_bytes, 2, 2)
+ # We do direct access, or don't care about order, to leaf nodes most of
+ # the time, so a dict is useful:
+ self.assertEqual({
+ ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
+ ('00', '11'): ('value:1',
+ ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
+ ('11', '33'): ('value:3',
+ ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
+ ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
+ }, node.keys)
+
More information about the bazaar-commits
mailing list