Rev 4842: (jam) Update BTreeBuilder to remove ._keys and use StaticTuple in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Nov 30 22:04:48 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4842 [merge]
revision-id: pqm at pqm.ubuntu.com-20091130220445-vbfmmgocbgcs195q
parent: pqm at pqm.ubuntu.com-20091130212354-exss9hosbrdvo5tk
parent: john at arbash-meinel.com-20091130212107-2ocawio2fgx1fruo
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-11-30 22:04:45 +0000
message:
(jam) Update BTreeBuilder to remove ._keys and use StaticTuple
modified:
bzrlib/_btree_serializer_py.py _parse_btree_py.py-20080703034413-3q25bklkenti3p8p-3
bzrlib/_static_tuple_py.py _keys_type_py.py-20090908213415-o1ww98k9a8aqm0bm-1
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/static_tuple.py static_tuple.py-20091012195926-qa7zh2stf4xtblm8-1
bzrlib/tests/test__static_tuple.py test__keys_type.py-20090908204220-aa346ccw4l37jzt7-2
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/_btree_serializer_py.py'
--- a/bzrlib/_btree_serializer_py.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/_btree_serializer_py.py 2009-11-07 00:28:26 +0000
@@ -17,27 +17,33 @@
"""B+Tree index parsing."""
+from bzrlib import static_tuple
+
+
def _parse_leaf_lines(bytes, key_length, ref_list_length):
lines = bytes.split('\n')
nodes = []
+ as_st = static_tuple.StaticTuple.from_sequence
+ stuple = static_tuple.StaticTuple
for line in lines[1:]:
if line == '':
return nodes
elements = line.split('\0', key_length)
# keys are tuples
- key = tuple(elements[:key_length])
+ key = as_st(elements[:key_length]).intern()
line = elements[-1]
references, value = line.rsplit('\0', 1)
if ref_list_length:
ref_lists = []
for ref_string in references.split('\t'):
- ref_lists.append(tuple([
- tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref
- ]))
- ref_lists = tuple(ref_lists)
- node_value = (value, ref_lists)
+ ref_list = as_st([as_st(ref.split('\0')).intern()
+ for ref in ref_string.split('\r') if ref])
+ ref_lists.append(ref_list)
+ ref_lists = as_st(ref_lists)
+ node_value = stuple(value, ref_lists)
else:
- node_value = (value, ())
+ node_value = stuple(value, stuple())
+ # No need for StaticTuple here as it is put into a dict
nodes.append((key, node_value))
return nodes
=== modified file 'bzrlib/_static_tuple_py.py'
--- a/bzrlib/_static_tuple_py.py 2009-10-27 03:39:16 +0000
+++ b/bzrlib/_static_tuple_py.py 2009-11-28 21:54:08 +0000
@@ -58,7 +58,7 @@
return StaticTuple.from_sequence(tuple.__add__(self,other))
def as_tuple(self):
- return self
+ return tuple(self)
def intern(self):
return _interned_tuples.setdefault(self, self)
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-10-29 16:15:43 +0000
+++ b/bzrlib/btree_index.py 2009-11-07 01:58:11 +0000
@@ -31,6 +31,7 @@
index,
lru_cache,
osutils,
+ static_tuple,
trace,
)
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
@@ -159,16 +160,16 @@
:param value: The value to associate with the key. It may be any
bytes as long as it does not contain \0 or \n.
"""
+ # Ensure that 'key' is a StaticTuple
+ key = static_tuple.StaticTuple.from_sequence(key).intern()
# we don't care about absent_references
node_refs, _ = self._check_key_ref_value(key, references, value)
if key in self._nodes:
raise errors.BadIndexDuplicateKey(key, self)
- # TODO: StaticTuple
- self._nodes[key] = (node_refs, value)
- self._keys.add(key)
+ self._nodes[key] = static_tuple.StaticTuple(node_refs, value)
if self._nodes_by_key is not None and self._key_length > 1:
self._update_nodes_by_key(key, value, node_refs)
- if len(self._keys) < self._spill_at:
+ if len(self._nodes) < self._spill_at:
return
self._spill_mem_keys_to_disk()
@@ -203,7 +204,6 @@
self._backing_indices[backing_pos] = None
else:
self._backing_indices.append(new_backing)
- self._keys = set()
self._nodes = {}
self._nodes_by_key = None
@@ -463,14 +463,22 @@
efficient order for the index (keys iteration order in this case).
"""
keys = set(keys)
- local_keys = keys.intersection(self._keys)
+ # Note: We don't use keys.intersection() here. If you read the C api,
+ # set.intersection(other) special cases when other is a set and
+ # will iterate the smaller of the two and lookup in the other.
+ # It does *not* do this for any other type (even dict, unlike
+ # some other set functions.) Since we expect keys is generally <<
+ # self._nodes, it is faster to iterate over it in a list
+ # comprehension
+ nodes = self._nodes
+ local_keys = [key for key in keys if key in nodes]
if self.reference_lists:
for key in local_keys:
- node = self._nodes[key]
+ node = nodes[key]
yield self, key, node[1], node[0]
else:
for key in local_keys:
- node = self._nodes[key]
+ node = nodes[key]
yield self, key, node[1]
# Find things that are in backing indices that have not been handled
# yet.
@@ -586,7 +594,7 @@
For InMemoryGraphIndex the estimate is exact.
"""
- return len(self._keys) + sum(backing.key_count() for backing in
+ return len(self._nodes) + sum(backing.key_count() for backing in
self._backing_indices if backing is not None)
def validate(self):
@@ -624,11 +632,11 @@
def _parse_lines(self, lines):
nodes = []
self.offset = int(lines[1][7:])
+ as_st = static_tuple.StaticTuple.from_sequence
for line in lines[2:]:
if line == '':
break
- # TODO: Switch to StaticTuple here.
- nodes.append(tuple(map(intern, line.split('\0'))))
+ nodes.append(as_st(map(intern, line.split('\0'))).intern())
return nodes
=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py 2009-11-30 21:23:54 +0000
+++ b/bzrlib/builtins.py 2009-11-30 22:04:45 +0000
@@ -43,6 +43,7 @@
reconfigure,
rename_map,
revision as _mod_revision,
+ static_tuple,
symbol_versioning,
timestamp,
transport,
@@ -436,8 +437,7 @@
for node in bt.iter_all_entries():
# Node is made up of:
# (index, key, value, [references])
- refs_as_tuples = tuple([tuple([tuple(ref) for ref in ref_list])
- for ref_list in node[3]])
+ refs_as_tuples = static_tuple.as_tuples(node[3])
as_tuple = (tuple(node[1]), node[2], refs_as_tuples)
self.outf.write('%s\n' % (as_tuple,))
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-10-23 15:46:01 +0000
+++ b/bzrlib/groupcompress.py 2009-11-28 21:54:08 +0000
@@ -31,6 +31,7 @@
knit,
osutils,
pack,
+ static_tuple,
trace,
)
from bzrlib.btree_index import BTreeBuilder
@@ -1877,8 +1878,11 @@
if not random_id:
present_nodes = self._get_entries(keys)
for (index, key, value, node_refs) in present_nodes:
- if node_refs != keys[key][1]:
- details = '%s %s %s' % (key, (value, node_refs), keys[key])
+ # Sometimes these are passed as a list rather than a tuple
+ node_refs = static_tuple.as_tuples(node_refs)
+ passed = static_tuple.as_tuples(keys[key])
+ if node_refs != passed[1]:
+ details = '%s %s %s' % (key, (value, node_refs), passed)
if self._inconsistency_fatal:
raise errors.KnitCorrupt(self, "inconsistent details"
" in add_records: %s" %
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-10-29 05:54:49 +0000
+++ b/bzrlib/index.py 2009-11-07 01:58:11 +0000
@@ -93,9 +93,10 @@
:param key_elements: The number of bytestrings in each key.
"""
self.reference_lists = reference_lists
- self._keys = set()
# A dict of {key: (absent, ref_lists, value)}
self._nodes = {}
+ # Keys that are referenced but not actually present in this index
+ self._absent_keys = set()
self._nodes_by_key = None
self._key_length = key_elements
self._optimize_for_size = False
@@ -165,9 +166,9 @@
return
key_dict = self._nodes_by_key
if self.reference_lists:
- key_value = key, value, node_refs
+ key_value = StaticTuple(key, value, node_refs)
else:
- key_value = key, value
+ key_value = StaticTuple(key, value)
for subkey in key[:-1]:
key_dict = key_dict.setdefault(subkey, {})
key_dict[key[-1]] = key_value
@@ -189,6 +190,7 @@
This may contain duplicates if the same key is
referenced in multiple lists.
"""
+ as_st = StaticTuple.from_sequence
self._check_key(key)
if _newline_null_re.search(value) is not None:
raise errors.BadIndexValue(value)
@@ -203,10 +205,8 @@
if reference not in self._nodes:
self._check_key(reference)
absent_references.append(reference)
- # TODO: StaticTuple
- node_refs.append(tuple(reference_list))
- # TODO: StaticTuple
- return tuple(node_refs), absent_references
+ node_refs.append(as_st(reference_list))
+ return as_st(node_refs), absent_references
def add_node(self, key, value, references=()):
"""Add a node to the index.
@@ -227,8 +227,9 @@
# There may be duplicates, but I don't think it is worth worrying
# about
self._nodes[reference] = ('a', (), '')
+ self._absent_keys.update(absent_references)
+ self._absent_keys.discard(key)
self._nodes[key] = ('', node_refs, value)
- self._keys.add(key)
if self._nodes_by_key is not None and self._key_length > 1:
self._update_nodes_by_key(key, value, node_refs)
@@ -243,7 +244,8 @@
lines = [_SIGNATURE]
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
- lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
+ key_count = len(self._nodes) - len(self._absent_keys)
+ lines.append(_OPTION_LEN + str(key_count) + '\n')
prefix_length = sum(len(x) for x in lines)
# references are byte offsets. To avoid having to do nasty
# polynomial work to resolve offsets (references to later in the
@@ -463,7 +465,6 @@
node_value = value
self._nodes[key] = node_value
# cache the keys for quick set intersections
- self._keys = set(self._nodes)
if trailers != 1:
# there must be one line - the empty trailer line.
raise errors.BadIndexData(self)
@@ -484,10 +485,11 @@
raise ValueError('No ref list %d, index has %d ref lists'
% (ref_list_num, self.node_ref_lists))
refs = set()
- for key, (value, ref_lists) in self._nodes.iteritems():
+ nodes = self._nodes
+ for key, (value, ref_lists) in nodes.iteritems():
ref_list = ref_lists[ref_list_num]
- refs.update(ref_list)
- return refs - self._keys
+ refs.update([ref for ref in ref_list if ref not in nodes])
+ return refs
def _get_nodes_by_key(self):
if self._nodes_by_key is None:
@@ -620,14 +622,17 @@
def _iter_entries_from_total_buffer(self, keys):
"""Iterate over keys when the entire index is parsed."""
- keys = keys.intersection(self._keys)
+ # Note: See the note in BTreeBuilder.iter_entries for why we don't use
+ # .intersection() here
+ nodes = self._nodes
+ keys = [key for key in keys if key in nodes]
if self.node_ref_lists:
for key in keys:
- value, node_refs = self._nodes[key]
+ value, node_refs = nodes[key]
yield self, key, value, node_refs
else:
for key in keys:
- yield self, key, self._nodes[key]
+ yield self, key, nodes[key]
def iter_entries(self, keys):
"""Iterate over keys within the index.
@@ -1507,15 +1512,18 @@
defined order for the result iteration - it will be in the most
efficient order for the index (keys iteration order in this case).
"""
- keys = set(keys)
+ # Note: See BTreeBuilder.iter_entries for an explanation of why we
+ # aren't using set().intersection() here
+ nodes = self._nodes
+ keys = [key for key in keys if key in nodes]
if self.reference_lists:
- for key in keys.intersection(self._keys):
- node = self._nodes[key]
+ for key in keys:
+ node = nodes[key]
if not node[0]:
yield self, key, node[2], node[1]
else:
- for key in keys.intersection(self._keys):
- node = self._nodes[key]
+ for key in keys:
+ node = nodes[key]
if not node[0]:
yield self, key, node[2]
@@ -1595,7 +1603,7 @@
For InMemoryGraphIndex the estimate is exact.
"""
- return len(self._keys)
+ return len(self._nodes) - len(self._absent_keys)
def validate(self):
"""In memory index's have no known corruption at the moment."""
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2009-10-29 05:54:49 +0000
+++ b/bzrlib/knit.py 2009-11-30 21:21:07 +0000
@@ -69,6 +69,7 @@
lru_cache,
pack,
progress,
+ static_tuple,
trace,
tsort,
tuned_gzip,
@@ -2944,10 +2945,15 @@
if not random_id:
present_nodes = self._get_entries(keys)
for (index, key, value, node_refs) in present_nodes:
+ parents = node_refs[:1]
+ # Sometimes these are passed as a list rather than a tuple
+ passed = static_tuple.as_tuples(keys[key])
+ passed_parents = passed[1][:1]
if (value[0] != keys[key][0][0] or
- node_refs[:1] != keys[key][1][:1]):
+ parents != passed_parents):
+ node_refs = static_tuple.as_tuples(node_refs)
raise KnitCorrupt(self, "inconsistent details in add_records"
- ": %s %s" % ((value, node_refs), keys[key]))
+ ": %s %s" % ((value, node_refs), passed))
del keys[key]
result = []
if self._parents:
=== modified file 'bzrlib/static_tuple.py'
--- a/bzrlib/static_tuple.py 2009-11-02 17:39:39 +0000
+++ b/bzrlib/static_tuple.py 2009-11-28 21:54:08 +0000
@@ -40,3 +40,17 @@
if type(obj) is not StaticTuple:
raise TypeError('We expected a StaticTuple not a %s' % (type(obj),))
return obj
+
+
+def as_tuples(obj):
+ """Ensure that the object and any referenced objects are plain tuples.
+
+ :param obj: a list, tuple or StaticTuple
+ :return: a plain tuple instance, with all children also being tuples.
+ """
+ result = []
+ for item in obj:
+ if isinstance(item, (tuple, list, StaticTuple)):
+ item = as_tuples(item)
+ result.append(item)
+ return tuple(result)
=== modified file 'bzrlib/tests/test__static_tuple.py'
--- a/bzrlib/tests/test__static_tuple.py 2009-11-02 17:15:20 +0000
+++ b/bzrlib/tests/test__static_tuple.py 2009-11-28 21:54:08 +0000
@@ -148,9 +148,34 @@
k = self.module.StaticTuple('foo')
t = k.as_tuple()
self.assertEqual(('foo',), t)
+ self.assertIsInstance(t, tuple)
+ self.assertFalse(isinstance(t, self.module.StaticTuple))
k = self.module.StaticTuple('foo', 'bar')
t = k.as_tuple()
self.assertEqual(('foo', 'bar'), t)
+ k2 = self.module.StaticTuple(1, k)
+ t = k2.as_tuple()
+ self.assertIsInstance(t, tuple)
+ # For pickling to work, we need to keep the sub-items as StaticTuple so
+ # that it knows that they also need to be converted.
+ self.assertIsInstance(t[1], self.module.StaticTuple)
+ self.assertEqual((1, ('foo', 'bar')), t)
+
+ def test_as_tuples(self):
+ k1 = self.module.StaticTuple('foo', 'bar')
+ t = static_tuple.as_tuples(k1)
+ self.assertIsInstance(t, tuple)
+ self.assertEqual(('foo', 'bar'), t)
+ k2 = self.module.StaticTuple(1, k1)
+ t = static_tuple.as_tuples(k2)
+ self.assertIsInstance(t, tuple)
+ self.assertIsInstance(t[1], tuple)
+ self.assertEqual((1, ('foo', 'bar')), t)
+ mixed = (1, k1)
+ t = static_tuple.as_tuples(mixed)
+ self.assertIsInstance(t, tuple)
+ self.assertIsInstance(t[1], tuple)
+ self.assertEqual((1, ('foo', 'bar')), t)
def test_len(self):
k = self.module.StaticTuple()
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-10-29 16:15:43 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-11-07 01:58:11 +0000
@@ -359,23 +359,19 @@
# Test the parts of the index that take up memory are doing so
# predictably.
self.assertEqual(1, len(builder._nodes))
- self.assertEqual(1, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
builder.add_node(*nodes[1])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(1, len(builder._backing_indices))
self.assertEqual(2, builder._backing_indices[0].key_count())
# now back to memory
builder.add_node(*nodes[2])
self.assertEqual(1, len(builder._nodes))
- self.assertEqual(1, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
# And spills to a second backing index combing all
builder.add_node(*nodes[3])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(2, len(builder._backing_indices))
self.assertEqual(None, builder._backing_indices[0])
@@ -384,7 +380,6 @@
builder.add_node(*nodes[4])
builder.add_node(*nodes[5])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(2, len(builder._backing_indices))
self.assertEqual(2, builder._backing_indices[0].key_count())
@@ -448,23 +443,19 @@
# Test the parts of the index that take up memory are doing so
# predictably.
self.assertEqual(1, len(builder._nodes))
- self.assertEqual(1, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
builder.add_node(*nodes[1])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(1, len(builder._backing_indices))
self.assertEqual(2, builder._backing_indices[0].key_count())
# now back to memory
builder.add_node(*nodes[2])
self.assertEqual(1, len(builder._nodes))
- self.assertEqual(1, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
# And spills to a second backing index but doesn't combine
builder.add_node(*nodes[3])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(2, len(builder._backing_indices))
for backing_index in builder._backing_indices:
@@ -473,7 +464,6 @@
builder.add_node(*nodes[4])
builder.add_node(*nodes[5])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(3, len(builder._backing_indices))
for backing_index in builder._backing_indices:
@@ -538,11 +528,9 @@
builder.add_node(*nodes[0])
# Test the parts of the index that take up memory are doing so
# predictably.
- self.assertEqual(1, len(builder._keys))
self.assertEqual(1, len(builder._nodes))
self.assertIs(None, builder._nodes_by_key)
builder.add_node(*nodes[1])
- self.assertEqual(0, len(builder._keys))
self.assertEqual(0, len(builder._nodes))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(1, len(builder._backing_indices))
@@ -551,7 +539,6 @@
old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
builder.add_node(*nodes[2])
self.assertEqual(1, len(builder._nodes))
- self.assertEqual(1, len(builder._keys))
self.assertIsNot(None, builder._nodes_by_key)
self.assertNotEqual({}, builder._nodes_by_key)
# We should have a new entry
@@ -559,7 +546,6 @@
# And spills to a second backing index combing all
builder.add_node(*nodes[3])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(2, len(builder._backing_indices))
self.assertEqual(None, builder._backing_indices[0])
@@ -568,7 +554,6 @@
builder.add_node(*nodes[4])
builder.add_node(*nodes[5])
self.assertEqual(0, len(builder._nodes))
- self.assertEqual(0, len(builder._keys))
self.assertIs(None, builder._nodes_by_key)
self.assertEqual(2, len(builder._backing_indices))
self.assertEqual(2, builder._backing_indices[0].key_count())
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2009-10-19 15:45:10 +0000
+++ b/bzrlib/tests/test_index.py 2009-11-07 01:58:11 +0000
@@ -235,7 +235,7 @@
builder.add_node(('2-key', ), '', (references, ))
stream = builder.finish()
contents = stream.read()
- self.assertEqual(
+ self.assertEqualDiff(
"Bazaar Graph Index 1\nnode_ref_lists=1\nkey_elements=1\nlen=1\n"
"0\x00a\x00\x00\n"
"1\x00a\x00\x00\n"
More information about the bazaar-commits
mailing list