Rev 3652: Two quick tweaks. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 25 20:02:17 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup
------------------------------------------------------------
revno: 3652
revision-id: john at arbash-meinel.com-20080825190216-vdkyinz5p5e5s8kq
parent: john at arbash-meinel.com-20080825183623-n5h3h1d5ky8yr7d6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index_builder_cleanup
timestamp: Mon 2008-08-25 14:02:16 -0500
message:
Two quick tweaks.
Change _iter_mem_nodes to use sorted order.
That way we can sort purely on the 'key' which
we know is the sort key anyway. This shaves off
a *lot* of time spent in 'sorted()'.
Also, if a reference key is already present in our output nodes,
we know it was checked as a valid key when it was added,
so we don't need to check it again.
This shaves another 600ms or so for a bzr.dev tree.
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-08-25 16:42:50 +0000
+++ b/bzrlib/btree_index.py 2008-08-25 19:02:16 +0000
@@ -169,7 +169,10 @@
node_refs = []
for reference_list in references:
for reference in reference_list:
- self._check_key(reference)
+ # If reference is in self._nodes, then we have already checked
+ # it
+ if reference not in self._nodes:
+ self._check_key(reference)
node_refs.append(tuple(reference_list))
if key in self._nodes:
raise errors.BadIndexDuplicateKey(key, self)
@@ -190,7 +193,7 @@
key_dict[key[-1]] = key_value
if len(self._keys) < self._spill_at:
return
- iterators_to_combine = [iter(sorted(self._iter_mem_nodes()))]
+ iterators_to_combine = [self._iter_mem_nodes()]
pos = -1
for pos, backing in enumerate(self._backing_indices):
if backing is None:
@@ -228,12 +231,15 @@
def _iter_mem_nodes(self):
"""Iterate over the nodes held in memory."""
+ nodes = self._nodes
if self.reference_lists:
- return ((self, key, value, references)
- for key, (references, value) in self._nodes.iteritems())
+ for key in sorted(nodes):
+ references, value = nodes[key]
+ yield self, key, value, references
else:
- return ((self, key, value)
- for key, (references, value) in self._nodes.iteritems())
+ for key in sorted(nodes):
+ references, value = nodes[key]
+ yield self, key, value
def _iter_smallest(self, iterators_to_combine):
if len(iterators_to_combine) == 1:
@@ -422,7 +428,7 @@
"iter_all_entries scales with size of history.")
# Doing serial rather than ordered would be faster; but this shouldn't
# be getting called routinely anyway.
- iterators = [iter(sorted(self._iter_mem_nodes()))]
+ iterators = [self._iter_mem_nodes()]
for backing in self._backing_indices:
if backing is not None:
iterators.append(backing.iter_all_entries())
More information about the bazaar-commits
mailing list