Rev 3652: Two quick tweaks. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup

John Arbash Meinel john at arbash-meinel.com
Mon Aug 25 20:02:17 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup

------------------------------------------------------------
revno: 3652
revision-id: john at arbash-meinel.com-20080825190216-vdkyinz5p5e5s8kq
parent: john at arbash-meinel.com-20080825183623-n5h3h1d5ky8yr7d6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index_builder_cleanup
timestamp: Mon 2008-08-25 14:02:16 -0500
message:
  Two quick tweaks.
  Change _iter_mem_nodes to use sorted order.
  That way we can sort purely on the 'key' which
  we know is the sort key anyway. This shaves off
  a *lot* of time spent in 'sorted()'.
  Also, if 'references' is in our output nodes,
  we know we've already checked that it is a valid
  key, so we don't need to check it again.
  This shaves another 600ms or so for a bzr.dev tree.
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2008-08-25 16:42:50 +0000
+++ b/bzrlib/btree_index.py	2008-08-25 19:02:16 +0000
@@ -169,7 +169,10 @@
         node_refs = []
         for reference_list in references:
             for reference in reference_list:
-                self._check_key(reference)
+                # If reference is in self._nodes, then we have already checked
+                # it
+                if reference not in self._nodes:
+                    self._check_key(reference)
             node_refs.append(tuple(reference_list))
         if key in self._nodes:
             raise errors.BadIndexDuplicateKey(key, self)
@@ -190,7 +193,7 @@
             key_dict[key[-1]] = key_value
         if len(self._keys) < self._spill_at:
             return
-        iterators_to_combine = [iter(sorted(self._iter_mem_nodes()))]
+        iterators_to_combine = [self._iter_mem_nodes()]
         pos = -1
         for pos, backing in enumerate(self._backing_indices):
             if backing is None:
@@ -228,12 +231,15 @@
 
     def _iter_mem_nodes(self):
         """Iterate over the nodes held in memory."""
+        nodes = self._nodes
         if self.reference_lists:
-            return ((self, key, value, references)
-                    for key, (references, value) in self._nodes.iteritems())
+            for key in sorted(nodes):
+                references, value = nodes[key]
+                yield self, key, value, references
         else:
-            return ((self, key, value)
-                    for key, (references, value) in self._nodes.iteritems())
+            for key in sorted(nodes):
+                references, value = nodes[key]
+                yield self, key, value
 
     def _iter_smallest(self, iterators_to_combine):
         if len(iterators_to_combine) == 1:
@@ -422,7 +428,7 @@
                 "iter_all_entries scales with size of history.")
         # Doing serial rather than ordered would be faster; but this shouldn't
         # be getting called routinely anyway.
-        iterators = [iter(sorted(self._iter_mem_nodes()))]
+        iterators = [self._iter_mem_nodes()]
         for backing in self._backing_indices:
             if backing is not None:
                 iterators.append(backing.iter_all_entries())



More information about the bazaar-commits mailing list