Rev 3523: Refactor the large function into multiple small ones. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

John Arbash Meinel john at arbash-meinel.com
Mon Jun 30 23:55:04 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

------------------------------------------------------------
revno: 3523
revision-id: john at arbash-meinel.com-20080630225434-z1fw5e24joflaba1
parent: john at arbash-meinel.com-20080630222058-vomk3xczseqxwt9a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: multi_walker
timestamp: Mon 2008-06-30 17:54:34 -0500
message:
  Refactor the large function into multiple small ones.
-------------- next part --------------
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py	2008-06-30 22:20:58 +0000
+++ b/bzrlib/tree.py	2008-06-30 22:54:34 +0000
@@ -910,6 +910,12 @@
 class MultiWalker(object):
     """Walk multiple trees simultaneously, getting combined results."""
 
+    # Note: This could be written to not assume you can do out-of-order
+    #       lookups. Instead any nodes that don't match in all trees could be
+    #       marked as 'deferred', and then returned in the final cleanup loop.
+    #       For now, I think it is "nicer" to return things as close to the
+    #       "master_tree" order as we can.
+
     def __init__(self, master_tree, other_trees):
         """Create a new MultiWalker.
 
@@ -971,11 +977,13 @@
         if not isinstance(path2, unicode):
             raise TypeError("'path2' must be a unicode string, not %s: %r"
                             % (type(path2), path2))
-        dirname1, basename1 = os.path.split(path1)
-        key1 = (dirname1.split(u'/'), basename1)
-        dirname2, basename2 = os.path.split(path2)
-        key2 = (dirname2.split(u'/'), basename2)
-        return cmp(key1, key2)
+        return cmp(MultiWalker._path_key(path1), MultiWalker._path_key(path2))
+
+    @staticmethod
+    def _path_key(other):
+        path = other[0]
+        dirname, basename = os.path.split(path)
+        return (dirname.split(u'/'), basename)
 
     def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
         """Lookup an inventory entry by file_id.
@@ -1011,82 +1019,109 @@
 
     def iter_all(self):
         """Match up the values in the different trees."""
+        for result in self._walk_master_tree():
+            yield result
+        self._finish_others()
+        for result in self._walk_others():
+            yield result
+
+    def _walk_master_tree(self):
+        """First pass, walk all trees in lock-step.
+        
+        When we are done, all nodes in the master_tree will have been
+        processed. _other_walkers, _other_entries, and _others_extra will be
+        set on 'self' for future processing.
+        """
+        # This iterator has the most "inlining" done, because it tends to touch
+        # every file in the tree, while the others only hit nodes that don't
+        # match.
         master_iterator = self._master_tree.iter_entries_by_dir()
 
         other_walkers = [other.iter_entries_by_dir()
                          for other in self._other_trees]
         other_entries = [self._step_one(walker) for walker in other_walkers]
-
         # Track extra nodes in the other trees
         others_extra = [{} for i in xrange(len(self._other_trees))]
 
         master_has_more = True
+        step_one = self._step_one
+        lookup_by_file_id = self._lookup_by_file_id
+        out_of_order_processed = self._out_of_order_processed
+
         while master_has_more:
-            (master_has_more, master_path,
-             master_ie) = self._step_one(master_iterator)
+            (master_has_more, path, master_ie) = step_one(master_iterator)
             if not master_has_more:
                 break
 
             file_id = master_ie.file_id
             other_values = []
+            other_values_append = other_values.append
             next_other_entries = []
+            next_other_entries_append = next_other_entries.append
             for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
                 if not other_has_more:
-                    other_values.append(self._lookup_by_file_id(
+                    other_values_append(lookup_by_file_id(
                         others_extra[idx], self._other_trees[idx], file_id))
-                    next_other_entries.append((False, None, None))
+                    next_other_entries_append((False, None, None))
                 elif file_id == other_ie.file_id:
-                    # This walker matched, so consume this path, and go on to
-                    # the next
-                    other_values.append((other_path, other_ie))
-                    next_other_entries.append(self._step_one(other_walkers[idx]))
+                    # This is the critical code path, as most of the entries
+                    # should match between most trees.
+                    other_values_append((other_path, other_ie))
+                    next_other_entries_append(step_one(other_walkers[idx]))
                 else:
                     # This walker did not match, step it until it either
                     # matches, or we know we are past the current walker.
+                    other_walker = other_walkers[idx]
+                    other_extra = others_extra[idx]
                     while (other_has_more and
-                           self._cmp_path_by_dirblock(other_path, master_path) < 0):
+                           self._cmp_path_by_dirblock(other_path, path) < 0):
                         other_file_id = other_ie.file_id
-                        if other_file_id not in self._out_of_order_processed:
-                            others_extra[idx][other_file_id] = (other_path,
-                                                                other_ie)
+                        if other_file_id not in out_of_order_processed:
+                            other_extra[other_file_id] = (other_path, other_ie)
                         other_has_more, other_path, other_ie = \
-                            self._step_one(other_walkers[idx])
+                            step_one(other_walker)
                     if other_has_more and other_ie.file_id == file_id:
-                        # We ended up walking to this point, match and continue
-                        other_values.append((other_path, other_ie))
+                        # We ended up walking to this point, match and step
+                        # again
+                        other_values_append((other_path, other_ie))
                         other_has_more, other_path, other_ie = \
-                            self._step_one(other_walkers[idx])
+                            step_one(other_walker)
                     else:
                         # This record isn't in the normal order, see if it
-                        # exists at all,
-                        other_values.append(self._lookup_by_file_id(
-                            others_extra[idx], self._other_trees[idx], file_id))
-                    next_other_entries.append((other_has_more, other_path,
+                        # exists at all.
+                        other_values_append(lookup_by_file_id(
+                            other_extra, self._other_trees[idx], file_id))
+                    next_other_entries_append((other_has_more, other_path,
                                                other_ie))
             other_entries = next_other_entries
 
             # We've matched all the walkers, yield this datapoint
-            yield master_path, file_id, master_ie, other_values
+            yield path, file_id, master_ie, other_values
+        self._other_walkers = other_walkers
+        self._other_entries = other_entries
+        self._others_extra = others_extra
 
-        # Make sure we finished iterating all the other trees
-        for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
+    def _finish_others(self):
+        """Finish walking the other iterators, so we get all entries."""
+        for idx, info in enumerate(self._other_entries):
+            other_extra = self._others_extra[idx]
+            (other_has_more, other_path, other_ie) = info
             while other_has_more:
                 other_file_id = other_ie.file_id
                 if other_file_id not in self._out_of_order_processed:
-                    others_extra[idx][other_file_id] = (other_path, other_ie)
+                    other_extra[other_file_id] = (other_path, other_ie)
                 other_has_more, other_path, other_ie = \
-                    self._step_one(other_walkers[idx])
-
-        # We have walked all of the master tree, now we want to find any extra
-        # nodes in the other trees
-        def path_key(other):
-            path = other[0]
-            dirname, basename = os.path.split(path)
-            return (dirname.split(u'/'), basename)
-
-        for idx, other_extra in enumerate(others_extra):
-            # TODO: we could use a key=XXX rather than cmp=XXX
-            others = sorted(other_extra.itervalues(), key=path_key)
+                    self._step_one(self._other_walkers[idx])
+        del self._other_entries
+
+    def _walk_others(self):
+        """Finish up by walking all the 'deferred' nodes."""
+        # TODO: One alternative would be to grab all possible unprocessed
+        #       file_ids, and then sort by path, and then yield them. That
+        #       might ensure better ordering, in case a caller strictly
+        #       requires parents before children.
+        for idx, other_extra in enumerate(self._others_extra):
+            others = sorted(other_extra.itervalues(), key=self._path_key)
             for other_path, other_ie in others:
                 file_id = other_ie.file_id
                 # We don't need to check out_of_order_processed here, because
@@ -1095,9 +1130,10 @@
                 other_extra.pop(file_id)
                 other_values = [(None, None) for i in xrange(idx)]
                 other_values.append((other_path, other_ie))
-                for alt_idx, alt_extra in enumerate(others_extra[idx+1:]):
+                for alt_idx, alt_extra in enumerate(self._others_extra[idx+1:]):
                     alt_idx = alt_idx + idx + 1
+                    alt_extra = self._others_extra[alt_idx]
+                    alt_tree = self._other_trees[alt_idx]
                     other_values.append(self._lookup_by_file_id(
-                        others_extra[alt_idx], self._other_trees[alt_idx],
-                        file_id))
+                                            alt_extra, alt_tree, file_id))
                 yield other_path, file_id, None, other_values



More information about the bazaar-commits mailing list