Rev 3523: Refactor the large function into multiple small ones. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 30 23:55:04 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker
------------------------------------------------------------
revno: 3523
revision-id: john at arbash-meinel.com-20080630225434-z1fw5e24joflaba1
parent: john at arbash-meinel.com-20080630222058-vomk3xczseqxwt9a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: multi_walker
timestamp: Mon 2008-06-30 17:54:34 -0500
message:
Refactor the large function into multiple small ones.
-------------- next part --------------
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py 2008-06-30 22:20:58 +0000
+++ b/bzrlib/tree.py 2008-06-30 22:54:34 +0000
@@ -910,6 +910,12 @@
class MultiWalker(object):
"""Walk multiple trees simultaneously, getting combined results."""
+ # Note: This could be written to not assume you can do out-of-order
+ # lookups. Instead any nodes that don't match in all trees could be
+ # marked as 'deferred', and then returned in the final cleanup loop.
+ # For now, I think it is "nicer" to return things as close to the
+ # "master_tree" order as we can.
+
def __init__(self, master_tree, other_trees):
"""Create a new MultiWalker.
@@ -971,11 +977,13 @@
if not isinstance(path2, unicode):
raise TypeError("'path2' must be a unicode string, not %s: %r"
% (type(path2), path2))
- dirname1, basename1 = os.path.split(path1)
- key1 = (dirname1.split(u'/'), basename1)
- dirname2, basename2 = os.path.split(path2)
- key2 = (dirname2.split(u'/'), basename2)
- return cmp(key1, key2)
+ return cmp(MultiWalker._path_key(path1), MultiWalker._path_key(path2))
+
+ @staticmethod
+ def _path_key(other):
+ path = other[0]
+ dirname, basename = os.path.split(path)
+ return (dirname.split(u'/'), basename)
def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
"""Lookup an inventory entry by file_id.
@@ -1011,82 +1019,109 @@
def iter_all(self):
"""Match up the values in the different trees."""
+ for result in self._walk_master_tree():
+ yield result
+ self._finish_others()
+ for result in self._walk_others():
+ yield result
+
+ def _walk_master_tree(self):
+ """First pass, walk all trees in lock-step.
+
+ When we are done, all nodes in the master_tree will have been
+ processed. _other_walkers, _other_entries, and _others_extra will be
+ set on 'self' for future processing.
+ """
+ # This iterator has the most "inlining" done, because it tends to touch
+ # every file in the tree, while the others only hit nodes that don't
+ # match.
master_iterator = self._master_tree.iter_entries_by_dir()
other_walkers = [other.iter_entries_by_dir()
for other in self._other_trees]
other_entries = [self._step_one(walker) for walker in other_walkers]
-
# Track extra nodes in the other trees
others_extra = [{} for i in xrange(len(self._other_trees))]
master_has_more = True
+ step_one = self._step_one
+ lookup_by_file_id = self._lookup_by_file_id
+ out_of_order_processed = self._out_of_order_processed
+
while master_has_more:
- (master_has_more, master_path,
- master_ie) = self._step_one(master_iterator)
+ (master_has_more, path, master_ie) = step_one(master_iterator)
if not master_has_more:
break
file_id = master_ie.file_id
other_values = []
+ other_values_append = other_values.append
next_other_entries = []
+ next_other_entries_append = next_other_entries.append
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
if not other_has_more:
- other_values.append(self._lookup_by_file_id(
+ other_values_append(lookup_by_file_id(
others_extra[idx], self._other_trees[idx], file_id))
- next_other_entries.append((False, None, None))
+ next_other_entries_append((False, None, None))
elif file_id == other_ie.file_id:
- # This walker matched, so consume this path, and go on to
- # the next
- other_values.append((other_path, other_ie))
- next_other_entries.append(self._step_one(other_walkers[idx]))
+ # This is the critical code path, as most of the entries
+ # should match between most trees.
+ other_values_append((other_path, other_ie))
+ next_other_entries_append(step_one(other_walkers[idx]))
else:
# This walker did not match, step it until it either
# matches, or we know we are past the current walker.
+ other_walker = other_walkers[idx]
+ other_extra = others_extra[idx]
while (other_has_more and
- self._cmp_path_by_dirblock(other_path, master_path) < 0):
+ self._cmp_path_by_dirblock(other_path, path) < 0):
other_file_id = other_ie.file_id
- if other_file_id not in self._out_of_order_processed:
- others_extra[idx][other_file_id] = (other_path,
- other_ie)
+ if other_file_id not in out_of_order_processed:
+ other_extra[other_file_id] = (other_path, other_ie)
other_has_more, other_path, other_ie = \
- self._step_one(other_walkers[idx])
+ step_one(other_walker)
if other_has_more and other_ie.file_id == file_id:
- # We ended up walking to this point, match and continue
- other_values.append((other_path, other_ie))
+ # We ended up walking to this point, match and step
+ # again
+ other_values_append((other_path, other_ie))
other_has_more, other_path, other_ie = \
- self._step_one(other_walkers[idx])
+ step_one(other_walker)
else:
# This record isn't in the normal order, see if it
- # exists at all,
- other_values.append(self._lookup_by_file_id(
- others_extra[idx], self._other_trees[idx], file_id))
- next_other_entries.append((other_has_more, other_path,
+ # exists at all.
+ other_values_append(lookup_by_file_id(
+ other_extra, self._other_trees[idx], file_id))
+ next_other_entries_append((other_has_more, other_path,
other_ie))
other_entries = next_other_entries
# We've matched all the walkers, yield this datapoint
- yield master_path, file_id, master_ie, other_values
+ yield path, file_id, master_ie, other_values
+ self._other_walkers = other_walkers
+ self._other_entries = other_entries
+ self._others_extra = others_extra
- # Make sure we finished iterating all the other trees
- for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
+ def _finish_others(self):
+ """Finish walking the other iterators, so we get all entries."""
+ for idx, info in enumerate(self._other_entries):
+ other_extra = self._others_extra[idx]
+ (other_has_more, other_path, other_ie) = info
while other_has_more:
other_file_id = other_ie.file_id
if other_file_id not in self._out_of_order_processed:
- others_extra[idx][other_file_id] = (other_path, other_ie)
+ other_extra[other_file_id] = (other_path, other_ie)
other_has_more, other_path, other_ie = \
- self._step_one(other_walkers[idx])
-
- # We have walked all of the master tree, now we want to find any extra
- # nodes in the other trees
- def path_key(other):
- path = other[0]
- dirname, basename = os.path.split(path)
- return (dirname.split(u'/'), basename)
-
- for idx, other_extra in enumerate(others_extra):
- # TODO: we could use a key=XXX rather than cmp=XXX
- others = sorted(other_extra.itervalues(), key=path_key)
+ self._step_one(self._other_walkers[idx])
+ del self._other_entries
+
+ def _walk_others(self):
+ """Finish up by walking all the 'deferred' nodes."""
+ # TODO: One alternative would be to grab all possible unprocessed
+ # file_ids, and then sort by path, and then yield them. That
+ # might ensure better ordering, in case a caller strictly
+ # requires parents before children.
+ for idx, other_extra in enumerate(self._others_extra):
+ others = sorted(other_extra.itervalues(), key=self._path_key)
for other_path, other_ie in others:
file_id = other_ie.file_id
# We don't need to check out_of_order_processed here, because
@@ -1095,9 +1130,10 @@
other_extra.pop(file_id)
other_values = [(None, None) for i in xrange(idx)]
other_values.append((other_path, other_ie))
- for alt_idx, alt_extra in enumerate(others_extra[idx+1:]):
+ for alt_idx, alt_extra in enumerate(self._others_extra[idx+1:]):
alt_idx = alt_idx + idx + 1
+ alt_extra = self._others_extra[alt_idx]
+ alt_tree = self._other_trees[alt_idx]
other_values.append(self._lookup_by_file_id(
- others_extra[alt_idx], self._other_trees[alt_idx],
- file_id))
+ alt_extra, alt_tree, file_id))
yield other_path, file_id, None, other_values
More information about the bazaar-commits
mailing list