Rev 3517: Handle when the other tree has extra nodes, and we need to yield them. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 30 22:28:21 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker
------------------------------------------------------------
revno: 3517
revision-id: john at arbash-meinel.com-20080630212750-h05siuu3vd40dhhf
parent: john at arbash-meinel.com-20080630194801-gdodhpossmf1n85a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: multi_walker
timestamp: Mon 2008-06-30 16:27:50 -0500
message:
Handle when the other tree has extra nodes, and we need to yield them.
-------------- next part --------------
=== modified file 'bzrlib/tests/test_tree.py'
--- a/bzrlib/tests/test_tree.py 2008-06-30 19:48:01 +0000
+++ b/bzrlib/tests/test_tree.py 2008-06-30 21:27:50 +0000
@@ -172,6 +172,9 @@
tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
iterator = tree.iter_entries_by_dir()
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+
root_id = tree.path2id('')
self.assertStepOne(True, '', root_id, iterator)
self.assertStepOne(True, 'a', 'a-id', iterator)
@@ -180,31 +183,42 @@
self.assertStepOne(False, None, None, iterator)
self.assertStepOne(False, None, None, iterator)
- def assertWalkerNext(self, exp_path, exp_file_id, master_is_None,
+ def assertWalkerNext(self, exp_path, exp_file_id, master_has_node,
exp_other_paths, iterator):
"""Check what happens when we step the iterator.
:param path: The path for this entry
:param file_id: The file_id for this entry
- :param master_is_None: Is this node None for the master tree?
+ :param master_has_node: Does the master tree have this entry?
:param exp_other_paths: A list of other_path values.
:param iterator: The iterator to step
"""
path, file_id, master_ie, other_values = iterator.next()
- self.assertEqual(exp_path, path)
- self.assertEqual(exp_file_id, file_id)
- if master_is_None:
- self.assertIs(None, master_ie)
+ self.assertEqual((exp_path, exp_file_id), (path, file_id),
+ 'Master entry did not match')
+ if master_has_node:
+ self.assertIsNot(None, master_ie, 'master should have an entry')
else:
- self.assertIsNot(None, master_ie)
- self.assertEqual(len(exp_other_paths), len(other_values))
+ self.assertIs(None, master_ie, 'master should not have an entry')
+ self.assertEqual(len(exp_other_paths), len(other_values),
+ 'Wrong number of other entries')
for exp_other_path, (other_path, other_ie) in \
zip(exp_other_paths, other_values):
- self.assertEqual(exp_other_path, other_path)
+ self.assertEqual(exp_other_path, other_path, "Other path incorrect")
if exp_other_path is None:
- self.assertIs(None, other_ie)
+ self.assertIs(None, other_ie, "Other should not have an entry")
else:
- self.assertEqual(file_id, other_ie.file_id)
+ self.assertEqual(file_id, other_ie.file_id,
+ "Incorrect other entry")
+
+ def lock_and_get_basis_and_root_id(self, tree):
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ basis_tree = tree.basis_tree()
+ basis_tree.lock_read()
+ self.addCleanup(basis_tree.unlock)
+ root_id = tree.path2id('')
+ return basis_tree, root_id
def test_simple_stepping(self):
tree = self.make_branch_and_tree('tree')
@@ -212,20 +226,15 @@
tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
tree.commit('first', rev_id='first-rev-id')
- basis_tree = tree.basis_tree()
- tree.lock_read()
- self.addCleanup(tree.unlock)
- basis_tree.lock_read()
- self.addCleanup(basis_tree.unlock)
- root_id = tree.path2id('')
+ basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
walker = _mod_tree.MultiWalker(tree, [basis_tree])
iterator = walker.iter_all()
- self.assertWalkerNext(u'', root_id, False, [u''], iterator)
- self.assertWalkerNext(u'a', 'a-id', False, [u'a'], iterator)
- self.assertWalkerNext(u'b', 'b-id', False, [u'b'], iterator)
- self.assertWalkerNext(u'b/c', 'c-id', False, [u'b/c'], iterator)
+ self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+ self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+ self.assertWalkerNext(u'b', 'b-id', True, [u'b'], iterator)
+ self.assertWalkerNext(u'b/c', 'c-id', True, [u'b/c'], iterator)
self.assertRaises(StopIteration, iterator.next)
def test_master_has_extra(self):
@@ -234,20 +243,66 @@
tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
tree.commit('first', rev_id='first-rev-id')
- basis_tree = tree.basis_tree()
tree.add(['c'], ['c-id'])
- tree.lock_read()
- self.addCleanup(tree.unlock)
- basis_tree.lock_read()
- self.addCleanup(basis_tree.unlock)
-
- root_id = tree.path2id('')
- walker = _mod_tree.MultiWalker(tree, [basis_tree])
- iterator = walker.iter_all()
- self.assertWalkerNext(u'', root_id, False, [u''], iterator)
- self.assertWalkerNext(u'a', 'a-id', False, [u'a'], iterator)
+ basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+ walker = _mod_tree.MultiWalker(tree, [basis_tree])
+ iterator = walker.iter_all()
+ self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+ self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+ self.assertWalkerNext(u'b', 'b-id', True, [u'b'], iterator)
+ self.assertWalkerNext(u'c', 'c-id', True, [None], iterator)
+ self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
+ self.assertRaises(StopIteration, iterator.next)
+
+ def test_master_renamed_to_earlier(self):
+ """The record is still present, it just shows up early."""
+ tree = self.make_branch_and_tree('tree')
+ self.build_tree(['tree/a', 'tree/c', 'tree/d'])
+ tree.add(['a', 'c', 'd'], ['a-id', 'c-id', 'd-id'])
+ tree.commit('first', rev_id='first-rev-id')
+ tree.rename_one('d', 'b')
+
+ basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+ walker = _mod_tree.MultiWalker(tree, [basis_tree])
+ iterator = walker.iter_all()
+ self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+ self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+ self.assertWalkerNext(u'b', 'd-id', True, [u'd'], iterator)
+ self.assertWalkerNext(u'c', 'c-id', True, [u'c'], iterator)
+ self.assertRaises(StopIteration, iterator.next)
+
+ def test_master_renamed_to_later(self):
+ tree = self.make_branch_and_tree('tree')
+ self.build_tree(['tree/a', 'tree/b', 'tree/d'])
+ tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
+ tree.commit('first', rev_id='first-rev-id')
+ tree.rename_one('b', 'e')
+
+ basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+ walker = _mod_tree.MultiWalker(tree, [basis_tree])
+ iterator = walker.iter_all()
+ self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+ self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+ self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
+ self.assertWalkerNext(u'e', 'b-id', True, [u'b'], iterator)
+ self.assertRaises(StopIteration, iterator.next)
+
+ def test_other_extra(self):
+ tree = self.make_branch_and_tree('tree')
+ self.build_tree(['tree/a', 'tree/b', 'tree/d'])
+ tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
+ tree.commit('first', rev_id='first-rev-id')
+ tree.remove(['b'])
+
+ basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+ walker = _mod_tree.MultiWalker(tree, [basis_tree])
+ iterator = walker.iter_all()
+ self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+ self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+ self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
self.assertWalkerNext(u'b', 'b-id', False, [u'b'], iterator)
- self.assertWalkerNext(u'c', 'c-id', False, [None], iterator)
- self.assertWalkerNext(u'd', 'd-id', False, [u'd'], iterator)
self.assertRaises(StopIteration, iterator.next)
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py 2008-06-30 19:48:01 +0000
+++ b/bzrlib/tree.py 2008-06-30 21:27:50 +0000
@@ -954,6 +954,9 @@
0 if paths are equal
and a positive number if ``path2`` sorts first
"""
+ # Shortcut this special case
+ if path1 == path2:
+ return 0
# This is stolen from _dirstate_helpers_py.py, only switching it to
# Unicode objects. Consider using encode_utf8() and then using the
# optimized versions, or maybe writing optimized unicode versions.
@@ -964,9 +967,9 @@
raise TypeError("'path2' must be a unicode string, not %s: %r"
% (type(path2), path2))
dirname1, basename1 = os.path.split(path1)
- key1 = (dirname1.split('/'), basename1)
+ key1 = (dirname1.split(u'/'), basename1)
dirname2, basename2 = os.path.split(path2)
- key2 = (dirname2.split('/'), basename2)
+ key2 = (dirname2.split(u'/'), basename2)
return cmp(key1, key2)
def iter_all(self):
@@ -977,6 +980,25 @@
for other in self._other_trees]
other_entries = [self._step_one(walker) for walker in other_walkers]
+ # Track extra nodes in the other trees
+ others_extra = [{} for i in xrange(len(self._other_trees))]
+
+ def lookup_by_file_id(idx, file_id):
+ # TODO: Is id2path better as the first call, or is
+ # inventory[file_id] better as a first check?
+ if file_id in others_extra[idx]:
+ return others_extra[idx].pop(file_id)
+ try:
+ cur_path = self._other_trees[idx].id2path(file_id)
+ except errors.NoSuchId:
+ cur_path = None
+ if cur_path is None:
+ return (None, None)
+ else:
+ cur_ie = self._other_trees[idx].inventory[file_id]
+ return (cur_path, cur_ie)
+
+
master_has_more = True
while master_has_more:
@@ -988,33 +1010,52 @@
file_id = master_ie.file_id
other_values = []
next_other_entries = []
- for other_walker, (other_has_more, other_path, other_ie) in \
- zip(other_walkers, other_entries):
+ for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
if not other_has_more:
- other_values.append((None, None))
- next_other_entries.append(False, None, None)
+ other_values.append(lookup_by_file_id(idx, file_id))
+ next_other_entries.append((False, None, None))
elif file_id == other_ie.file_id:
# This walker matched, so consume this path, and go on to
# the next
other_values.append((other_path, other_ie))
- next_other_entries.append(self._step_one(other_walker))
+ next_other_entries.append(self._step_one(other_walkers[idx]))
else:
# This walker did not match, step it until it either
# matches, or we know we are past the current walker.
while (other_has_more and
self._cmp_path_by_dirblock(other_path, master_path) < 0):
+ other_file_id = other_ie.file_id
+ others_extra[idx][other_file_id] = (other_path,
+ other_ie)
other_has_more, other_path, other_ie = \
- self._step_one(other_walker)
+ self._step_one(other_walkers[idx])
if other_has_more and other_ie.file_id == file_id:
# We ended up walking to this point, match and continue
other_values.append((other_path, other_ie))
other_has_more, other_path, other_ie = \
- self._step_one(other_walker)
+ self._step_one(other_walkers[idx])
else:
- other_values.append((None, None))
+ # This record isn't in the normal order, see if it
+ # exists at all,
+ other_values.append(lookup_by_file_id(idx, file_id))
next_other_entries.append((other_has_more, other_path,
other_ie))
other_entries = next_other_entries
# We've matched all the walkers, yield this datapoint
yield master_path, file_id, master_ie, other_values
+
+ # We have walked all of the master tree, now we want to find any extra
+ # nodes in the other trees
+ for idx, other_extra in enumerate(others_extra):
+ # TODO: we could use a key=XXX rather than cmp=XXX
+ others = sorted(other_extra.itervalues(),
+ cmp=self._cmp_path_by_dirblock)
+ for other_path, other_ie in others:
+ file_id = other_ie.file_id
+ other_extra.pop(file_id)
+ other_values = [(other_path, other_ie)]
+ for alt_idx, alt_extra in enumerate(others_extra[idx+1:]):
+ other_values.append(lookup_by_file_id(alt_idx + idx,
+ file_id))
+ yield other_path, file_id, None, other_values
More information about the bazaar-commits
mailing list