Rev 3517: Handle when the other tree has extra nodes, and we need to yield them. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

John Arbash Meinel john at arbash-meinel.com
Mon Jun 30 22:28:21 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

------------------------------------------------------------
revno: 3517
revision-id: john at arbash-meinel.com-20080630212750-h05siuu3vd40dhhf
parent: john at arbash-meinel.com-20080630194801-gdodhpossmf1n85a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: multi_walker
timestamp: Mon 2008-06-30 16:27:50 -0500
message:
  Handle when the other tree has extra nodes, and we need to yield them.
-------------- next part --------------
=== modified file 'bzrlib/tests/test_tree.py'
--- a/bzrlib/tests/test_tree.py	2008-06-30 19:48:01 +0000
+++ b/bzrlib/tests/test_tree.py	2008-06-30 21:27:50 +0000
@@ -172,6 +172,9 @@
         tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
 
         iterator = tree.iter_entries_by_dir()
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+
         root_id = tree.path2id('')
         self.assertStepOne(True, '', root_id, iterator)
         self.assertStepOne(True, 'a', 'a-id', iterator)
@@ -180,31 +183,42 @@
         self.assertStepOne(False, None, None, iterator)
         self.assertStepOne(False, None, None, iterator)
 
-    def assertWalkerNext(self, exp_path, exp_file_id, master_is_None,
+    def assertWalkerNext(self, exp_path, exp_file_id, master_has_node,
                          exp_other_paths, iterator):
         """Check what happens when we step the iterator.
 
         :param path: The path for this entry
         :param file_id: The file_id for this entry
-        :param master_is_None: Is this node None for the master tree?
+        :param master_has_node: Does the master tree have this entry?
         :param exp_other_paths: A list of other_path values.
         :param iterator: The iterator to step
         """
         path, file_id, master_ie, other_values = iterator.next()
-        self.assertEqual(exp_path, path)
-        self.assertEqual(exp_file_id, file_id)
-        if master_is_None:
-            self.assertIs(None, master_ie)
+        self.assertEqual((exp_path, exp_file_id), (path, file_id),
+                         'Master entry did not match')
+        if master_has_node:
+            self.assertIsNot(None, master_ie, 'master should have an entry')
         else:
-            self.assertIsNot(None, master_ie)
-        self.assertEqual(len(exp_other_paths), len(other_values))
+            self.assertIs(None, master_ie, 'master should not have an entry')
+        self.assertEqual(len(exp_other_paths), len(other_values),
+                            'Wrong number of other entries')
         for exp_other_path, (other_path, other_ie) in \
                 zip(exp_other_paths, other_values):
-            self.assertEqual(exp_other_path, other_path)
+            self.assertEqual(exp_other_path, other_path, "Other path incorrect")
             if exp_other_path is None:
-                self.assertIs(None, other_ie)
+                self.assertIs(None, other_ie, "Other should not have an entry")
             else:
-                self.assertEqual(file_id, other_ie.file_id)
+                self.assertEqual(file_id, other_ie.file_id,
+                                 "Incorrect other entry")
+
+    def lock_and_get_basis_and_root_id(self, tree):
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        basis_tree = tree.basis_tree()
+        basis_tree.lock_read()
+        self.addCleanup(basis_tree.unlock)
+        root_id = tree.path2id('')
+        return basis_tree, root_id
 
     def test_simple_stepping(self):
         tree = self.make_branch_and_tree('tree')
@@ -212,20 +226,15 @@
         tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
 
         tree.commit('first', rev_id='first-rev-id')
-        basis_tree = tree.basis_tree()
-        tree.lock_read()
-        self.addCleanup(tree.unlock)
-        basis_tree.lock_read()
-        self.addCleanup(basis_tree.unlock)
 
-        root_id = tree.path2id('')
+        basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
 
         walker = _mod_tree.MultiWalker(tree, [basis_tree])
         iterator = walker.iter_all()
-        self.assertWalkerNext(u'', root_id, False, [u''], iterator)
-        self.assertWalkerNext(u'a', 'a-id', False, [u'a'], iterator)
-        self.assertWalkerNext(u'b', 'b-id', False, [u'b'], iterator)
-        self.assertWalkerNext(u'b/c', 'c-id', False, [u'b/c'], iterator)
+        self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+        self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+        self.assertWalkerNext(u'b', 'b-id', True, [u'b'], iterator)
+        self.assertWalkerNext(u'b/c', 'c-id', True, [u'b/c'], iterator)
         self.assertRaises(StopIteration, iterator.next)
 
     def test_master_has_extra(self):
@@ -234,20 +243,66 @@
         tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
 
         tree.commit('first', rev_id='first-rev-id')
-        basis_tree = tree.basis_tree()
 
         tree.add(['c'], ['c-id'])
-        tree.lock_read()
-        self.addCleanup(tree.unlock)
-        basis_tree.lock_read()
-        self.addCleanup(basis_tree.unlock)
-
-        root_id = tree.path2id('')
-        walker = _mod_tree.MultiWalker(tree, [basis_tree])
-        iterator = walker.iter_all()
-        self.assertWalkerNext(u'', root_id, False, [u''], iterator)
-        self.assertWalkerNext(u'a', 'a-id', False, [u'a'], iterator)
+        basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+        walker = _mod_tree.MultiWalker(tree, [basis_tree])
+        iterator = walker.iter_all()
+        self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+        self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+        self.assertWalkerNext(u'b', 'b-id', True, [u'b'], iterator)
+        self.assertWalkerNext(u'c', 'c-id', True, [None], iterator)
+        self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
+        self.assertRaises(StopIteration, iterator.next)
+
+    def test_master_renamed_to_earlier(self):
+        """The record is still present, it just shows up early."""
+        tree = self.make_branch_and_tree('tree')
+        self.build_tree(['tree/a', 'tree/c', 'tree/d'])
+        tree.add(['a', 'c', 'd'], ['a-id', 'c-id', 'd-id'])
+        tree.commit('first', rev_id='first-rev-id')
+        tree.rename_one('d', 'b')
+
+        basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+        walker = _mod_tree.MultiWalker(tree, [basis_tree])
+        iterator = walker.iter_all()
+        self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+        self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+        self.assertWalkerNext(u'b', 'd-id', True, [u'd'], iterator)
+        self.assertWalkerNext(u'c', 'c-id', True, [u'c'], iterator)
+        self.assertRaises(StopIteration, iterator.next)
+
+    def test_master_renamed_to_later(self):
+        tree = self.make_branch_and_tree('tree')
+        self.build_tree(['tree/a', 'tree/b', 'tree/d'])
+        tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
+        tree.commit('first', rev_id='first-rev-id')
+        tree.rename_one('b', 'e')
+
+        basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+
+        walker = _mod_tree.MultiWalker(tree, [basis_tree])
+        iterator = walker.iter_all()
+        self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+        self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+        self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
+        self.assertWalkerNext(u'e', 'b-id', True, [u'b'], iterator)
+        self.assertRaises(StopIteration, iterator.next)
+
+    def test_other_extra(self):
+        tree = self.make_branch_and_tree('tree')
+        self.build_tree(['tree/a', 'tree/b', 'tree/d'])
+        tree.add(['a', 'b', 'd'], ['a-id', 'b-id', 'd-id'])
+        tree.commit('first', rev_id='first-rev-id')
+        tree.remove(['b'])
+
+        basis_tree, root_id = self.lock_and_get_basis_and_root_id(tree)
+        walker = _mod_tree.MultiWalker(tree, [basis_tree])
+        iterator = walker.iter_all()
+        self.assertWalkerNext(u'', root_id, True, [u''], iterator)
+        self.assertWalkerNext(u'a', 'a-id', True, [u'a'], iterator)
+        self.assertWalkerNext(u'd', 'd-id', True, [u'd'], iterator)
         self.assertWalkerNext(u'b', 'b-id', False, [u'b'], iterator)
-        self.assertWalkerNext(u'c', 'c-id', False, [None], iterator)
-        self.assertWalkerNext(u'd', 'd-id', False, [u'd'], iterator)
         self.assertRaises(StopIteration, iterator.next)

=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py	2008-06-30 19:48:01 +0000
+++ b/bzrlib/tree.py	2008-06-30 21:27:50 +0000
@@ -954,6 +954,9 @@
             0 if paths are equal
             and a positive number if ``path2`` sorts first
         """
+        # Shortcut this special case
+        if path1 == path2:
+            return 0
         # This is stolen from _dirstate_helpers_py.py, only switching it to
         # Unicode objects. Consider using encode_utf8() and then using the
         # optimized versions, or maybe writing optimized unicode versions.
@@ -964,9 +967,9 @@
             raise TypeError("'path2' must be a unicode string, not %s: %r"
                             % (type(path2), path2))
         dirname1, basename1 = os.path.split(path1)
-        key1 = (dirname1.split('/'), basename1)
+        key1 = (dirname1.split(u'/'), basename1)
         dirname2, basename2 = os.path.split(path2)
-        key2 = (dirname2.split('/'), basename2)
+        key2 = (dirname2.split(u'/'), basename2)
         return cmp(key1, key2)
 
     def iter_all(self):
@@ -977,6 +980,25 @@
                          for other in self._other_trees]
         other_entries = [self._step_one(walker) for walker in other_walkers]
 
+        # Track extra nodes in the other trees
+        others_extra = [{} for i in xrange(len(self._other_trees))]
+
+        def lookup_by_file_id(idx, file_id):
+            # TODO: Is id2path better as the first call, or is
+            #       inventory[file_id] better as a first check?
+            if file_id in others_extra[idx]:
+                return others_extra[idx].pop(file_id)
+            try:
+                cur_path = self._other_trees[idx].id2path(file_id)
+            except errors.NoSuchId:
+                cur_path = None
+            if cur_path is None:
+                return (None, None)
+            else:
+                cur_ie = self._other_trees[idx].inventory[file_id]
+                return (cur_path, cur_ie)
+
+
 
         master_has_more = True
         while master_has_more:
@@ -988,33 +1010,52 @@
             file_id = master_ie.file_id
             other_values = []
             next_other_entries = []
-            for other_walker, (other_has_more, other_path, other_ie) in \
-                zip(other_walkers, other_entries):
+            for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
                 if not other_has_more:
-                    other_values.append((None, None))
-                    next_other_entries.append(False, None, None)
+                    other_values.append(lookup_by_file_id(idx, file_id))
+                    next_other_entries.append((False, None, None))
                 elif file_id == other_ie.file_id:
                     # This walker matched, so consume this path, and go on to
                     # the next
                     other_values.append((other_path, other_ie))
-                    next_other_entries.append(self._step_one(other_walker))
+                    next_other_entries.append(self._step_one(other_walkers[idx]))
                 else:
                     # This walker did not match, step it until it either
                     # matches, or we know we are past the current walker.
                     while (other_has_more and
                            self._cmp_path_by_dirblock(other_path, master_path) < 0):
+                        other_file_id = other_ie.file_id
+                        others_extra[idx][other_file_id] = (other_path,
+                                                            other_ie)
                         other_has_more, other_path, other_ie = \
-                            self._step_one(other_walker)
+                            self._step_one(other_walkers[idx])
                     if other_has_more and other_ie.file_id == file_id:
                         # We ended up walking to this point, match and continue
                         other_values.append((other_path, other_ie))
                         other_has_more, other_path, other_ie = \
-                            self._step_one(other_walker)
+                            self._step_one(other_walkers[idx])
                     else:
-                        other_values.append((None, None))
+                        # This record isn't in the normal order, see if it
+                        # exists at all,
+                        other_values.append(lookup_by_file_id(idx, file_id))
                     next_other_entries.append((other_has_more, other_path,
                                                other_ie))
             other_entries = next_other_entries
 
             # We've matched all the walkers, yield this datapoint
             yield master_path, file_id, master_ie, other_values
+
+        # We have walked all of the master tree, now we want to find any extra
+        # nodes in the other trees
+        for idx, other_extra in enumerate(others_extra):
+            # TODO: we could use a key=XXX rather than cmp=XXX
+            others = sorted(other_extra.itervalues(),
+                            cmp=self._cmp_path_by_dirblock)
+            for other_path, other_ie in others:
+                file_id = other_ie.file_id
+                other_extra.pop(file_id)
+                other_values = [(other_path, other_ie)]
+                for alt_idx, alt_extra in enumerate(others_extra[idx+1:]):
+                    other_values.append(lookup_by_file_id(alt_idx + idx,
+                                                          file_id))
+                yield other_path, file_id, None, other_values



More information about the bazaar-commits mailing list