Rev 3515: Start working on a special walker that can iterate several trees at once. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

John Arbash Meinel john at arbash-meinel.com
Mon Jun 30 20:25:05 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/multi_walker

------------------------------------------------------------
revno: 3515
revision-id: john at arbash-meinel.com-20080630192436-hv4ppxq7dpp38mii
parent: pqm at pqm.ubuntu.com-20080630055535-42tx43kb228k4p94
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: multi_walker
timestamp: Mon 2008-06-30 14:24:36 -0500
message:
  Start working on a special walker that can iterate several trees at once.
-------------- next part --------------
=== modified file 'bzrlib/tests/test_tree.py'
--- a/bzrlib/tests/test_tree.py	2007-07-31 19:42:15 +0000
+++ b/bzrlib/tests/test_tree.py	2008-06-30 19:24:36 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006 Canonical Ltd
+# Copyright (C) 2006, 2008 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,7 +16,12 @@
 
 """Tests for Tree and InterTree."""
 
-from bzrlib import errors
+from bzrlib import (
+    errors,
+    revision,
+    tests,
+    tree as _mod_tree,
+    )
 from bzrlib.tests import TestCaseWithTransport
 from bzrlib.tree import InterTree
 
@@ -138,3 +143,88 @@
             specific_files=['known_file', 'unknown_file'] ,
             require_versioned=False)
         self.assertEqual(len(delta.added), 1)
+
+
+class TestMultiWalker(TestCaseWithTransport):
+
+    def assertStepOne(self, has_more, path, file_id, iterator):
+        retval = _mod_tree.MultiWalker._step_one(iterator)
+        if not has_more:
+            self.assertIs(None, path)
+            self.assertIs(None, file_id)
+            self.assertEqual((False, None, None), retval)
+        else:
+            self.assertEqual((has_more, path, file_id),
+                             (retval[0], retval[1], retval[2].file_id))
+
+    def test__step_one_empty(self):
+        tree = self.make_branch_and_tree('empty')
+        repo = tree.branch.repository
+        empty_tree = repo.revision_tree(revision.NULL_REVISION)
+
+        iterator = empty_tree.iter_entries_by_dir()
+        self.assertStepOne(False, None, None, iterator)
+        self.assertStepOne(False, None, None, iterator)
+
+    def test__step_one(self):
+        tree = self.make_branch_and_tree('tree')
+        self.build_tree(['tree/a', 'tree/b/', 'tree/b/c'])
+        tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
+
+        iterator = tree.iter_entries_by_dir()
+        root_id = tree.path2id('')
+        self.assertStepOne(True, '', root_id, iterator)
+        self.assertStepOne(True, 'a', 'a-id', iterator)
+        self.assertStepOne(True, 'b', 'b-id', iterator)
+        self.assertStepOne(True, 'b/c', 'c-id', iterator)
+        self.assertStepOne(False, None, None, iterator)
+        self.assertStepOne(False, None, None, iterator)
+
+    def test_simple_stepping(self):
+        tree = self.make_branch_and_tree('tree')
+        self.build_tree(['tree/a', 'tree/b/', 'tree/b/c'])
+        tree.add(['a', 'b', 'b/c'], ['a-id', 'b-id', 'c-id'])
+
+        tree.commit('first', rev_id='first-rev-id')
+        basis_tree = tree.basis_tree()
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        basis_tree.lock_read()
+        self.addCleanup(basis_tree.unlock)
+
+        walker = _mod_tree.MultiWalker(tree, [basis_tree])
+        iterator = walker.iter_all()
+        master_path, file_id, master_ie, other_values = iterator.next()
+        root_id = tree.path2id('')
+        self.assertEqual('', master_path)
+        self.assertEqual(root_id, file_id)
+        self.assertEqual(1, len(other_values))
+        other_path, other_ie = other_values[0]
+        self.assertEqual('', other_path)
+        self.assertEqual(root_id, other_ie.file_id)
+
+        master_path, file_id, master_ie, other_values = iterator.next()
+        self.assertEqual(u'a', master_path)
+        self.assertEqual('a-id', file_id)
+        self.assertEqual(1, len(other_values))
+        other_path, other_ie = other_values[0]
+        self.assertEqual(u'a', other_path)
+        self.assertEqual('a-id', other_ie.file_id)
+
+        master_path, file_id, master_ie, other_values = iterator.next()
+        self.assertEqual(u'b', master_path)
+        self.assertEqual('b-id', file_id)
+        self.assertEqual(1, len(other_values))
+        other_path, other_ie = other_values[0]
+        self.assertEqual(u'b', other_path)
+        self.assertEqual('b-id', other_ie.file_id)
+
+        master_path, file_id, master_ie, other_values = iterator.next()
+        self.assertEqual(u'b/c', master_path)
+        self.assertEqual('c-id', file_id)
+        self.assertEqual(1, len(other_values))
+        other_path, other_ie = other_values[0]
+        self.assertEqual(u'b/c', other_path)
+        self.assertEqual('c-id', other_ie.file_id)
+
+        self.assertRaises(StopIteration, iterator.next)

=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py	2008-06-26 00:17:06 +0000
+++ b/bzrlib/tree.py	2008-06-30 19:24:36 +0000
@@ -905,3 +905,75 @@
             # the parent's path is necessarily known at this point.
             yield(file_id, (path, to_path), changed_content, versioned, parent,
                   name, kind, executable)
+
+
+class MultiWalker(object):
+    """Walk multiple trees simultaneously, getting combined results."""
+
+    def __init__(self, master_tree, other_trees):
+        """Create a new MultiWalker.
+
+        All trees being walked must implement "iter_entries_by_dir()", such
+        that they yield (path, object) tuples, where that object will have a
+        '.file_id' member, that can be used to check equality.
+
+        :param master_tree: All trees will be 'slaved' to the master_tree. Such
+            that nodes in master_tree will be used as 'first-pass' sync points.
+            Any nodes that aren't in master_tree will be merged in a second
+            pass.
+        :param other_trees: A list of other trees to walk simultaneously.
+        """
+        self._master_tree = master_tree
+        self._other_trees = other_trees
+
+    @staticmethod
+    def _step_one(iterator):
+        """Step an iter_entries_by_dir iterator.
+
+        :return: (has_more, path, ie)
+            If has_more is False, path and ie will be None.
+        """
+        try:
+            path, ie = iterator.next()
+        except StopIteration:
+            return False, None, None
+        else:
+            return True, path, ie
+
+    def iter_all(self):
+        """Match up the values in the different trees."""
+        import pdb; pdb.set_trace()
+        master_iterator = self._master_tree.iter_entries_by_dir()
+
+        other_walkers = [other.iter_entries_by_dir()
+                         for other in self._other_trees]
+        other_entries = [self._step_one(walker) for walker in other_walkers]
+
+        master_has_more = True
+        while master_has_more:
+            (master_has_more, master_path,
+             master_ie) = self._step_one(master_iterator)
+            if not master_has_more:
+                break
+
+            master_file_id = master_ie.file_id
+            other_values = []
+            next_other_entries = []
+            for other_walker, (other_has_more, other_path, other_ie) in \
+                zip(other_walkers, other_entries):
+                if not other_has_more:
+                    other_values.append((None, None))
+                    next_other_entries.append(False, None, None)
+                elif master_file_id == other_ie.file_id:
+                    # This walker matched, so consume this path, and go on to
+                    # the next
+                    other_values.append((other_path, other_ie))
+                    next_other_entries.append(self._step_one(other_walker))
+                else:
+                    # This walker did not match, step it until it either
+                    # matches, or we know we are past the current walker.
+                    raise NotImplementedError
+            other_entries = next_other_entries
+
+            # We've matched all the walkers, yield this datapoint
+            yield master_path, master_file_id, master_ie, other_values



More information about the bazaar-commits mailing list