Rev 2387: Implement _bisect_recursive, which uses multiple bisect calls to in http://bzr.arbash-meinel.com/branches/bzr/dirstate

John Arbash Meinel john at arbash-meinel.com
Sun Feb 25 14:46:46 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/dirstate

------------------------------------------------------------
revno: 2387
revision-id: john at arbash-meinel.com-20070225144550-3rcyjf6ui5oo0gaf
parent: john at arbash-meinel.com-20070224164555-575dae290us6da8o
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Sun 2007-02-25 08:45:50 -0600
message:
  Implement _bisect_recursive, which uses multiple bisect calls to
  handle renames and finding entries in subdirs.
  As is, this could be hooked into paths2ids() if the dirstate has not been loaded yet.
  However, it doesn't quite provide enough, since the parsed entries would probably not
  be saved. Further, the multiple bisect calls are less efficient then they could be,
  because they do not remember the last bisect call.
  We should explore switching to a caching structure, which maintains all records that
  have been processed, in a structure that can be in-memory searched before going back
  to disk.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-24 16:45:55 +0000
+++ b/bzrlib/dirstate.py	2007-02-25 14:45:50 +0000
@@ -716,6 +716,70 @@
 
         return found
 
+    def _bisect_recursive(self, dir_name_list):
+        """Bisect for entries for all paths and their children.
+
+        This will use bisect to find all records for the supplied paths. It
+        will then continue to bisect for any records which are marked as
+        directories. (and renames?)
+
+        :param paths: A sorted list of (dir, name) pairs
+             eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
+        :return: A dictionary mapping (dir, name, file_id) => [tree_info]
+        """
+        # Map from (dir, name, file_id) => [tree_info]
+        found = {}
+
+        found_dir_names = set()
+
+        # Directories that have been read
+        processed_dirs = set()
+        # Get the ball rolling with the first bisect for all entries.
+        newly_found = self._bisect(dir_name_list)
+
+        while newly_found:
+            # Directories that need to be read
+            pending_dirs = set()
+            paths_to_search = set()
+            for entry_list in newly_found.itervalues():
+                for dir_name_id, trees_info in entry_list:
+                    found[dir_name_id] = trees_info
+                    found_dir_names.add(dir_name_id[:2])
+                    is_dir = False
+                    for tree_info in trees_info:
+                        minikind = tree_info[0]
+                        if minikind == 'd':
+                            if is_dir:
+                                # We already processed this one as a directory,
+                                # we don't need to do the extra work again.
+                                continue
+                            subdir, name, file_id = dir_name_id
+                            path = osutils.pathjoin(subdir, name)
+                            is_dir = True
+                            if path not in processed_dirs:
+                                pending_dirs.add(path)
+                        elif minikind == 'r':
+                            # Rename, we need to directly search the target
+                            # which is contained in the fingerprint column
+                            dir_name = osutils.split(tree_info[1])
+                            if dir_name[0] in pending_dirs:
+                                # This entry will be found in the dir search
+                                continue
+                            # TODO: We need to check if this entry has
+                            #       already been found. Otherwise we might be
+                            #       hitting infinite recursion.
+                            if dir_name not in found_dir_names:
+                                paths_to_search.add(dir_name)
+            # Now we have a list of paths to look for directly, and
+            # directory blocks that need to be read.
+            # newly_found is mixing the keys between (dir, name) and path
+            # entries, but that is okay, because we only really care about the
+            # targets.
+            newly_found = self._bisect(sorted(paths_to_search))
+            newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))
+            processed_dirs.update(pending_dirs)
+        return found
+
     def _empty_parent_info(self):
         return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
                                                     len(self._ghosts))

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-02-24 16:45:55 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-02-25 14:45:50 +0000
@@ -1032,12 +1032,26 @@
         This takes the basic dirstate, and moves the paths around.
         """
         tree, state, expected = self.create_basic_dirstate()
+        # Rename a file
         tree.rename_one('a', 'b/g')
+        # And a directory
+        tree.rename_one('b/d', 'h')
 
         old_a = expected['a']
         expected['a'] = (old_a[0], [('r', 'b/g', 0, False, ''), old_a[1][1]])
         expected['b/g'] = (('b', 'g', 'a-id'), [old_a[1][0],
                                                 ('r', 'a', 0, False, '')])
+        old_d = expected['b/d']
+        expected['b/d'] = (old_d[0], [('r', 'h', 0, False, ''), old_d[1][1]])
+        expected['h'] = (('', 'h', 'd-id'), [old_d[1][0],
+                                             ('r', 'b/d', 0, False, '')])
+
+        old_e = expected['b/d/e']
+        expected['b/d/e'] = (old_e[0], [('r', 'h/e', 0, False, ''),
+                             old_e[1][1]])
+        expected['h/e'] = (('h', 'e', 'e-id'), [old_e[1][0],
+                                                ('r', 'b/d/e', 0, False, '')])
+
         state.unlock()
         try:
             new_state = dirstate.DirState.from_tree(tree, 'dirstate')
@@ -1082,7 +1096,7 @@
     def assertBisectDirBlocks(self, expected_map, map_keys, state, paths):
         """Assert that bisecting for dirbblocks returns the right result.
 
-        :param expected: A map from path => expected values
+        :param expected_map: A map from key => expected values
         :param map_keys: A nested list of paths we expect to be returned.
             Something like [['a', 'b', 'f'], ['b/c', 'b/d']]
         :param state: The DirState object.
@@ -1102,6 +1116,27 @@
 
         self.assertEqual(expected, result)
 
+    def assertBisectRecursive(self, expected_map, map_keys, state, paths):
+        """Assert the return value of a recursive bisection.
+
+        :param expected_map: A map from key => entry value
+        :param map_keys: A list of paths we expect to be returned.
+            Something like ['a', 'b', 'f', 'b/d', 'b/d2']
+        :param state: The DirState object.
+        :param paths: A list of files and directories. It will be broken up
+            into (dir, name) pairs and sorted before calling _bisect_recursive.
+        """
+        expected = {}
+        for key in map_keys:
+            entry = expected_map[key]
+            dir_name_id, trees_info = entry
+            expected[dir_name_id] = trees_info
+
+        dir_names = sorted(osutils.split(p) for p in paths)
+        result = state._bisect_recursive(dir_names)
+
+        self.assertEqual(expected, result)
+
     def test_bisect_each(self):
         """Find a single record using bisect."""
         tree, state, expected = self.create_basic_dirstate()
@@ -1185,6 +1220,12 @@
         # Search for the pre and post renamed entries
         self.assertBisect(expected, [['a']], state, ['a'])
         self.assertBisect(expected, [['b/g']], state, ['b/g'])
+        self.assertBisect(expected, [['b/d']], state, ['b/d'])
+        self.assertBisect(expected, [['h']], state, ['h'])
+
+        # What about b/d/e? shouldn't that also get 2 directory entries?
+        self.assertBisect(expected, [['b/d/e']], state, ['b/d/e'])
+        self.assertBisect(expected, [['h/e']], state, ['h/e'])
 
     def test_bisect_dirblocks(self):
         tree, state, expected = self.create_duplicated_dirstate()
@@ -1210,3 +1251,42 @@
         self.assertBisectDirBlocks(expected, [None], state, ['c'])
         self.assertBisectDirBlocks(expected, [None], state, ['b/d/e'])
         self.assertBisectDirBlocks(expected, [None], state, ['f'])
+
+    def test_bisect_recursive_each(self):
+        tree, state, expected = self.create_basic_dirstate()
+        self.assertBisectRecursive(expected, ['a'], state, ['a'])
+        self.assertBisectRecursive(expected, ['b/c'], state, ['b/c'])
+        self.assertBisectRecursive(expected, ['b/d/e'], state, ['b/d/e'])
+        self.assertBisectRecursive(expected, ['b/d', 'b/d/e'],
+                                   state, ['b/d'])
+        self.assertBisectRecursive(expected, ['b', 'b/c', 'b/d', 'b/d/e'],
+                                   state, ['b'])
+        self.assertBisectRecursive(expected, ['', 'a', 'b', 'f', 'b/c',
+                                              'b/d', 'b/d/e'],
+                                   state, [''])
+
+    def test_bisect_recursive_multiple(self):
+        tree, state, expected = self.create_basic_dirstate()
+        self.assertBisectRecursive(expected, ['a', 'b/c'], state, ['a', 'b/c'])
+        self.assertBisectRecursive(expected, ['b/d', 'b/d/e'],
+                                   state, ['b/d', 'b/d/e'])
+
+    def test_bisect_recursive_missing(self):
+        tree, state, expected = self.create_basic_dirstate()
+        self.assertBisectRecursive(expected, [], state, ['d'])
+        self.assertBisectRecursive(expected, [], state, ['b/e'])
+        self.assertBisectRecursive(expected, [], state, ['g'])
+        self.assertBisectRecursive(expected, ['a'], state, ['a', 'g'])
+
+    def test_bisect_recursive_renamed(self):
+        tree, state, expected = self.create_renamed_dirstate()
+
+        # Looking for either renamed item should find the other
+        self.assertBisectRecursive(expected, ['a', 'b/g'], state, ['a'])
+        self.assertBisectRecursive(expected, ['a', 'b/g'], state, ['b/g'])
+        # Looking in the containing directory should find the rename target,
+        # and anything in a subdir of the renamed target.
+        self.assertBisectRecursive(expected, ['a', 'b', 'b/c', 'b/d',
+                                              'b/d/e', 'b/g', 'h', 'h/e'],
+                                   state, ['b'])
+



More information about the bazaar-commits mailing list