Rev 2334: Start a dedicated low level iterator function in dirstate. in file:///home/robertc/source/baz/dirstate2/

Robert Collins robertc at robertcollins.net
Tue Mar 13 02:52:46 GMT 2007


At file:///home/robertc/source/baz/dirstate2/

------------------------------------------------------------
revno: 2334
revision-id: robertc at robertcollins.net-20070313025234-3jhyogdj37n36heo
parent: robertc at robertcollins.net-20070312052908-h1rm7g7pnr5zegio
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate2
timestamp: Tue 2007-03-13 13:52:34 +1100
message:
  Start a dedicated low level iterator function in dirstate.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-03-10 04:50:30 +0000
+++ b/bzrlib/dirstate.py	2007-03-13 02:52:34 +0000
@@ -1451,6 +1451,26 @@
             raise Exception("can't pack %s" % inv_entry)
         return (minikind, fingerprint, size, executable, tree_data)
 
+    def iter_dirs_in_trees(self, paths, trees):
+        """Iterate over directories selected by paths in some trees.
+
+        :return: An iterator that yields (full-dir, iterator-of-dir-contents)
+            items. full-dir is a boolean flag indicating whether the directory
+            is being returned because the entire directory was selected in the
+            current working tree (index 0), or because some files within it
+            were selected (from any number of trees). Each directory name *may*
+            be returned more than once, because there may be multiple parent
+            ids for the directory, but if this occurs, the iterator will return
+            partitioned output. Each path-id combination is thus only ever
+            output once.
+            """
+        self._read_dirblocks_if_needed()
+        for block in self._dirblocks:
+            if block[0] == '' and block[1] and block[1][0][0][1] == '':
+                yield (False, block)
+            else:
+                yield (True, block)
+
     def _iter_entries(self):
         """Iterate over all the entries in the dirstate.
 

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-03-09 15:52:13 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-03-13 02:52:34 +0000
@@ -21,6 +21,7 @@
 import time
 
 from bzrlib import (
+    bzrdir,
     dirstate,
     errors,
     osutils,
@@ -1902,3 +1903,107 @@
         for path in paths:
             self.assertBisect(dirblocks, split_dirblocks, path, cache=cache)
 
+
+class TestIterDirectoriesInTrees(TestCaseWithDirState):
+    """Tests for iter_dirs_in_trees.
+    
+ui fan out selection:
+start with a list of (path, trees)
+ - output path, id, detail-in-each-tree for each item in the list, for all trees the id is present in.
+ - for each detail-in-each-tree where there is a directory, add its children to the list to examine.
+ - always process the shortest path next.
+
+Dirblock style:
+ - gather all the information for a single directory at once.
+ - any directory is output once and only once (?for any one tree?). [corner case alert!]
+ - output may be a subset of actual dirstate raw data.
+
+
+Common case: all trees have the same information, no moves.
+
+NOTES: This explicitly does not gather all children of a directory, only those of the tree which we
+decided to gather children for. We might be gathering children for multiple trees within the same path though.
+
+Corner cases (all are symmetrical)
+ - select tree 0, tree 1 data should be totally ignored. Things that can go
+   wrong: we might include ids present in tree 1 and not tree 0. We might
+   examine the contents of a directory in tree 1 that's not versioned in 0,
+   leading to lots of unknowns rather than one unknown.
+ - a file in 0 is moved to where a directory in a tree 1 is, this should not
+   recurse into the directory even if the subdir is versioned in tree 0 (at a
+   non-selected path).
+ - a file in 0 is moved to somewhere else and becomes a directory in 1, the directory contents
+   should be gathered.
+ - several things are moved to a directory containing other things, the
+   returned directory information should only contain them
+ - a path in 0 was at path B in 1, path B in 0 contains some other id. The
+   other id should not be returned.
+ - starting at dir X, if a path is moved to dir /foo/bar, and another to /foo,
+   we should only ever emit /foo in total, not a fake /foo with bar, then /foo.
+ - if [0] /foo/bar/gam is a directory that is [1]/foo, which still contains
+   [1]/foo/bar then starting at bar, we should output the block for bar,
+   selecting [0] and [1], the block for /foo/bar/gam, selecting [0]
+   only (gam is at /foo in [1]), and the block for [1]/foo, which has the data
+   for /foo/bar, but we must not recurse into /foo/bar again.  finally emit a
+   fake block for [0]/foo/bar
+   A related case is where a path /foo/bar stays constant - it's 'f', 'f' in
+   both trees, same id, but the containing directory changes id. When we choose
+   to output the directory for id A, we may filter things not present in A
+   because we don't know about id B yet, so if we then output it again, we
+   should filter things not present in B, or present in A, because we've output
+   A before. To construct this case, have a directory that links in /foo from
+   its id A in index 0, and also links in another directory /later, which
+   contains a link to /foo under id B in index 1.
+
+ - the user will be looking at each entry at a target index. If we are
+   returning an unmodified dirblock (good), we need to be able to say 'these
+   indexes are relevant in this dirblock'.
+   One confusing case of id emission is when we select a dirblock because of its id in tree 1 and it contains a file id that is present in tree 0, and redirected in tree 1
+   0  1
+   f  r
+
+   here we should not include that data in the output. This means users of the output can say:
+   for key, details in dirblock: stuff, safely.
+    """
+
+    def make_branch_and_tree(self, relpath='.'):
+        """Create format4 working trees to allow relatively easy setup."""
+        return TestCaseWithDirState.make_branch_and_tree(self, relpath,
+            format=bzrdir.format_registry.make_bzrdir('dirstate'))
+
+    def test_empty_tree(self):
+        tree = self.make_branch_and_tree()
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        # the output for iter_dirs_in_trees for an empty tree - one with
+        # just a tree root - should be the root dirblock followed by the
+        # block for the contents of root. The root dirblock should not claim
+        # to be a real directory - ever - because we don't want to search for unknowns in this path
+        result = list(tree.current_dirstate().iter_dirs_in_trees([''], [0]))
+        state = tree.current_dirstate()
+        expected = [(False, state._dirblocks[0]),
+            (True, state._dirblocks[1])]
+        self.assertEqual(expected, result)
+
+    def test_all_paths(self):
+        tree = self.make_branch_and_tree()
+        self.build_tree(['file', 'dir/', 'dir/file', 'dir2/', 'dir2/fileindir2'])
+        tree.add(['file', 'dir', 'dir/file', 'dir2', 'dir2/fileindir2'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        # the output for iter_dirs_in_trees for this tree should be
+        # the root dirblock (marked as unreal), the contents of root (marked
+        # real), and the contents of dir and dir2 (also marked real). As with
+        # an empty tree - one with just a tree root - the root dirblock
+        # should not claim to be a real directory - ever - because we
+        # don't want to search for unknowns in this path
+        result = []
+        # suck the nested iterators.
+        for details in tree.current_dirstate().iter_dirs_in_trees([''], [0]):
+            result.append((details[0], (details[1][0], list(details[1][1]))))
+        state = tree.current_dirstate()
+        expected = [(False, state._dirblocks[0]),
+            (True, state._dirblocks[1]),
+            (True, state._dirblocks[2]),
+            (True, state._dirblocks[3])]
+        self.assertEqual(expected, result)



More information about the bazaar-commits mailing list