Rev 2334: Start a dedicated low level iterator function in dirstate. in file:///home/robertc/source/baz/dirstate2/
Robert Collins
robertc at robertcollins.net
Tue Mar 13 02:52:46 GMT 2007
At file:///home/robertc/source/baz/dirstate2/
------------------------------------------------------------
revno: 2334
revision-id: robertc at robertcollins.net-20070313025234-3jhyogdj37n36heo
parent: robertc at robertcollins.net-20070312052908-h1rm7g7pnr5zegio
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate2
timestamp: Tue 2007-03-13 13:52:34 +1100
message:
Start a dedicated low level iterator function in dirstate.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/test_dirstate.py test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-03-10 04:50:30 +0000
+++ b/bzrlib/dirstate.py 2007-03-13 02:52:34 +0000
@@ -1451,6 +1451,26 @@
raise Exception("can't pack %s" % inv_entry)
return (minikind, fingerprint, size, executable, tree_data)
+ def iter_dirs_in_trees(self, paths, trees):
+ """Iterate over directories selected by paths in some trees.
+
+ :return: An iterator that yields (full-dir, iterator-of-dir-contents)
+ items. full-dir is a boolean flag indicating whether the directory
+ is being returned because the entire directory was selected in the
+ current working tree (index 0), or because some files within it
+ were selected (from any number of trees). Each directory name *may*
+ be returned more than once, because there may be multiple parent
+ ids for the directory, but if this occurs, the iterator will return
+ partitioned output. Each path-id combination is thus only ever
+ output once.
+ """
+ self._read_dirblocks_if_needed()
+ for block in self._dirblocks:
+ if block[0] == '' and block[1] and block[1][0][0][1] == '':
+ yield (False, block)
+ else:
+ yield (True, block)
+
def _iter_entries(self):
"""Iterate over all the entries in the dirstate.
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2007-03-09 15:52:13 +0000
+++ b/bzrlib/tests/test_dirstate.py 2007-03-13 02:52:34 +0000
@@ -21,6 +21,7 @@
import time
from bzrlib import (
+ bzrdir,
dirstate,
errors,
osutils,
@@ -1902,3 +1903,107 @@
for path in paths:
self.assertBisect(dirblocks, split_dirblocks, path, cache=cache)
+
+class TestIterDirectoriesInTrees(TestCaseWithDirState):
+ """Tests for iter_dirs_in_trees.
+
+ui fan out selection:
+start with a list of (path, trees)
+ - output path, id, detail-in-each-tree for each item in the list, for all trees the id is present in.
+ - for each detail-in-each-tree where there is a directory, add its children to the list to examine.
+ - always process the shortest path next.
+
+Dirblock style:
+ - gather all the information for a single directory at once.
+ - any directory is output once and only once (?for any one tree?). [corner case alert!]
+ - output may be a subset of actual dirstate raw data.
+
+
+Common case: all trees have the same information, no moves.
+
+NOTES: This explicitly does not gather all children of a directory, only those of the tree which we
+decided to gather children for. We might be gathering children for multiple trees within the same path though.
+
+Corner cases (all are symmetrical)
+ - select tree 0, tree 1 data should be totally ignored. Things that can go
+ wrong: we might include ids present in tree 1 and not tree 0. We might
+ examine the contents of a directory in tree 1 that's not versioned in 0,
+ leading to lots of unknowns rather than one unknown.
+ - a file in 0 is moved to where a directory in a tree 1 is, this should not
+ recurse into the directory even if the subdir is versioned in tree 0 (at a
+ non-selected path).
+ - a file in 0 is moved to somewhere else and becomes a directory in 1, the directory contents
+ should be gathered.
+ - several things are moved to a directory containing other things, the
+ returned directory information should only contain them
+ - a path in 0 was at path B in 1, path B in 0 contains some other id. The
+ other id should not be returned.
+ - starting at dir X, if a path is moved to dir /foo/bar, and another to /foo,
+ we should only ever emit /foo in total, not a fake /foo with bar, then /foo.
+ - if [0] /foo/bar/gam is a directory that is [1]/foo, which still contains
+ [1]/foo/bar then starting at bar, we should output the block for bar,
+ selecting [0] and [1], the block for /foo/bar/gam, selecting [0]
+ only (gam is at /foo in [1]), and the block for [1]/foo, which has the data
+ for /foo/bar, but we must not recurse into /foo/bar again. finally emit a
+ fake block for [0]/foo/bar
+ A related case is where a path /foo/bar stays constant - it's 'f', 'f' in
+ both trees, same id, but the containing directory changes id. When we choose
+ to output the directory for id A, we may filter things not present in A
+ because we don't know about id B yet, so if we then output it again, we
+ should filter things not present in B, or present in A, because we've output
+ A before. To construct this case, have a directory that links in /foo from
+ its id A in index 0, and also links in another directory /later, which
+ contains a link to /foo under id B in index 1.
+
+ - the user will be looking at each entry at a target index. If we are
+ returning an unmodified dirblock (good), we need to be able to say 'these
+ indexes are relevant in this dirblock'.
+ One confusing case of id emission is when we select a dirblock because of its id in tree 1 and it contains a file id that is present in tree 0, and redirected in tree 1
+ 0 1
+ f r
+
+ here we should not include that data in the output. This means users of the output can say:
+ for key, details in dirblock: stuff, safely.
+ """
+
+ def make_branch_and_tree(self, relpath='.'):
+ """Create format4 working trees to allow relatively easy setup."""
+ return TestCaseWithDirState.make_branch_and_tree(self, relpath,
+ format=bzrdir.format_registry.make_bzrdir('dirstate'))
+
+ def test_empty_tree(self):
+ tree = self.make_branch_and_tree()
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ # the output for iter_dirs_in_trees for an empty tree - one with just a
+ # tree root - should be the root dirblock plus the root-contents dirblock.
+ # the root dirblock should not claim to be a real directory - ever -
+ # because we don't want to search for unknowns in this path
+ result = list(tree.current_dirstate().iter_dirs_in_trees([''], [0]))
+ state = tree.current_dirstate()
+ expected = [(False, state._dirblocks[0]),
+ (True, state._dirblocks[1])]
+ self.assertEqual(expected, result)
+
+ def test_all_paths(self):
+ tree = self.make_branch_and_tree()
+ self.build_tree(['file', 'dir/', 'dir/file', 'dir2/', 'dir2/fileindir2'])
+ tree.add(['file', 'dir', 'dir/file', 'dir2', 'dir2/fileindir2'])
+ tree.lock_read()
+ self.addCleanup(tree.unlock)
+ # the output for iter_dirs_in_trees for this tree should be
+ # the root dirblock (marked as unreal), the contents of root (marked real),
+ # and the contents of dir and dir2 (also marked real).
+ # the root dirblock itself, the one holding just the tree root entry,
+ # should not claim to be a real directory - ever - because we
+ # don't want to search for unknowns in this path
+ result = []
+ # suck the nested iterators.
+ for details in tree.current_dirstate().iter_dirs_in_trees([''], [0]):
+ result.append((details[0], (details[1][0], list(details[1][1]))))
+ state = tree.current_dirstate()
+ expected = [(False, state._dirblocks[0]),
+ (True, state._dirblocks[1]),
+ (True, state._dirblocks[2]),
+ (True, state._dirblocks[3])]
+ self.assertEqual(expected, result)
More information about the bazaar-commits
mailing list