Rev 2337: various notes about find_ids_across_trees in sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

Robert Collins robertc at robertcollins.net
Fri Feb 16 14:24:43 GMT 2007


At sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

------------------------------------------------------------
revno: 2337
revision-id: robertc at robertcollins.net-20070216142321-2m20xds02xuigsv6
parent: robertc at robertcollins.net-20070216071959-8jibybk31injks6p
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate
timestamp: Sat 2007-02-17 01:23:21 +1100
message:
  various notes about find_ids_across_trees
modified:
  bzrlib/tree.py                 tree.py-20050309040759-9d5f2496be663e77
  bzrlib/workingtree.py          workingtree.py-20050511021032-29b6ec0a681e02e3
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py	2007-02-13 01:42:33 +0000
+++ b/bzrlib/tree.py	2007-02-16 14:23:21 +0000
@@ -404,28 +404,80 @@
     """
     if not filenames:
         return None
-    specified_ids = _find_filename_ids_across_trees(filenames, trees, 
-                                                    require_versioned)
-    return _find_children_across_trees(specified_ids, trees)
-
-
-def _find_filename_ids_across_trees(filenames, trees, require_versioned):
+    specified_path_ids = _find_ids_across_trees(filenames, trees,
+        require_versioned)
+    return _find_children_across_trees(specified_path_ids, trees)
+#    specified_ids = [id for path, id in _find_path_ids_across_trees(filenames, trees, require_versioned)]
+#    return _find_children_across_trees(specified_ids, trees)
+
+def find_path_ids_across_trees(filenames, trees, require_versioned=True):
+    """Find the paths and ids corresponding to specified filenames.
+    
+    All matches in all trees will be used, and all children of matched
+    directories will be included
+
+    :param filenames: The filenames to find file_ids for
+    :param trees: The trees to find file_ids within
+    :param require_versioned: if true, all specified filenames must occur in
+        at least one tree.
+    :return: a set of (path, file ids) for the specified filenames and their
+        children. The returned path is the path of the id in the first tree
+        that contains it. This matters when files have been moved 
+    """
+    if not filenames:
+        return set()
+    # This function needs to know the ids for filenames in all trees, then
+    # search for those same files and children in all the other trees.
+    # it is complicated by the same path in two trees being able to have
+    # different ids, which might both be present in both trees.
+    # consider two trees, which have had 'mv foo bar' and 'mv baz foo' done
+    # in this case, a diff of 'foo' should should changes to both the current
+    # 'bar' and the current 'foo' which was baz. Its arguable that if 
+    # the situation is 'mv parent/foo bar' and 'mv baz parent/foo', that 
+    # we should return the current bar and the current parent/foo' - at the 
+    # moment we do, but we loop around all ids and all trees: I*T checks.
+    
+    # Updating this algorithm to be fast in the common case:
+    # nothing has moved, all files have the same id in parent, child and there
+    # are only two trees (or one is working tree and the others are parents).
+    # walk the dirstate. as we find each path, gather the paths of that
+    # id in all trees. add a mapping from the id to the path in those trees.
+    # now lookup children by id, again in all trees; for these trees that
+    # nothing has moved in, the id->path mapping will allow us to find the
+    # parent trivially. To answer 'has anything been moved' in one of the
+    # dirstate parent trees though, we will need to stare harder at it.
+
+    #  Now, given a path index, that is trivial for any one tree, and given
+    #  that we can ask for additional data from a dirstate tree, its a single
+    #  pass, though it will require scanning the entire tree to find paths
+    #  that were at the current location.
+    # ideal results?: There are three things: tree, path, id. Pathologically
+    # we can have completely disjoint ids for each tree; but we cannot have 
+    # disjoin paths for each tree, except if we scan each tree for the 
+    # different ids from other trees.
+
+    specified_path_ids = _find_ids_across_trees(filenames, trees,
+        require_versioned)
+    return _find_path_id_children_across_trees(specified_path_ids, trees)
+
+
+def _find_ids_across_trees(filenames, trees, require_versioned):
     """Find the ids corresponding to specified filenames.
     
-    All matches in all trees will be used.
+    All matches in all trees will be used, but subdirectories are not scanned.
 
     :param filenames: The filenames to find file_ids for
     :param trees: The trees to find file_ids within
     :param require_versioned: if true, all specified filenames must occur in
-    at least one tree.
-    :return: a set of file ids for the specified filenames
+        at least one tree.
+    :return: a set of (path, file ids) for the specified filenames
     """
     not_versioned = []
     interesting_ids = set()
     for tree_path in filenames:
         not_found = True
         for tree in trees:
-            file_id = tree.inventory.path2id(tree_path)
+            file_id = tree.path2id(tree_path)
             if file_id is not None:
                 interesting_ids.add(file_id)
                 not_found = False
@@ -452,7 +504,7 @@
         new_pending = set()
         for file_id in pending:
             for tree in trees:
-                if file_id not in tree:
+                if not tree.has_id(file_id):
                     continue
                 entry = tree.inventory[file_id]
                 for child in getattr(entry, 'children', {}).itervalues():
@@ -494,8 +546,8 @@
             supplied and True all the 'specific_files' must be versioned, or
             a PathsNotVersionedError will be thrown.
         """
-        # NB: show_status depends on being able to pass in non-versioned files and
-        # report them as unknown
+        # NB: show_status depends on being able to pass in non-versioned files
+        # and report them as unknown
         trees = (self.source, self.target)
         if extra_trees is not None:
             trees = trees + tuple(extra_trees)

=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	2007-02-15 17:18:30 +0000
+++ b/bzrlib/workingtree.py	2007-02-16 14:23:21 +0000
@@ -560,7 +560,7 @@
 
     def has_id(self, file_id):
         # files that have been deleted are excluded
-        inv = self._inventory
+        inv = self.inventory
         if not inv.has_id(file_id):
             return False
         path = inv.id2path(file_id)

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-16 07:19:59 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-16 14:23:21 +0000
@@ -227,6 +227,8 @@
         This is relatively expensive: we have to walk the entire dirstate.
         Ideally we would not, and can deprecate this function.
         """
+        #: uncomment to trap on inventory requests.
+        # import pdb;pdb.set_trace()
         state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         rows = state._iter_rows()



More information about the bazaar-commits mailing list