Rev 3520: Found a problem with the algorithm. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/lazy_revno

Wed Aug 6 23:45:52 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/lazy_revno

------------------------------------------------------------
revno: 3520
revision-id: john at arbash-meinel.com-20080806224535-nw726e1khk08fsd3
parent: john at arbash-meinel.com-20080806185632-eo9x611gqznui6qe
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_revno
timestamp: Wed 2008-08-06 17:45:35 -0500
message:
  Found a problem with the algorithm.
  Namely, when pruning nodes that are not in the graph, it is possible to prune
  the first parent, which changes the right-parent to become the left-hand
  parent. Which confuses a lot, because it makes the node look like it
  is a descendant, rather than a merge.
  Needs proper tests.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'

--- a/bzrlib/graph.py	2008-08-06 18:56:32 +0000
+++ b/bzrlib/graph.py	2008-08-06 22:45:35 +0000
@@ -1742,7 +1742,15 @@
         ancestor of node. If a nodes 'merged_into' is known, then it is used
         instead.
         """
-        # TODO: Handle if mainline_node is None
+        # TODO: change this when node.merged_into is None, we do a local
+        #       lookup, rather than always going the whole way to the tip
+        #       For example, we could start at 10 nodes past the merged_into
+        #       node, and then go to 20, etc. Until we finally find the
+        #       tip. This isn't guaranteed to find it quickly, but it should
+        #       find it with fewer overall nodes than searching from tip every
+        #       time. (Worst case it takes the tip to find it, but we should be
+        #       caching info about the intermediate nodes anyway, so we don't
+        #       have to walk them a second time.)
         if node.merged_into is not None:
             cur_node = self._get_node(self._mainline_nodes[
                                         node.merged_into])
@@ -2001,8 +2009,22 @@
         def cull_parent_map():
             culled_parent_map = {}
             for revision_id, parent_ids in interesting_parent_map.iteritems():
-                culled_parent_map[revision_id] = [p for p in parent_ids
-                                            if p in interesting_parent_map]
+                if not parent_ids:
+                    culled_parent_map[revision_id] = ()
+                else:
+                    # XXX: Need tests for this case, where we are pruning a
+                    #      node's ancestry because it isn't in the interesting
+                    #      map, but we don't want to cause it to look like it
+                    #      *has* an interesting parent.
+                    if parent_ids[0] not in interesting_parent_map:
+                        # The left-hand ancestor is not interesting, so we just
+                        # pretend this is a ghost, it shouldn't effect
+                        # numbering (maybe)
+                        culled_parent_map[revision_id] = ()
+                    else:
+                        culled_parent_ids = tuple([p for p in parent_ids
+                                                if p in interesting_parent_map])
+                        culled_parent_map[revision_id] = culled_parent_ids
             return culled_parent_map
         return cull_parent_map()
 
@@ -2026,16 +2048,20 @@
              end_of_merge) in tsort.merge_sort(culled_parent_map,
                                                merged_into_node.revision_id,
                                                None, generate_revno=True):
+            # We have filtered our parent_map to only include a certain number
+            # of interesting nodes, so nodes that were not meant to be included
+            # may be incorrectly numbered. Don't include them
+            node = self._get_node(rev_id)
+            if (node.mainline_revno != mainline_revno
+                or node.merged_into != merged_into):
+                continue
+
+            # After culling, only certain nodes would be valid, don't cache the
+            # others
             # The only difference here, is that merge_sort is going to start at
             # 1, rather than descended_from
             real_dotted_revno = ((dotted_revno[0] + mainline_revno - 1,)
                                  + dotted_revno[1:])
-            if dotted_revno[0] == 0:
-                # TODO: We can't strictly trust branches that come in from
-                #       another source.
-                continue
-                import pdb; pdb.set_trace()
-            node = self._get_node(rev_id)
             if node.dotted_revno is not None:
                 if real_dotted_revno != node.dotted_revno:
                     print real_dotted_revno, node.dotted_revno