Rev 4378: Implement _heads_from_candidate_keys using a gdfo algorithm. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

John Arbash Meinel john at arbash-meinel.com
Mon Jun 8 21:12:27 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

------------------------------------------------------------
revno: 4378
revision-id: john at arbash-meinel.com-20090608201222-gzehwzf2q98gmk0d
parent: john at arbash-meinel.com-20090608193114-mzex30467ukbk0dj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Mon 2009-06-08 15:12:22 -0500
message:
  Implement _heads_from_candidate_keys using a gdfo algorithm.
  
  It seems to pass all test cases. The best guess is that walking by gdfo
  avoids the earlier problem of 'shortcuts' causing invalid results.
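
For reference, the idea behind the new walk can be written as a small
standalone sketch. This is illustrative code, not the KnownGraph
implementation in the diff below; heads_by_gdfo, parent_map, and the plain
gdfo dict are made-up names, and the sketch assumes gdfo has already been
computed so that every child's gdfo is strictly greater than its parents'.
Candidates are walked from the highest gdfo downward, each walked node
carries the set of candidates known to descend from it, a candidate reached
from another candidate is dropped, and a node reached by every candidate is
'common' and is not walked any further:

import heapq

def heads_by_gdfo(candidate_keys, parent_map, gdfo):
    """Return the candidates that are not ancestors of other candidates.

    parent_map maps key -> tuple of parent keys; gdfo maps key -> greatest
    distance from origin (1 for a root, max(parent gdfo) + 1 otherwise).
    """
    candidates = set(candidate_keys)
    num_candidates = len(candidates)
    # For each walked node, the candidates known to have it as an ancestor.
    reached_by = dict((key, frozenset([key])) for key in candidates)
    # Max-heap on gdfo: a child's gdfo always exceeds its parents', so every
    # walked descendant of a node is popped before the node itself.
    queue = [(-gdfo[key], key) for key in candidates]
    heapq.heapify(queue)
    while queue and len(candidates) > 1:
        _, key = heapq.heappop(queue)
        reaching = reached_by[key]
        if len(reaching) == num_candidates:
            # Every candidate reaches this node, so it is common; stop here.
            continue
        for parent_key in parent_map.get(key, ()):
            if parent_key in candidates:
                # A candidate reachable from another candidate is not a head.
                candidates.discard(parent_key)
            if parent_key not in reached_by:
                # First time this parent is reached: give it our set, queue it.
                reached_by[parent_key] = reaching
                heapq.heappush(queue, (-gdfo[parent_key], parent_key))
            else:
                # Already queued (or a candidate): just merge in our set.
                reached_by[parent_key] = reached_by[parent_key] | reaching
    return candidates

# A diamond: 'd' merges 'b' and 'c', which both descend from the root 'a'.
parent_map = {'a': (), 'b': ('a',), 'c': ('a',), 'd': ('b', 'c')}
gdfo = {'a': 1, 'b': 2, 'c': 2, 'd': 3}
heads_by_gdfo(['b', 'c'], parent_map, gdfo)   # -> set(['b', 'c'])
heads_by_gdfo(['a', 'd'], parent_map, gdfo)   # -> set(['d']), 'a' is dominated
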
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2009-06-08 19:31:14 +0000
+++ b/bzrlib/graph.py	2009-06-08 20:12:22 +0000
@@ -1174,7 +1174,7 @@
     """Represents a single object in the known graph."""
 
     __slots__ = ('key', 'parent_keys', 'children', 'linear_dominator',
-                 'dominator_distance', 'gdfo')
+                 'dominator_distance', 'gdfo', 'ancestor_of', 'was_walked')
 
     def __init__(self, key, parent_keys):
         self.key = key
@@ -1184,6 +1184,10 @@
         self.dominator_distance = 0
         # Greatest distance from origin
         self.gdfo = None
+        # This will become a tuple of known heads that have this node as an
+        # ancestor
+        self.ancestor_of = None
+        self.was_walked = False
 
     def __repr__(self):
         return '%s(%s  gdfo:%s par:%s child:%s dom:%s %s)' % (
@@ -1367,13 +1371,67 @@
             def get_linear_head():
                 return max(candidate_nodes, key=get_distance)
             return set([get_linear_head()])
-        if len(candidate_nodes) > 2:
-            # Not supported yet
-            g = Graph(self)
-            return g.heads(candidate_nodes.keys())
-        # So we know we have 2 candidate nodes, lets see what we can find
-        g = Graph(self)
-        return g.heads(candidate_nodes.keys())
+        return self._heads_from_candidate_nodes(candidate_nodes)
+
+    def _heads_from_candidate_nodes(self, candidate_nodes):
+        queue = []
+        for node in candidate_nodes.itervalues():
+            assert node.ancestor_of is None
+            node.ancestor_of = (node.key,)
+            queue.append((-node.gdfo, node))
+        heapq.heapify(queue)
+        # Nodes that we have determined are 'common' and that we are no
+        # longer walking
+        stop_nodes = {}
+        # Now we walk nodes until all nodes that are being walked are 'common'
+        num_candidates = len(candidate_nodes)
+        while queue and len(candidate_nodes) > 1:
+            _, next = heapq.heappop(queue)
+            assert not next.was_walked
+            assert next.ancestor_of is not None
+            next.was_walked = True
+            if len(next.ancestor_of) == num_candidates:
+                # This node is now considered 'common'
+                stop_nodes[next.key] = next
+                continue
+            if next.parent_keys is None:
+                # This is a ghost
+                stop_nodes[next.key] = next
+                continue
+            # Now project the current node's ancestor list to the parent nodes,
+            # and queue them up to be walked
+            for parent_key in next.parent_keys:
+                if parent_key in candidate_nodes:
+                    candidate_nodes.pop(parent_key)
+                parent_node = self._nodes[parent_key]
+                assert not parent_node.was_walked
+                if parent_node.ancestor_of is None:
+                    # This node hasn't been walked yet
+                    parent_node.ancestor_of = next.ancestor_of
+                    # Enqueue this node
+                    heapq.heappush(queue, (-parent_node.gdfo, parent_node))
+                else:
+                    # Combine to get the full set of parents
+                    all_ancestors = set(parent_node.ancestor_of)
+                    all_ancestors.update(next.ancestor_of)
+                    all_ancestors = tuple(sorted(all_ancestors))
+                    parent_node.ancestor_of = all_ancestors
+                    # This would otherwise require popping the item out of the
+                    # queue, because we think we are done processing it.
+                    # As is, we'll just let the queue clean itself up later.
+                    # if len(parent_node.ancestor_of) == num_candidates:
+                    #     # This is now a common node
+                    #     stop_nodes[parent_node.key] = parent_node
+                    # This node should already be enqueued by whoever walked it
+                    # earlier
+        # import pdb; pdb.set_trace()
+        self._clear_ancestry_info()
+        return set(candidate_nodes)
+
+    def _clear_ancestry_info(self):
+        for node in self._nodes.itervalues():
+            node.ancestor_of = None
+            node.was_walked = False
 
     def get_parent_map(self, keys):
         # Thunk to match the Graph._parents_provider api.

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2009-05-19 06:02:02 +0000
+++ b/bzrlib/tests/test_graph.py	2009-06-08 20:12:22 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2007 Canonical Ltd
+# Copyright (C) 2007, 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by

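The walk sketched above (like the KnownGraph code in this revision) relies on
every node already having a gdfo assigned. For completeness, here is a
standalone sketch of how such values can be derived from a parent map;
compute_gdfo is an illustrative name rather than bzrlib API, and it assumes
every parent key is itself present in the map (no ghosts):

def compute_gdfo(parent_map):
    """Return {key: gdfo}, where roots get 1 and every other node gets
    max(parent gdfo) + 1, so a child's gdfo always exceeds its parents'."""
    children = dict((key, []) for key in parent_map)
    pending = {}
    for key, parents in parent_map.items():
        pending[key] = len(parents)
        for parent in parents:
            children[parent].append(key)
    gdfo = {}
    # Process in topological order, starting from the roots; a node is ready
    # once all of its parents have been assigned a gdfo.
    todo = [key for key, count in pending.items() if count == 0]
    while todo:
        key = todo.pop()
        gdfo[key] = 1 + max([gdfo[p] for p in parent_map[key]] or [0])
        for child in children[key]:
            pending[child] -= 1
            if pending[child] == 0:
                todo.append(child)
    return gdfo

# For the diamond used earlier this yields {'a': 1, 'b': 2, 'c': 2, 'd': 3}.
compute_gdfo({'a': (), 'b': ('a',), 'c': ('a',), 'd': ('b', 'c')})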

