Rev 3508: Use a real Weave object by including only the LCA's. Seems to work correctly and fast. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file

Sat Jun 21 01:40:33 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file

------------------------------------------------------------
revno: 3508
revision-id: john at arbash-meinel.com-20080621004004-rycnow0cg7uaym63
parent: john at arbash-meinel.com-20080620225556-679khqi4j8oaeekv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: merge3_per_file
timestamp: Fri 2008-06-20 19:40:04 -0500
message:
  Use a real Weave object by including only the LCA's. Seems to work correctly and fast.
  Though for one file I'm showing that the revision is already in the ancestry of 'this:' which seems odd.
-------------- next part --------------
=== modified file 'bzrlib/merge.py'

--- a/bzrlib/merge.py	2008-06-20 22:55:56 +0000
+++ b/bzrlib/merge.py	2008-06-21 00:40:04 +0000
@@ -1375,69 +1375,70 @@
     """Plan an annotate merge using on-the-fly annotation"""
 
     def __init__(self, a_rev, b_rev, vf):
-       _PlanMergeBase.__init__(self, a_rev, b_rev, vf)
-       graph = _mod_graph.Graph(vf)
-       a_ancestry_unique, b_ancestry_unique = graph.find_difference(a_rev, b_rev)
-       self.uncommon = a_ancestry_unique.union(b_ancestry_unique)
-       # assert not a_ancestry.intersection(b_ancestry)
-       a_ancestry = set(vf.get_ancestry(a_rev, topo_sorted=False))
-       b_ancestry = set(vf.get_ancestry(b_rev, topo_sorted=False))
-       uncommon = a_ancestry.symmetric_difference(b_ancestry)
-       self.common = a_ancestry.intersection(b_ancestry)
-       assert uncommon == self.uncommon
-       note('Found %s uncommon ancestors to merge for %s',
-              len(self.uncommon), vf)
-
-    def _determine_status(self, revision_id, unique_line_numbers):
-        """Determines the status unique lines versus all lcas.
-
-        Basically, determines why the line is unique to this revision.
-
-        A line may be determined new or killed, but not both.
-
-        :param revision_id: The id of the revision in which the lines are
-            unique
-        :param unique_line_numbers: The line numbers of unique lines.
-        :return a tuple of (new_this, killed_other):
-        """
-        note('Getting the annotation for {%s}', revision_id)
-        annotated_text = self.vf.annotate(revision_id)
-        note('done')
-        new = set()
-        # Something really weird here, if do 'idx+1' I get ~ same results
-        # That makes something seem really broken.
-        # import pdb; pdb.set_trace()
-        new.update([idx for idx, (source, line) in enumerate(annotated_text)
-                         if source in self.uncommon])
-        killed = set(unique_line_numbers).difference(new)
-        return new, killed
-
-    def _find_new(self, version_id):
-        """Determine which lines are new in the ancestry of this version.
-
-        If a lines is present in this version, and not present in any
-        common ancestor, it is considered new.
-        """
-        if version_id not in self.uncommon:
-            return set()
-        parents = self.vf.get_parent_map([version_id])[version_id]
-        if len(parents) == 0:
-            lines = self.vf.get_lines(version_id)
-            return set(range(len(lines)))
-        new = None
-        for parent in parents:
-            blocks = self._get_matching_blocks(version_id, parent)
-            result, unused = self._unique_lines(blocks)
-            parent_new = self._find_new(parent)
-            for i, j, n in blocks:
-                for ii, jj in [(i+r, j+r) for r in range(n)]:
-                    if jj in parent_new:
-                        result.append(ii)
-            if new is None:
-                new = set(result)
-            else:
-                new.intersection_update(result)
-        return new
+        # We don't call _PlanMergeBase, because we don't use most of what it
+        # does
+        self.a_rev = a_rev
+        self.b_rev = b_rev
+        self.vf = vf
+        self._build_weave()
+
+    def _find_recursive_lcas(self):
+        """Find all the ancestors back to a unique lca"""
+        cur_ancestors = (self.a_rev, self.b_rev)
+        note('finding lcas for %s', cur_ancestors)
+        graph = _mod_graph.Graph(self.vf)
+        import pdb; pdb.set_trace()
+        ancestors = [cur_ancestors]
+        while True:
+            next_lcas = graph.find_lca(*cur_ancestors)
+            ancestors.append(next_lcas)
+            if len(next_lcas) == 0: 
+                ancestors[-1] = (NULL_REVISION,)
+                self.vf.add_lines(NULL_REVISION, [], [])
+                break
+            elif len(next_lcas) == 1:
+                break
+            cur_ancestors = next_lcas
+        ancestors.reverse()
+        return ancestors
+
+    def _get_interesting_texts(self, lcas):
+        all_revision_ids = set()
+        for ancestors in lcas:
+            all_revision_ids.update(ancestors)
+        all_revision_ids.add(self.a_rev)
+        all_revision_ids.add(self.b_rev)
+
+        all_texts = dict(zip(all_revision_ids,
+                             self.vf.get_line_list(all_revision_ids)))
+        return all_texts
+
+    def _build_weave(self):
+        from bzrlib import weave
+        self._weave = weave.Weave(weave_name='in_memory_weave',
+                                  allow_reserved=True)
+        lcas = self._find_recursive_lcas()
+        note('Found:\n => %s', '\n => '.join(str(sorted(lca)) for lca in lcas))
+
+        all_texts = self._get_interesting_texts(lcas)
+
+        last_parents = ()
+        for ancestors in lcas:
+            for ancestor in ancestors:
+                self._weave.add_lines(ancestor, last_parents,
+                                      all_texts[ancestor])
+            last_parents = ancestors
+
+    def plan_merge(self):
+        """Generate a 'plan' for merging the two revisions.
+
+        This involves comparing their texts and determining the cause of
+        differences.  If text A has a line and text B does not, then either the
+        line was added to text A, or it was deleted from B.  Once the causes
+        are combined, they are written out in the format described in
+        VersionedFile.plan_merge
+        """
+        return self._weave.plan_merge(self.a_rev, self.b_rev)
 
 
 class _PlanLCAMerge(_PlanMergeBase):

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-05-11 23:49:50 +0000
+++ b/bzrlib/weave.py	2008-06-21 00:40:04 +0000
@@ -191,32 +191,37 @@
       should be no way to get an earlier version deleting a later
       version.
 
-    _weave
+    :ivar _weave:
         Text of the weave; list of control instruction tuples and strings.
 
-    _parents
+    :ivar _parents:
         List of parents, indexed by version number.
         It is only necessary to store the minimal set of parents for
         each version; the parent's parents are implied.
 
-    _sha1s
+    :ivar _sha1s:
         List of hex SHA-1 of each version.
 
-    _names
+    :ivar _names:
         List of symbolic names for each version.  Each should be unique.
 
-    _name_map
+    :ivar _name_map:
         For each name, the version number.
 
-    _weave_name
+    :ivar _weave_name:
         Descriptive name of this weave; typically the filename if known.
         Set by read_weave.
+
+    :ivar _allow_reserved:
+        If True, we are allowed to add reserved ids like 'this:' or 'null:' to
+        the weave. This should only be allowed for in-memory weaves.
     """
 
     __slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
-                 '_weave_name', '_matcher']
+                 '_weave_name', '_matcher', '_allow_reserved']
     
-    def __init__(self, weave_name=None, access_mode='w', matcher=None, get_scope=None):
+    def __init__(self, weave_name=None, access_mode='w', matcher=None,
+                 get_scope=None, allow_reserved=False):
         """Create a weave.
 
         :param get_scope: A callable that returns an opaque object to be used
@@ -230,6 +235,7 @@
         self._names = []
         self._name_map = {}
         self._weave_name = weave_name
+        self._allow_reserved = allow_reserved
         if matcher is None:
             self._matcher = bzrlib.patiencediff.PatienceSequenceMatcher
         else:
@@ -278,7 +284,8 @@
 
     def _lookup(self, name):
         """Convert symbolic version name to index."""
-        self.check_not_reserved_id(name)
+        if not self._allow_reserved:
+            self.check_not_reserved_id(name)
         try:
             return self._name_map[name]
         except KeyError: