Rev 3506: Using vf.annotate() is vastly faster than the alternative, in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file

Fri Jun 20 22:40:00 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file

------------------------------------------------------------
revno: 3506
revision-id: john at arbash-meinel.com-20080620213932-hb0lqdikv524vedp
parent: john at arbash-meinel.com-20080620210820-bcsuribdjm6bu6m2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: merge3_per_file
timestamp: Fri 2008-06-20 16:39:32 -0500
message:
  Using vf.annotate() is vastly faster than the alternative,
  I'm not sure that it is actually correct yet, as it seems to conflict for stupid reasons.
-------------- next part --------------
=== modified file 'bzrlib/merge.py'

--- a/bzrlib/merge.py	2008-06-20 21:08:20 +0000
+++ b/bzrlib/merge.py	2008-06-20 21:39:32 +0000
@@ -1259,7 +1259,6 @@
         self.vf = vf
         self._last_lines = None
         self._last_lines_revision_id = None
-        self._cached_lines = {} # Map from revision_id => lines
         self._cached_matching_blocks = {}
 
     def plan_merge(self):
@@ -1315,14 +1314,8 @@
         if self._last_lines_revision_id == left_revision:
             left_lines = self._last_lines
         else:
-            if left_revision in self._cached_lines:
-                left_lines = self._cached_lines[left_revision]
-            else:
-                left_lines = self.vf.get_lines(left_revision)
-        if right_revision in self._cached_lines:
-            right_lines = self._cached_lines[left_revision]
-        else:
-            right_lines = self.vf.get_lines(right_revision)
+            left_lines = self.vf.get_lines(left_revision)
+        right_lines = self.vf.get_lines(right_revision)
         self._last_lines = right_lines
         self._last_lines_revision_id = right_revision
         matcher = patiencediff.PatienceSequenceMatcher(None, left_lines,
@@ -1384,12 +1377,16 @@
     def __init__(self, a_rev, b_rev, vf):
        _PlanMergeBase.__init__(self, a_rev, b_rev, vf)
        graph = _mod_graph.Graph(vf)
-       a_ancestry, b_ancestry = graph.find_difference(a_rev, b_rev)
-       self.uncommon = a_ancestry.union(b_ancestry)
-       mutter('Found %s uncommon ancestors to merge for %s',
+       a_ancestry_unique, b_ancestry_unique = graph.find_difference(a_rev, b_rev)
+       self.uncommon = a_ancestry_unique.union(b_ancestry_unique)
+       # assert not a_ancestry.intersection(b_ancestry)
+       a_ancestry = set(vf.get_ancestry(a_rev, topo_sorted=False))
+       b_ancestry = set(vf.get_ancestry(b_rev, topo_sorted=False))
+       uncommon = a_ancestry.symmetric_difference(b_ancestry)
+       self.common = a_ancestry.intersection(b_ancestry)
+       assert uncommon == self.uncommon
+       note('Found %s uncommon ancestors to merge for %s',
               len(self.uncommon), vf)
-       self._cached_lines = dict(zip(self.uncommon,
-                                     vf.get_line_list(self.uncommon)))
 
     def _determine_status(self, revision_id, unique_line_numbers):
         """Determines the status unique lines versus all lcas.
@@ -1403,7 +1400,15 @@
         :param unique_line_numbers: The line numbers of unique lines.
         :return a tuple of (new_this, killed_other):
         """
-        new = self._find_new(revision_id)
+        note('Getting the annotation for {%s}', revision_id)
+        annotated_text = self.vf.annotate(revision_id)
+        note('done')
+        new = set()
+        # Something really weird here: if I use 'idx+1' I get roughly the
+        # same results, which makes something seem really broken.
+        import pdb; pdb.set_trace()
+        new.update([idx for idx, (source, line) in enumerate(annotated_text)
+                         if source in self.uncommon])
         killed = set(unique_line_numbers).difference(new)
         return new, killed
 
@@ -1417,10 +1422,7 @@
             return set()
         parents = self.vf.get_parent_map([version_id])[version_id]
         if len(parents) == 0:
-            if version_id in self._cached_lines:
-                lines = self._cached_lines[version_id]
-            else:
-                lines = self.vf.get_lines(version_id)
+            lines = self.vf.get_lines(version_id)
             return set(range(len(lines)))
         new = None
         for parent in parents:

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-06-20 21:08:20 +0000
+++ b/bzrlib/versionedfile.py	2008-06-20 21:39:32 +0000
@@ -23,9 +23,12 @@
 lazy_import(globals(), """
 
 from bzrlib import (
+    annotate,
     errors,
+    graph as _mod_graph,
     osutils,
     multiparent,
+    trace,
     tsort,
     revision,
     ui,
@@ -555,6 +558,26 @@
         old_plan = _PlanLCAMerge(ver_a, base, self, graph).plan_merge()
         return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
 
+    def annotate(self, version_id):
+        for vf in self.fallback_versionedfiles:
+            try:
+                return vf.annotate(version_id)
+            except (errors.NoSuchRevision, errors.RevisionNotPresent):
+                pass
+        # None of the fallback versioned files has this version, so it is one
+        # of our own texts and we need to do the annotation work ourselves.
+        parents = self.get_parent_map([version_id])[version_id]
+        trace.note('Annotating custom version: %s with parents %s',
+                     version_id, parents)
+        ann_parents = [self.annotate(p) for p in parents]
+        # Now compare this with the parents to determine the annotation
+        graph_obj = self._get_graph()
+        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
+        annotated = annotate.reannotate(ann_parents, self._lines[version_id],
+                                        version_id, None, 
+                                        heads_provider=head_cache)
+        return annotated
+
     def add_lines(self, version_id, parents, lines):
         """See VersionedFile.add_lines