Rev 3508: Use a real Weave object by including only the LCA's. Seems to work correctly and fast. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file
John Arbash Meinel
john at arbash-meinel.com
Sat Jun 21 01:40:33 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/merge3_per_file
------------------------------------------------------------
revno: 3508
revision-id: john at arbash-meinel.com-20080621004004-rycnow0cg7uaym63
parent: john at arbash-meinel.com-20080620225556-679khqi4j8oaeekv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: merge3_per_file
timestamp: Fri 2008-06-20 19:40:04 -0500
message:
Use a real Weave object by including only the LCA's. Seems to work correctly and fast.
Though for one file I'm showing that the revision is already in the ancestry of 'this:' which seems odd.
-------------- next part --------------
=== modified file 'bzrlib/merge.py'
--- a/bzrlib/merge.py 2008-06-20 22:55:56 +0000
+++ b/bzrlib/merge.py 2008-06-21 00:40:04 +0000
@@ -1375,69 +1375,70 @@
"""Plan an annotate merge using on-the-fly annotation"""
def __init__(self, a_rev, b_rev, vf):
- _PlanMergeBase.__init__(self, a_rev, b_rev, vf)
- graph = _mod_graph.Graph(vf)
- a_ancestry_unique, b_ancestry_unique = graph.find_difference(a_rev, b_rev)
- self.uncommon = a_ancestry_unique.union(b_ancestry_unique)
- # assert not a_ancestry.intersection(b_ancestry)
- a_ancestry = set(vf.get_ancestry(a_rev, topo_sorted=False))
- b_ancestry = set(vf.get_ancestry(b_rev, topo_sorted=False))
- uncommon = a_ancestry.symmetric_difference(b_ancestry)
- self.common = a_ancestry.intersection(b_ancestry)
- assert uncommon == self.uncommon
- note('Found %s uncommon ancestors to merge for %s',
- len(self.uncommon), vf)
-
- def _determine_status(self, revision_id, unique_line_numbers):
- """Determines the status unique lines versus all lcas.
-
- Basically, determines why the line is unique to this revision.
-
- A line may be determined new or killed, but not both.
-
- :param revision_id: The id of the revision in which the lines are
- unique
- :param unique_line_numbers: The line numbers of unique lines.
- :return a tuple of (new_this, killed_other):
- """
- note('Getting the annotation for {%s}', revision_id)
- annotated_text = self.vf.annotate(revision_id)
- note('done')
- new = set()
- # Something really weird here, if do 'idx+1' I get ~ same results
- # That makes something seem really broken.
- # import pdb; pdb.set_trace()
- new.update([idx for idx, (source, line) in enumerate(annotated_text)
- if source in self.uncommon])
- killed = set(unique_line_numbers).difference(new)
- return new, killed
-
- def _find_new(self, version_id):
- """Determine which lines are new in the ancestry of this version.
-
- If a lines is present in this version, and not present in any
- common ancestor, it is considered new.
- """
- if version_id not in self.uncommon:
- return set()
- parents = self.vf.get_parent_map([version_id])[version_id]
- if len(parents) == 0:
- lines = self.vf.get_lines(version_id)
- return set(range(len(lines)))
- new = None
- for parent in parents:
- blocks = self._get_matching_blocks(version_id, parent)
- result, unused = self._unique_lines(blocks)
- parent_new = self._find_new(parent)
- for i, j, n in blocks:
- for ii, jj in [(i+r, j+r) for r in range(n)]:
- if jj in parent_new:
- result.append(ii)
- if new is None:
- new = set(result)
- else:
- new.intersection_update(result)
- return new
+ # We don't call _PlanMergeBase, because we don't use most of what it
+ # does
+ self.a_rev = a_rev
+ self.b_rev = b_rev
+ self.vf = vf
+ self._build_weave()
+
+ def _find_recursive_lcas(self):
+ """Find all the ancestors back to a unique lca"""
+ cur_ancestors = (self.a_rev, self.b_rev)
+ note('finding lcas for %s', cur_ancestors)
+ graph = _mod_graph.Graph(self.vf)
+ import pdb; pdb.set_trace()
+ ancestors = [cur_ancestors]
+ while True:
+ next_lcas = graph.find_lca(*cur_ancestors)
+ ancestors.append(next_lcas)
+ if len(next_lcas) == 0:
+ ancestors[-1] = (NULL_REVISION,)
+ self.vf.add_lines(NULL_REVISION, [], [])
+ break
+ elif len(next_lcas) == 1:
+ break
+ cur_ancestors = next_lcas
+ ancestors.reverse()
+ return ancestors
+
+ def _get_interesting_texts(self, lcas):
+ all_revision_ids = set()
+ for ancestors in lcas:
+ all_revision_ids.update(ancestors)
+ all_revision_ids.add(self.a_rev)
+ all_revision_ids.add(self.b_rev)
+
+ all_texts = dict(zip(all_revision_ids,
+ self.vf.get_line_list(all_revision_ids)))
+ return all_texts
+
+ def _build_weave(self):
+ from bzrlib import weave
+ self._weave = weave.Weave(weave_name='in_memory_weave',
+ allow_reserved=True)
+ lcas = self._find_recursive_lcas()
+ note('Found:\n => %s', '\n => '.join(str(sorted(lca)) for lca in lcas))
+
+ all_texts = self._get_interesting_texts(lcas)
+
+ last_parents = ()
+ for ancestors in lcas:
+ for ancestor in ancestors:
+ self._weave.add_lines(ancestor, last_parents,
+ all_texts[ancestor])
+ last_parents = ancestors
+
+ def plan_merge(self):
+ """Generate a 'plan' for merging the two revisions.
+
+ This involves comparing their texts and determining the cause of
+ differences. If text A has a line and text B does not, then either the
+ line was added to text A, or it was deleted from B. Once the causes
+ are combined, they are written out in the format described in
+ VersionedFile.plan_merge
+ """
+ return self._weave.plan_merge(self.a_rev, self.b_rev)
class _PlanLCAMerge(_PlanMergeBase):
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2008-05-11 23:49:50 +0000
+++ b/bzrlib/weave.py 2008-06-21 00:40:04 +0000
@@ -191,32 +191,37 @@
should be no way to get an earlier version deleting a later
version.
- _weave
+ :ivar _weave:
Text of the weave; list of control instruction tuples and strings.
- _parents
+ :ivar _parents:
List of parents, indexed by version number.
It is only necessary to store the minimal set of parents for
each version; the parent's parents are implied.
- _sha1s
+ :ivar _sha1s:
List of hex SHA-1 of each version.
- _names
+ :ivar _names:
List of symbolic names for each version. Each should be unique.
- _name_map
+ :ivar _name_map:
For each name, the version number.
- _weave_name
+ :ivar _weave_name:
Descriptive name of this weave; typically the filename if known.
Set by read_weave.
+
+ :ivar _allow_reserved:
+ If True, we are allowed to add reserved ids like 'this:' or 'null:' to
+ the weave. This should only be allowed for in-memory weaves.
"""
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
- '_weave_name', '_matcher']
+ '_weave_name', '_matcher', '_allow_reserved']
- def __init__(self, weave_name=None, access_mode='w', matcher=None, get_scope=None):
+ def __init__(self, weave_name=None, access_mode='w', matcher=None,
+ get_scope=None, allow_reserved=False):
"""Create a weave.
:param get_scope: A callable that returns an opaque object to be used
@@ -230,6 +235,7 @@
self._names = []
self._name_map = {}
self._weave_name = weave_name
+ self._allow_reserved = allow_reserved
if matcher is None:
self._matcher = bzrlib.patiencediff.PatienceSequenceMatcher
else:
@@ -278,7 +284,8 @@
def _lookup(self, name):
"""Convert symbolic version name to index."""
- self.check_not_reserved_id(name)
+ if not self._allow_reserved:
+ self.check_not_reserved_id(name)
try:
return self._name_map[name]
except KeyError:
More information about the bazaar-commits
mailing list