Rev 4642: Change the KnownGraph.merge_sort api. in http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 17 21:40:59 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.19-known-graph-sorted
------------------------------------------------------------
revno: 4642
revision-id: john at arbash-meinel.com-20090817204048-e56pce2ol3h22v0x
parent: john at arbash-meinel.com-20090817200626-1m6qaasv2af9x3wx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.19-known-graph-sorted
timestamp: Mon 2009-08-17 15:40:48 -0500
message:
Change the KnownGraph.merge_sort api.
Instead of being a tuple api, it is now an Object api. TIMEIT testing at least
seems to say that _MergeSortNode.key is no slower than using a tuple.
Allocating the nodes is also not much slower (especially since we already needed
them.)
In the end it makes for an api which, IMO, is much easier to use, so
since performance isn't impacted, it is worth switching.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'
--- a/bzrlib/_known_graph_py.py 2009-08-17 20:06:26 +0000
+++ b/bzrlib/_known_graph_py.py 2009-08-17 20:40:48 +0000
@@ -41,6 +41,18 @@
self.parent_keys, self.child_keys)
+class _MergeSortNode(object):
+ """Information about a specific node in the merge graph."""
+
+ __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
+
+ def __init__(self, key, merge_depth, revno, end_of_merge):
+ self.key = key
+ self.merge_depth = merge_depth
+ self.revno = revno
+ self.end_of_merge = end_of_merge
+
+
class KnownGraph(object):
"""This is a class which assumes we already know the full graph."""
@@ -215,6 +227,8 @@
# We intentionally always generate revnos and never force the
# mainline_revisions
# Strip the sequence_number that merge_sort generates
- return [info[1:] for info in tsort.merge_sort(as_parent_map, tip_key,
- mainline_revisions=None,
- generate_revno=True)]
+ return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
+ for _, key, merge_depth, revno, end_of_merge
+ in tsort.merge_sort(as_parent_map, tip_key,
+ mainline_revisions=None,
+ generate_revno=True)]
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-08-17 18:56:38 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-08-17 20:40:48 +0000
@@ -412,27 +412,32 @@
cdef class _MergeSortNode:
"""Tracks information about a node during the merge_sort operation."""
- cdef long merge_depth
+ # Public api
+ cdef public object key
+ cdef public long merge_depth
+ cdef public object end_of_merge # True/False Is this the end of the current merge
+
+ # Private api, used while computing the information
cdef _KnownGraphNode left_parent
cdef _KnownGraphNode left_pending_parent
cdef object pending_parents # list of _KnownGraphNode for non-left parents
- cdef long revno_first
- cdef long revno_second
- cdef long revno_last
+ cdef long _revno_first
+ cdef long _revno_second
+ cdef long _revno_last
# TODO: turn these into flag/bit fields rather than individual members
cdef int is_first_child # Is this the first child?
cdef int seen_by_child # A child node has seen this parent
cdef int completed # Fully Processed
- cdef object end_of_merge # True/False Is this the end of the current merge
- def __init__(self):
+ def __init__(self, key):
+ self.key = key
self.merge_depth = -1
self.left_parent = None
self.left_pending_parent = None
self.pending_parents = None
- self.revno_first = -1
- self.revno_second = -1
- self.revno_last = -1
+ self._revno_first = -1
+ self._revno_second = -1
+ self._revno_last = -1
self.is_first_child = 0
self.seen_by_child = 0
self.completed = 0
@@ -440,7 +445,7 @@
def __repr__(self):
return '%s(depth:%s rev:%s,%s,%s first:%s seen:%s)' % (self.__class__.__name__,
self.merge_depth,
- self.revno_first, self.revno_second, self.revno_last,
+ self._revno_first, self._revno_second, self._revno_last,
self.is_first_child, self.seen_by_child)
cdef int has_pending_parents(self):
@@ -448,6 +453,18 @@
return 1
return 0
+ cdef object _revno(self):
+ if self._revno_first == -1:
+ if self._revno_second != -1:
+ raise RuntimeError('Something wrong with: %s' % (self,))
+ return (self._revno_last,)
+ else:
+ return (self._revno_first, self._revno_second, self._revno_last)
+
+ property revno:
+ def __get__(self):
+ return self._revno()
+
cdef class _MergeSorter:
"""This class does the work of computing the merge_sort ordering.
@@ -487,7 +504,7 @@
cdef _MergeSortNode ms_node
if node.extra is None:
- ms_node = _MergeSortNode()
+ ms_node = _MergeSortNode(node.key)
node.extra = ms_node
else:
ms_node = <_MergeSortNode>node.extra
@@ -539,17 +556,17 @@
ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
if ms_node.is_first_child:
# First child just increments the final digit
- ms_node.revno_first = ms_parent_node.revno_first
- ms_node.revno_second = ms_parent_node.revno_second
- ms_node.revno_last = ms_parent_node.revno_last + 1
+ ms_node._revno_first = ms_parent_node._revno_first
+ ms_node._revno_second = ms_parent_node._revno_second
+ ms_node._revno_last = ms_parent_node._revno_last + 1
else:
# Not the first child, make a new branch
# (mainline_revno, branch_count, 1)
- if ms_parent_node.revno_first == -1:
+ if ms_parent_node._revno_first == -1:
# Mainline ancestor, the increment is on the last digit
- base_revno = ms_parent_node.revno_last
+ base_revno = ms_parent_node._revno_last
else:
- base_revno = ms_parent_node.revno_first
+ base_revno = ms_parent_node._revno_first
temp = PyDict_GetItem(self._revno_to_branch_count,
base_revno)
if temp == NULL:
@@ -558,22 +575,22 @@
branch_count = (<object>temp) + 1
PyDict_SetItem(self._revno_to_branch_count, base_revno,
branch_count)
- ms_node.revno_first = base_revno
- ms_node.revno_second = branch_count
- ms_node.revno_last = 1
+ ms_node._revno_first = base_revno
+ ms_node._revno_second = branch_count
+ ms_node._revno_last = 1
else:
temp = PyDict_GetItem(self._revno_to_branch_count, 0)
if temp == NULL:
# The first root node doesn't have a 3-digit revno
root_count = 0
- ms_node.revno_first = -1
- ms_node.revno_second = -1
- ms_node.revno_last = 1
+ ms_node._revno_first = -1
+ ms_node._revno_second = -1
+ ms_node._revno_last = 1
else:
root_count = (<object>temp) + 1
- ms_node.revno_first = 0
- ms_node.revno_second = root_count
- ms_node.revno_last = 1
+ ms_node._revno_first = 0
+ ms_node._revno_second = root_count
+ ms_node._revno_last = 1
PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
ms_node.completed = 1
if PyList_GET_SIZE(self._scheduled_nodes) == 0:
@@ -665,23 +682,12 @@
# 7ms is computing the return tuple
# 4ms is PyList_Append()
ordered = []
- # output the result in reverse order, and convert from objects into
- # tuples...
+ # output the result in reverse order, and separate the generated info
for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
node = _get_list_node(self._scheduled_nodes, pos)
ms_node = <_MergeSortNode>node.extra
- if ms_node.revno_first == -1:
- if ms_node.revno_second != -1:
- raise RuntimeError('Something wrong with: %s' % (ms_node,))
- revno = (ms_node.revno_last,)
- else:
- revno = (ms_node.revno_first, ms_node.revno_second,
- ms_node.revno_last)
- PyList_Append(ordered, (node.key,
- ms_node.merge_depth, revno,
- ms_node.end_of_merge))
- # Get rid of the extra stored info
+ PyList_Append(ordered, ms_node)
node.extra = None
- # Clear out the scheduled nodes
+ # Clear out the scheduled nodes now that we're done
self._scheduled_nodes = []
return ordered
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py 2009-08-17 18:56:38 +0000
+++ b/bzrlib/branch.py 2009-08-17 20:40:48 +0000
@@ -449,10 +449,8 @@
last_key = (last_revision,)
known_graph = self.repository.revisions.get_known_graph_ancestry(
[last_key])
- revs = known_graph.merge_sort(last_key)
- self._merge_sorted_revisions_cache = [(key[-1], d, rn, eom)
- for key, d, rn, eom in revs]
-
+ self._merge_sorted_revisions_cache = known_graph.merge_sort(
+ last_key)
filtered = self._filter_merge_sorted_revisions(
self._merge_sorted_revisions_cache, start_revision_id,
stop_revision_id, stop_rule)
@@ -468,27 +466,34 @@
"""Iterate over an inclusive range of sorted revisions."""
rev_iter = iter(merge_sorted_revisions)
if start_revision_id is not None:
- for rev_id, depth, revno, end_of_merge in rev_iter:
+ for node in rev_iter:
+ rev_id = node.key[-1]
if rev_id != start_revision_id:
continue
else:
# The decision to include the start or not
# depends on the stop_rule if a stop is provided
- rev_iter = chain(
- iter([(rev_id, depth, revno, end_of_merge)]),
- rev_iter)
+ # so pop this node back into the iterator
+ rev_iter = chain(iter([node]), rev_iter)
break
if stop_revision_id is None:
- for rev_id, depth, revno, end_of_merge in rev_iter:
- yield rev_id, depth, revno, end_of_merge
+ # Yield everything
+ for node in rev_iter:
+ rev_id = node.key[-1]
+ yield (rev_id, node.merge_depth, node.revno,
+ node.end_of_merge)
elif stop_rule == 'exclude':
- for rev_id, depth, revno, end_of_merge in rev_iter:
+ for node in rev_iter:
+ rev_id = node.key[-1]
if rev_id == stop_revision_id:
return
- yield rev_id, depth, revno, end_of_merge
+ yield (rev_id, node.merge_depth, node.revno,
+ node.end_of_merge)
elif stop_rule == 'include':
- for rev_id, depth, revno, end_of_merge in rev_iter:
- yield rev_id, depth, revno, end_of_merge
+ for node in rev_iter:
+ rev_id = node.key[-1]
+ yield (rev_id, node.merge_depth, node.revno,
+ node.end_of_merge)
if rev_id == stop_revision_id:
return
elif stop_rule == 'with-merges':
@@ -497,10 +502,12 @@
left_parent = stop_rev.parent_ids[0]
else:
left_parent = _mod_revision.NULL_REVISION
- for rev_id, depth, revno, end_of_merge in rev_iter:
+ for node in rev_iter:
+ rev_id = node.key[-1]
if rev_id == left_parent:
return
- yield rev_id, depth, revno, end_of_merge
+ yield (rev_id, node.merge_depth, node.revno,
+ node.end_of_merge)
else:
raise ValueError('invalid stop_rule %r' % stop_rule)
=== modified file 'bzrlib/repofmt/weaverepo.py'
--- a/bzrlib/repofmt/weaverepo.py 2009-04-09 20:23:07 +0000
+++ b/bzrlib/repofmt/weaverepo.py 2009-08-17 20:40:48 +0000
@@ -28,6 +28,7 @@
lazy_import(globals(), """
from bzrlib import (
xml5,
+ graph as _mod_graph,
)
""")
from bzrlib import (
@@ -669,6 +670,13 @@
result[key] = parents
return result
+ def get_known_graph_ancestry(self, keys):
+ """Get a KnownGraph instance with the ancestry of keys."""
+ keys = self.keys()
+ parent_map = self.get_parent_map(keys)
+ kg = _mod_graph.KnownGraph(parent_map)
+ return kg
+
def get_record_stream(self, keys, sort_order, include_delta_closure):
for key in keys:
text, parents = self._load_text_parents(key)
=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py 2009-08-17 18:56:38 +0000
+++ b/bzrlib/tests/test__known_graph.py 2009-08-17 20:40:48 +0000
@@ -336,6 +336,8 @@
"""Check that merge based sorting and iter_topo_order on graph works."""
graph = self.make_known_graph(ancestry)
value = graph.merge_sort(branch_tip)
+ value = [(n.key, n.merge_depth, n.revno, n.end_of_merge)
+ for n in value]
if result_list != value:
self.assertEqualDiff(pprint.pformat(result_list),
pprint.pformat(value))
More information about the bazaar-commits
mailing list