Rev 4392: Attempt to make things better by splitting out left_parent and right_parent into in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
John Arbash Meinel
john at arbash-meinel.com
Wed Jun 10 23:03:48 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
------------------------------------------------------------
revno: 4392
revision-id: john at arbash-meinel.com-20090610220339-akbmevv1b0236upf
parent: john at arbash-meinel.com-20090610210222-trfxgzd242tl04nl
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Wed 2009-06-10 17:03:39 -0500
message:
Attempt to make things better by splitting out left_parent and right_parent into
individual attributes (to avoid extra type checks, etc.)
However, this seems to make things *much* worse rather than better.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-06-10 21:02:22 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-06-10 22:03:39 +0000
@@ -30,18 +30,23 @@
int PyDict_SetItem(object d, object k, object v) except -1
void Py_INCREF(object)
+
import heapq
from bzrlib import revision
+heappush = heapq.heappush
+heappop = heapq.heappop
+
cdef class _KnownGraphNode:
"""Represents a single object in the known graph."""
cdef object key
- # Ideally, we could change this into fixed arrays, rather than get the
- # extra allocations. Consider that 99% of all revisions will have <= 2
- # parents, we may want to put in the effort
+ # 99% of all revisions have <= 2 parents, so we pre-allocate space for it,
+ # but allow the flexibility of having N parents
+ cdef _KnownGraphNode left_parent
+ cdef _KnownGraphNode right_parent
cdef list parents
cdef list children
cdef _KnownGraphNode linear_dominator_node
@@ -51,16 +56,14 @@
cdef object ancestor_of
def __init__(self, key, parents):
+ cdef _KnownGraphNode _parent_node
+ cdef int i
+
self.key = key
- if parents is None:
- self.parents = parents
- else:
- if not isinstance(parents, list):
- raise TypeError('parents must be a list')
- for parent in parents:
- if not isinstance(parent, _KnownGraphNode):
- raise TypeError('parents must be a list of _KnownGraphNode')
- self.parents = parents
+ self.left_parent = None
+ self.right_parent = None
+ self.set_parents(parents)
+
self.children = []
# oldest ancestor, such that no parents between here and there have >1
# child or >1 parent.
@@ -89,10 +92,26 @@
else:
return self.linear_dominator_node.key
+ cdef set_parents(self, list parents):
+ cdef int num_parents
+ self.parents = parents
+ if parents is None:
+ return
+ num_parents = len(self.parents)
+ if num_parents > 2:
+ # No special ops
+ return
+ if num_parents > 0:
+ self.left_parent = self.parents[0]
+ if num_parents > 1:
+ self.right_parent = self.parents[1]
+
cdef clear_references(self):
self.parents = None
self.children = None
self.linear_dominator_node = None
+ self.left_parent = None
+ self.right_parent = None
def __repr__(self):
parent_keys = []
@@ -116,6 +135,9 @@
cdef public object _nodes
cdef dict _known_heads
cdef public int do_cache
+ # Nodes we've touched that we'll need to reset their info when heads() is
+ # done
+ cdef list _to_cleanup
def __init__(self, parent_map, do_cache=True):
"""Create a new KnownGraph instance.
@@ -125,6 +147,7 @@
self._nodes = {}
# Maps {sorted(revision_id, revision_id): heads}
self._known_heads = {}
+ self._to_cleanup = []
self.do_cache = int(do_cache)
self._initialize_nodes(parent_map)
self._find_linear_dominators()
@@ -162,7 +185,7 @@
if key in nodes:
node = nodes[key]
assert node.parents is None
- node.parents = parent_nodes
+ node.set_parents(parent_nodes)
else:
node = _KnownGraphNode(key, parent_nodes)
nodes[key] = node
@@ -378,19 +401,50 @@
heads.append(<object>test_key)
return dom_lookup_key, PyFrozenSet_New(heads)
+ cdef int _process_parent(self, _KnownGraphNode node,
+ _KnownGraphNode parent_node,
+ dict candidate_nodes,
+ queue) except -1:
+ """Process the parent of a node, seeing if we need to walk it.
+
+ :return: 0 No extra work needed
+ 1 This was a candidate node, and now there is only 1 candidate
+ left, so break out of the loop
+ """
+ cdef PyObject *maybe_candidate
+ maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
+ if maybe_candidate != NULL:
+ candidate_nodes.pop(parent_node.key)
+ if len(candidate_nodes) <= 1:
+ return 1
+ if parent_node.ancestor_of is None:
+ # This node hasn't been walked yet, so just project node's ancestor
+ # info directly to parent_node, and enqueue it for later processing
+ parent_node.ancestor_of = node.ancestor_of
+ heappush(queue, (-parent_node.gdfo, parent_node))
+ self._to_cleanup.append(parent_node)
+ elif parent_node.ancestor_of != node.ancestor_of:
+ # Combine to get the full set of parents
+ # Rewrite using PySet_* functions, unfortunately you have to use
+ # PySet_Add since there is no PySet_Update... :(
+ all_ancestors = set(parent_node.ancestor_of)
+ all_ancestors.update(node.ancestor_of)
+ parent_node.ancestor_of = tuple(sorted(all_ancestors))
+ return 0
+
cdef object _heads_from_candidate_nodes(self, dict candidate_nodes):
- cdef list to_cleanup
cdef _KnownGraphNode node
cdef _KnownGraphNode parent_node
cdef int num_candidates
+ cdef int num_parents
+ cdef list queue
queue = []
- to_cleanup = []
for node in candidate_nodes.itervalues():
assert node.ancestor_of is None
node.ancestor_of = (node.key,)
queue.append((-node.gdfo, node))
- to_cleanup.append(node)
+ self._to_cleanup.append(node)
heapq.heapify(queue)
# These are nodes that we determined are 'common' that we are no longer
# walking
@@ -423,25 +477,25 @@
# We are at the tip of a long linear region
# We know that there is nothing between here and the tail
# that is interesting, so skip to the end
- parents = [node.linear_dominator_node]
+ if (self._process_parent(node, node.linear_dominator_node,
+ candidate_nodes, queue)):
+ break
else:
- parents = node.parents
- for parent_node in node.parents:
- if parent_node.key in candidate_nodes:
- candidate_nodes.pop(parent_node.key)
- if len(candidate_nodes) <= 1:
- break
- if parent_node.ancestor_of is None:
- # This node hasn't been walked yet
- parent_node.ancestor_of = node.ancestor_of
- # Enqueue this node
- heappush(queue, (-parent_node.gdfo, parent_node))
- to_cleanup.append(parent_node)
- elif parent_node.ancestor_of != node.ancestor_of:
- # Combine to get the full set of parents
- all_ancestors = set(parent_node.ancestor_of)
- all_ancestors.update(node.ancestor_of)
- parent_node.ancestor_of = tuple(sorted(all_ancestors))
- for node in to_cleanup:
+ num_parents = len(node.parents)
+ if num_parents > 2:
+ for parent_node in node.parents:
+ if (self._process_parent(node, parent_node,
+ candidate_nodes, queue)):
+ break
+ else:
+ if num_parents > 0:
+ if (self._process_parent(node, node.left_parent,
+ candidate_nodes, queue)):
+ break
+ if num_parents > 1:
+ if (self._process_parent(node, node.right_parent,
+ candidate_nodes, queue)):
+ break
+ for node in self._to_cleanup:
node.ancestor_of = None
return PyFrozenSet_New(candidate_nodes)
More information about the bazaar-commits
mailing list