Rev 4404: Use a hybrid pass to walk all nodes. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
John Arbash Meinel
john at arbash-meinel.com
Fri Jun 12 04:10:36 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads
------------------------------------------------------------
revno: 4404
revision-id: john at arbash-meinel.com-20090612031025-m3hbl28qub56doga
parent: john at arbash-meinel.com-20090611221804-xbsmslillnb6tldf
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Thu 2009-06-11 22:10:25 -0500
message:
Use a hybrid pass to walk all nodes.
This avoids a heapq, which we didn't really need anyway.
It is ~ the same number of passes, only we trade off a simplistic
'find_tails' call, with doing more work on the 1-pass over the dict.
It seems to be slightly faster for bzr, and slightly slower for OOo.
Not particularly noticeable in the noise of heads(), but measurable for
__init__ performance.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-06-11 22:18:04 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2009-06-12 03:10:25 +0000
@@ -57,7 +57,7 @@
cdef class _KnownGraphNode:
"""Represents a single object in the known graph."""
- cdef object key
+ cdef public object key
cdef object parents
cdef object children
cdef _KnownGraphNode linear_dominator_node
@@ -271,20 +271,6 @@
next_node.dominator_distance = node.dominator_distance + 1
node = next_node
- cdef object _find_tails(self):
- cdef object tails
- cdef PyObject *temp_node
- cdef Py_ssize_t pos
- cdef _KnownGraphNode node
-
- tails = []
- pos = 0
- while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
- node = <_KnownGraphNode>temp_node
- if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
- PyList_Append(tails, node)
- return tails
-
def _find_gdfo(self):
cdef PyObject *temp_node
cdef Py_ssize_t pos, pos2
@@ -299,35 +285,58 @@
# iter-dict, and then a second time when finding children,
# possibly a third when there is more than one parent)
- tails = self._find_tails()
- todo = []
- for pos from 0 <= pos < PyList_GET_SIZE(tails):
- temp_node = PyList_GET_ITEM(tails, pos)
- node = <_KnownGraphNode>temp_node
- node.gdfo = 1
- heappush(todo, (1, node))
- max_gdfo = len(self._nodes) + 1
- while PyList_GET_SIZE(todo) > 0:
- temp_node = PyTuple_GET_ITEM(heappop(todo), 1)
- node = <_KnownGraphNode>temp_node
- next_gdfo = node.gdfo + 1
- assert next_gdfo <= max_gdfo
- for pos from 0 <= pos < PyList_GET_SIZE(node.children):
- temp_node = PyList_GET_ITEM(node.children, pos)
- child_node = <_KnownGraphNode>temp_node
- if child_node.gdfo < next_gdfo:
- # Only enque children when all of their parents have been
- # resolved
- for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(child_node.parents):
- temp_node = PyTuple_GET_ITEM(child_node.parents, pos2)
- parent_node = <_KnownGraphNode>temp_node
- # We know that 'this' parent is counted
- if parent_node is not node:
- if parent_node.gdfo == -1:
- break
- else:
- child_node.gdfo = next_gdfo
- heappush(todo, (next_gdfo, child_node))
+ pos = 0
+ nodes_to_number = []
+ nodes_to_number_pop = nodes_to_number.pop
+ nodes_to_number_extend = nodes_to_number.extend
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if node.gdfo != -1: # Already done
+ continue
+ # Special case this here, because for all children, we know this
+ # won't be true :)
+ if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
+ node.gdfo = 1
+ # Start numbering from the children
+ nodes_to_number_extend(node.children)
+ else:
+ # We need to start numbering from this node
+ PyList_Append(nodes_to_number, node)
+ while PyList_GET_SIZE(nodes_to_number):
+ # There is no PyList_Pop, though there is a simple static
+ # 'listpop' function that is only exposed through python
+ # methods
+ node = nodes_to_number_pop()
+ if node.gdfo != -1: # Already done
+ # This can happen when you have complex history, and one
+ # parent queues up a child, and the other parent then
+ # queues it up again
+ continue
+ # Find the gdfo for this node based on all parent nodes
+ parent_gdfo = 0
+ for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(node.parents):
+ temp_node = PyTuple_GET_ITEM(node.parents, pos2)
+ parent_node = <_KnownGraphNode>temp_node
+ if parent_node.gdfo == -1:
+ # One of this node's parents has not been numbered yet.
+ # We will number this node when we get back here
+ break
+ if parent_gdfo < parent_node.gdfo:
+ parent_gdfo = parent_node.gdfo
+ else:
+ # All the parents were numbered, so it is safe to number
+ # this child
+ node.gdfo = parent_gdfo + 1
+ if node.gdfo == -1:
+ # We could not number this node, yet, we assume we'll get
+ # back to it when we number a different parent
+ continue
+ # This node was numbered. Let's number all the children we can
+ # There is no PyList_Extend either, and from the looks of it,
+ # listextend(list) is much more efficient because it can
+ # pre-allocated for all entries that will be added, and can do
+ # fast PySequence iteration.
+ nodes_to_number_extend(node.children)
def heads(self, keys):
"""Return the heads from amongst keys.
More information about the bazaar-commits
mailing list