Rev 4454: Hack together a pyrex version of the _resolve_annotations function. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-annotate-bug387952
John Arbash Meinel
john at arbash-meinel.com
Tue Jun 16 23:00:46 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-annotate-bug387952
------------------------------------------------------------
revno: 4454
revision-id: john at arbash-meinel.com-20090616220035-fh1ofkid2zoyz18m
parent: john at arbash-meinel.com-20090616212629-vrst5j82phb7ynas
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-annotate-bug387952
timestamp: Tue 2009-06-16 17:00:35 -0500
message:
Hack together a pyrex version of the _resolve_annotations function.
This drops the time down to 5.4s or so (versus 5.0s for bzr.dev).
My guess is that the current overhead is:
1) Always converting the right lines back into simple lines (stripping annotations)
2) The PatienceDiff match on these lines is probably quite a bit more expensive.
I don't know for sure, but I would guess that the uniqueness level is going down,
which causes more recursions, etc for the get_matching_blocks.
However, this is 'close enough' to current performance to not worry about for now.
Just need to create a real module and factor out real tests for this improvement.
-------------- next part --------------
=== modified file 'bzrlib/_chunks_to_lines_pyx.pyx'
--- a/bzrlib/_chunks_to_lines_pyx.pyx 2009-03-23 14:59:43 +0000
+++ b/bzrlib/_chunks_to_lines_pyx.pyx 2009-06-16 22:00:35 +0000
@@ -35,6 +35,26 @@
Py_ssize_t PyString_GET_SIZE(object p)
object PyString_FromStringAndSize(char *c_str, Py_ssize_t len)
+ int PyTuple_CheckExact(object t)
+ Py_ssize_t PyTuple_GET_SIZE(object t)
+ PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t idx)
+
+ void Py_INCREF(object)
+
+ int PyList_CheckExact(object l)
+ int PyList_SetItem(object l, Py_ssize_t idx, object v) except -1
+ # TODO: PyList_SET_ITEM?
+ PyObject *PyList_GET_ITEM(object l, Py_ssize_t idx)
+ Py_ssize_t PyList_GET_SIZE(object l)
+
+ int Py_EQ
+ int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
+ PyObject *left, PyObject *right, int opid) except -1
+
+ int PyAnySet_Check(object)
+ Py_ssize_t PySet_GET_SIZE(object)
+
+
cdef extern from "string.h":
void *memchr(void *s, int c, size_t n)
@@ -128,3 +148,70 @@
if tail is not None:
PyList_Append(lines, tail)
return lines
+
+
+def _resolve_matching_region(output_lines, annotated_lines, new_idx,
+ right_parent_lines, parent_idx,
+ match_len, new_key, heads_provider,
+ break_tie):
+ """For this matching region, determine final annotations."""
+ cdef Py_ssize_t c_new_idx, c_parent_idx, c_match_len
+ cdef Py_ssize_t c_idx
+ cdef PyObject *temp
+ cdef PyObject *new_line_key, *parent_key
+ cdef PyObject *new_key_ptr
+
+ if not PyList_CheckExact(output_lines):
+ raise TypeError('output_lines must be a list')
+ if not PyList_CheckExact(annotated_lines):
+ raise TypeError('annotated_lines must be a list')
+ if not PyList_CheckExact(right_parent_lines):
+ raise TypeError('right_parent_lines must be a list')
+ c_match_len = match_len
+ c_new_idx = new_idx
+ c_parent_idx = parent_idx
+ new_key_ptr = <PyObject*>new_key
+ if heads_provider is not None:
+ heads = heads_provider.heads
+ else:
+ heads = None
+ for c_idx from 0 <= c_idx < match_len:
+ temp = PyList_GET_ITEM(annotated_lines, c_new_idx + c_idx)
+ new_line = <object>temp
+ if (not PyTuple_CheckExact(new_line)
+ or PyTuple_GET_SIZE(new_line) != 2):
+ raise TypeError('all lines must be tuples of (annotation, text)')
+ new_line_key = PyTuple_GET_ITEM(new_line, 0)
+
+ temp = PyList_GET_ITEM(right_parent_lines, c_parent_idx + c_idx)
+ parent_line = <object>temp
+ if (not PyTuple_CheckExact(parent_line)
+ or PyTuple_GET_SIZE(parent_line) != 2):
+ raise TypeError('all lines must be tuples of (annotation, text)')
+ parent_key = PyTuple_GET_ITEM(parent_line, 0)
+
+ if (new_line_key == parent_key # exact object match
+ or new_line_key == new_key_ptr
+ or PyObject_RichCompareBool_ptr(new_line_key, parent_key, Py_EQ)
+ or PyObject_RichCompareBool_ptr(new_line_key, new_key_ptr, Py_EQ)):
+ # Either the lines have identical annotations, or we know that
+ # right_parent_lines supersedes (annotated_lines is marked with
+ # 'new_key')
+ annotated_line = parent_line
+ elif heads is None:
+ annotated_line = (new_key, new_line[1])
+ else:
+ the_heads = heads((<object>new_line_key, <object>parent_key))
+ if not PyAnySet_Check(the_heads):
+ # This works for set() or frozenset()
+ raise TypeError('expected heads() to return a Set object.')
+ if PySet_GET_SIZE(the_heads) == 1:
+ # Unfortunately there isn't anything like PyDict_GetNext :(
+ # And PySet_Pop() would actually mutate set which we don't want
+ # to do
+ (head,) = the_heads
+ annotated_line = (head, new_line[1])
+ else:
+ annotated_line = break_tie([new_line, parent_line])
+ Py_INCREF(annotated_line) # PyList_SetItem steals a ref
+ PyList_SetItem(output_lines, c_new_idx + c_idx, annotated_line)
=== modified file 'bzrlib/annotate.py'
--- a/bzrlib/annotate.py 2009-06-16 21:26:29 +0000
+++ b/bzrlib/annotate.py 2009-06-16 22:00:35 +0000
@@ -400,7 +400,7 @@
def _resolve_matching_region(output_lines, annotated_lines, new_idx,
right_parent_lines, parent_idx,
- match_len, new_revision_id, heads_provider,
+ match_len, new_key, heads_provider,
break_tie):
"""For this matching region, determine final annotations."""
annotated_match_subset = annotated_lines[new_idx:new_idx + match_len]
@@ -414,14 +414,14 @@
new_line = annotated_match_subset[idx]
right_line = right_parent_subset[idx]
assert new_line[1] == right_line[1]
- if new_line[0] == right_line[0] or new_line[0] == new_revision_id:
+ if new_line[0] == right_line[0] or new_line[0] == new_key:
# These have identical annotations, or the right parent supersedes
# the 'new' status
output_lines[out_idx] = right_line
else:
# Both new and right parent lay claim to this line, resolve
if heads_provider is None:
- output_lines[out_idx] = (new_revision_id, right_line[1])
+ output_lines[out_idx] = (new_key, right_line[1])
else:
heads = heads_provider.heads((new_line[0], right_line[0]))
if len(heads) == 1:
@@ -475,3 +475,6 @@
last_new_idx = new_idx + match_len
return lines
+
+
+from bzrlib._chunks_to_lines_pyx import _resolve_matching_region
More information about the bazaar-commits
mailing list