Rev 4454: Hack together a pyrex version of the _resolve_annotations function. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-annotate-bug387952

John Arbash Meinel john at arbash-meinel.com
Tue Jun 16 23:00:46 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-annotate-bug387952

------------------------------------------------------------
revno: 4454
revision-id: john at arbash-meinel.com-20090616220035-fh1ofkid2zoyz18m
parent: john at arbash-meinel.com-20090616212629-vrst5j82phb7ynas
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-annotate-bug387952
timestamp: Tue 2009-06-16 17:00:35 -0500
message:
  Hack together a pyrex version of the _resolve_annotations function.
  This drops the time down to 5.4s or so (versus 5.0s for bzr.dev).
  My guess is that the current overhead is:
  1) Always converting the right lines back into simple lines (stripping annotations)
  2) The PatienceDiff match on these lines is probably quite a bit more expensive.
  I don't know for sure, but I would guess that the uniqueness level is going down,
  which causes more recursions, etc for the get_matching_blocks.
  However, this is 'close enough' to current performance to not worry about for now.
  
  Just need to create a real module and factor out real tests for this improvement.
-------------- next part --------------
=== modified file 'bzrlib/_chunks_to_lines_pyx.pyx'
--- a/bzrlib/_chunks_to_lines_pyx.pyx	2009-03-23 14:59:43 +0000
+++ b/bzrlib/_chunks_to_lines_pyx.pyx	2009-06-16 22:00:35 +0000
@@ -35,6 +35,26 @@
     Py_ssize_t PyString_GET_SIZE(object p)
     object PyString_FromStringAndSize(char *c_str, Py_ssize_t len)
 
+    int PyTuple_CheckExact(object t)
+    Py_ssize_t PyTuple_GET_SIZE(object t)
+    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t idx)
+
+    void Py_INCREF(object)
+
+    int PyList_CheckExact(object l)
+    int PyList_SetItem(object l, Py_ssize_t idx, object v) except -1
+    # TODO: PyList_SET_ITEM?
+    PyObject *PyList_GET_ITEM(object l, Py_ssize_t idx)
+    Py_ssize_t PyList_GET_SIZE(object l)
+
+    int Py_EQ
+    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
+        PyObject *left, PyObject *right, int opid) except -1
+
+    int PyAnySet_Check(object)
+    Py_ssize_t PySet_GET_SIZE(object)
+
+
 cdef extern from "string.h":
     void *memchr(void *s, int c, size_t n)
 
@@ -128,3 +148,70 @@
     if tail is not None:
         PyList_Append(lines, tail)
     return lines
+
+
+def _resolve_matching_region(output_lines, annotated_lines, new_idx,
+                             right_parent_lines, parent_idx,
+                             match_len, new_key, heads_provider,
+                             break_tie):
+    """For this matching region, determine final annotations."""
+    cdef Py_ssize_t c_new_idx, c_parent_idx, c_match_len
+    cdef Py_ssize_t c_idx
+    cdef PyObject *temp
+    cdef PyObject *new_line_key, *parent_key
+    cdef PyObject *new_key_ptr
+
+    if not PyList_CheckExact(output_lines):
+        raise TypeError('output_lines must be a list')
+    if not PyList_CheckExact(annotated_lines):
+        raise TypeError('annotated_lines must be a list')
+    if not PyList_CheckExact(right_parent_lines):
+        raise TypeError('right_parent_lines must be a list')
+    c_match_len = match_len
+    c_new_idx = new_idx
+    c_parent_idx = parent_idx
+    new_key_ptr = <PyObject*>new_key
+    if heads_provider is not None:
+        heads = heads_provider.heads
+    else:
+        heads = None
+    for c_idx from 0 <= c_idx < match_len:
+        temp = PyList_GET_ITEM(annotated_lines, c_new_idx + c_idx)
+        new_line = <object>temp
+        if (not PyTuple_CheckExact(new_line)
+            or PyTuple_GET_SIZE(new_line) != 2):
+            raise TypeError('all lines must be tuples of (annotation, text)')
+        new_line_key = PyTuple_GET_ITEM(new_line, 0)
+
+        temp = PyList_GET_ITEM(right_parent_lines, c_parent_idx + c_idx)
+        parent_line = <object>temp
+        if (not PyTuple_CheckExact(parent_line)
+             or PyTuple_GET_SIZE(parent_line) != 2):
+            raise TypeError('all lines must be tuples of (annotation, text)')
+        parent_key = PyTuple_GET_ITEM(parent_line, 0)
+
+        if (new_line_key == parent_key # exact object match
+            or new_line_key == new_key_ptr
+            or PyObject_RichCompareBool_ptr(new_line_key, parent_key, Py_EQ)
+            or PyObject_RichCompareBool_ptr(new_line_key, new_key_ptr, Py_EQ)):
+            # Either the lines have identical annotations, or we know that
+            # right_parent_lines supersedes (annotated_lines is marked with
+            # 'new_key')
+            annotated_line = parent_line
+        elif heads is None:
+            annotated_line = (new_key, new_line[1])
+        else:
+            the_heads = heads((<object>new_line_key, <object>parent_key))
+            if not PyAnySet_Check(the_heads):
+                # This works for set() or frozenset()
+                raise TypeError('expected heads() to return a Set object.')
+            if PySet_GET_SIZE(the_heads) == 1:
+                # Unfortunately there isn't anything like PyDict_GetNext :(
+                # And PySet_Pop() would actually mutate set which we don't want
+                # to do
+                (head,) = the_heads
+                annotated_line = (head, new_line[1])
+            else:
+                annotated_line = break_tie([new_line, parent_line])
+        Py_INCREF(annotated_line) # PyList_SetItem steals a ref
+        PyList_SetItem(output_lines, c_new_idx + c_idx, annotated_line)

=== modified file 'bzrlib/annotate.py'
--- a/bzrlib/annotate.py	2009-06-16 21:26:29 +0000
+++ b/bzrlib/annotate.py	2009-06-16 22:00:35 +0000
@@ -400,7 +400,7 @@
 
 def _resolve_matching_region(output_lines, annotated_lines, new_idx,
                              right_parent_lines, parent_idx,
-                             match_len, new_revision_id, heads_provider,
+                             match_len, new_key, heads_provider,
                              break_tie):
     """For this matching region, determine final annotations."""
     annotated_match_subset = annotated_lines[new_idx:new_idx + match_len]
@@ -414,14 +414,14 @@
         new_line = annotated_match_subset[idx]
         right_line = right_parent_subset[idx]
         assert new_line[1] == right_line[1]
-        if new_line[0] == right_line[0] or new_line[0] == new_revision_id:
+        if new_line[0] == right_line[0] or new_line[0] == new_key:
             # These have identical annotations, or the right parent supersedes
             # the 'new' status
             output_lines[out_idx] = right_line
         else:
             # Both new and right parent lay claim to this line, resolve
             if heads_provider is None:
-                output_lines[out_idx] = (new_revision_id, right_line[1])
+                output_lines[out_idx] = (new_key, right_line[1])
             else:
                 heads = heads_provider.heads((new_line[0], right_line[0]))
                 if len(heads) == 1:
@@ -475,3 +475,6 @@
 
         last_new_idx = new_idx + match_len
     return lines
+
+
+from bzrlib._chunks_to_lines_pyx import _resolve_matching_region



More information about the bazaar-commits mailing list