Rev 4500: Combine left and right knowing that we already have sorted tuples. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

John Arbash Meinel john at arbash-meinel.com
Tue Jun 23 22:42:17 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

------------------------------------------------------------
revno: 4500
revision-id: john at arbash-meinel.com-20090623214211-rodknh0g95udq10g
parent: john at arbash-meinel.com-20090623210525-5xgt7byhm4ble0md
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Tue 2009-06-23 16:42:11 -0500
message:
  Combine left and right knowing that we already have sorted tuples.
  Add a cache of entries. Now down to 7.46s.
-------------- next part --------------
=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx	2009-06-23 21:05:25 +0000
+++ b/bzrlib/_annotator_pyx.pyx	2009-06-23 21:42:11 +0000
@@ -26,7 +26,20 @@
     int PyList_CheckExact(object)
     PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
     Py_ssize_t PyList_GET_SIZE(object)
+    int PyList_Append(object, object) except -1
     int PyList_SetItem(object, Py_ssize_t o, object) except -1
+    int PyList_Sort(object) except -1
+
+    int PyTuple_CheckExact(object)
+    object PyTuple_New(Py_ssize_t len)
+    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
+    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
+    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
+    Py_ssize_t PyTuple_GET_SIZE(object)
+
+    PyObject *PyDict_GetItem(object d, object k)
+    int PyDict_SetItem(object d, object k, object v) except -1
+
     void Py_INCREF(object)
 
     int Py_EQ
@@ -90,6 +103,70 @@
     return 0
 
 
+cdef object _combine_annotations(ann_one, ann_two, cache):
+    """Combine the annotations from both sides."""
+    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
+    cdef Py_ssize_t out_pos
+    cdef PyObject *temp
+
+    cache_key = (ann_one, ann_two)
+    temp = PyDict_GetItem(cache, cache_key)
+    if temp != NULL:
+        return <object>temp
+
+    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
+        raise TypeError('annotations must be tuples')
+    # We know that annotations are tuples, and that both sides are already
+    # sorted, so we can just walk and update a new list.
+    len_one = PyTuple_GET_SIZE(ann_one)
+    len_two = PyTuple_GET_SIZE(ann_two)
+    if len_one < 1 or len_two < 1:
+        raise ValueError('annotations cannot claim 0 references.')
+    pos_one = 0
+    pos_two = 0
+    out_pos = 0
+    temp = PyTuple_GET_ITEM(ann_one, pos_one); left = <object>temp
+    temp = PyTuple_GET_ITEM(ann_two, pos_two); right = <object>temp
+    new_ann = PyTuple_New(len_one + len_two)
+    while left is not None or right is not None:
+        if PyObject_RichCompareBool(left, right, Py_EQ):
+            # Append once, increment both
+            Py_INCREF(left)
+            PyTuple_SET_ITEM(new_ann, out_pos, left)
+            pos_one = pos_one + 1
+            pos_two = pos_two + 1
+            if pos_one >= len_one:
+                left = None
+            else:
+                temp = PyTuple_GET_ITEM(ann_one, pos_one); left = <object>temp
+            if pos_two >= len_two:
+                right = None
+            else:
+                temp = PyTuple_GET_ITEM(ann_two, pos_two); right = <object>temp
+        elif right is None or (left is not None and left < right):
+            Py_INCREF(left)
+            PyTuple_SET_ITEM(new_ann, out_pos, left)
+            pos_one = pos_one + 1
+            if pos_one >= len_one:
+                left = None
+            else:
+                temp = PyTuple_GET_ITEM(ann_one, pos_one); left = <object>temp
+        else: # right < left
+            Py_INCREF(right)
+            PyTuple_SET_ITEM(new_ann, out_pos, right)
+            pos_two = pos_two + 1
+            if pos_two >= len_two:
+                right = None
+            else:
+                temp = PyTuple_GET_ITEM(ann_two, pos_two); right = <object>temp
+        out_pos = out_pos + 1
+    if out_pos != len_one + len_two:
+        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
+        new_ann = new_ann[0:out_pos]
+    PyDict_SetItem(cache, cache_key, new_ann)
+    return new_ann
+
+
 class Annotator:
     """Class that drives performing annotations."""
 
@@ -191,7 +268,8 @@
     def _update_from_other_parents(self, key, annotations, lines,
                                    this_annotation, parent_key):
         """Reannotate this text relative to a second (or more) parent."""
-        cdef long parent_idx, ann_idx, lines_idx, match_len, idx
+        cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
+        cdef Py_ssize_t pos
         cdef PyObject *temp
         parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
             key, lines, parent_key)
@@ -199,6 +277,7 @@
         last_ann = None
         last_parent = None
         last_res = None
+        cache = {}
         for parent_idx, lines_idx, match_len in matching_blocks:
             _check_match_ranges(parent_annotations, annotations,
                                 parent_idx, lines_idx, match_len)
@@ -226,9 +305,7 @@
                     Py_INCREF(last_res)
                     PyList_SetItem(annotations, ann_idx, last_res)
                 else:
-                    new_ann = set(ann)
-                    new_ann.update(par_ann)
-                    new_ann = tuple(sorted(new_ann))
+                    new_ann = _combine_annotations(ann, par_ann, cache)
                     Py_INCREF(new_ann)
                     PyList_SetItem(annotations, ann_idx, new_ann)
                     last_ann = ann



More information about the bazaar-commits mailing list