Rev 4499: A bit of tweaking to the _update_from_other_parents drops us to 7.67s in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

John Arbash Meinel john at arbash-meinel.com
Tue Jun 23 22:05:31 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

------------------------------------------------------------
revno: 4499
revision-id: john at arbash-meinel.com-20090623210525-5xgt7byhm4ble0md
parent: john at arbash-meinel.com-20090623204059-6i3q6h0c3n14fzuv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Tue 2009-06-23 16:05:25 -0500
message:
  A bit of tweaking to the _update_from_other_parents drops us to 7.67s
-------------- next part --------------
=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx	2009-06-23 20:40:59 +0000
+++ b/bzrlib/_annotator_pyx.pyx	2009-06-23 21:05:25 +0000
@@ -29,6 +29,8 @@
     int PyList_SetItem(object, Py_ssize_t o, object) except -1
     void Py_INCREF(object)
 
+    int Py_EQ
+    int PyObject_RichCompareBool(object, object, int opid) except -1
 
 from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
 
@@ -63,6 +65,31 @@
         return record.key, lines, num_lines
 
 
+cdef int _check_annotations_are_lists(annotations,
+                                      parent_annotations) except -1:
+    if not PyList_CheckExact(annotations):
+        raise TypeError('annotations must be a list')
+    if not PyList_CheckExact(parent_annotations):
+        raise TypeError('parent_annotations must be a list')
+    return 0
+
+
+cdef int _check_match_ranges(parent_annotations, annotations,
+                         Py_ssize_t parent_idx, Py_ssize_t lines_idx,
+                         Py_ssize_t match_len) except -1:
+    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
+        raise ValueError('Match length exceeds len of'
+                         ' parent_annotations %s > %s'
+                         % (parent_idx + match_len,
+                            PyList_GET_SIZE(parent_annotations)))
+    if lines_idx + match_len > PyList_GET_SIZE(annotations):
+        raise ValueError('Match length exceeds len of'
+                         ' annotations %s > %s'
+                         % (lines_idx + match_len,
+                            PyList_GET_SIZE(annotations)))
+    return 0
+
+
 class Annotator:
     """Class that drives performing annotations."""
 
@@ -138,24 +165,13 @@
         cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
         cdef PyObject *temp
 
-        if not PyList_CheckExact(annotations):
-            raise TypeError('annotations must be a list')
         parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
             key, lines, parent_key)
+        _check_annotations_are_lists(annotations, parent_annotations)
 
-        if not PyList_CheckExact(parent_annotations):
-            raise TypeError('parent_annotations must be a list')
         for parent_idx, lines_idx, match_len in matching_blocks:
-            if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
-                raise ValueError('Match length exceeds len of'
-                                 ' parent_annotations %s > %s'
-                                 % (parent_idx + match_len,
-                                    PyList_GET_SIZE(parent_annotations)))
-            if lines_idx + match_len > PyList_GET_SIZE(annotations):
-                raise ValueError('Match length exceeds len of'
-                                 ' annotations %s > %s'
-                                 % (lines_idx + match_len,
-                                    PyList_GET_SIZE(annotations)))
+            _check_match_ranges(parent_annotations, annotations,
+                                parent_idx, lines_idx, match_len)
             for idx from 0 <= idx < match_len:
                 temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
                 ann = <object>temp
@@ -175,47 +191,46 @@
     def _update_from_other_parents(self, key, annotations, lines,
                                    this_annotation, parent_key):
         """Reannotate this text relative to a second (or more) parent."""
-        cdef long parent_idx, lines_idx, match_len
+        cdef long parent_idx, ann_idx, lines_idx, match_len, idx
+        cdef PyObject *temp
         parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
             key, lines, parent_key)
-
+        _check_annotations_are_lists(annotations, parent_annotations)
         last_ann = None
         last_parent = None
         last_res = None
-        # TODO: consider making all annotations unique and then using 'is'
-        #       everywhere. Current results claim that isn't any faster,
-        #       because of the time spent deduping
-        #       deduping also saves a bit of memory. For NEWS it saves ~1MB,
-        #       but that is out of 200-300MB for extracting everything, so a
-        #       fairly trivial amount
         for parent_idx, lines_idx, match_len in matching_blocks:
+            _check_match_ranges(parent_annotations, annotations,
+                                parent_idx, lines_idx, match_len)
             # For lines which match this parent, we will now resolve whether
             # this parent wins over the current annotation
-            ann_sub = annotations[lines_idx:lines_idx + match_len]
-            par_sub = parent_annotations[parent_idx:parent_idx + match_len]
-            if ann_sub == par_sub:
-                continue
-            for idx in xrange(match_len):
-                ann = ann_sub[idx]
-                par_ann = par_sub[idx]
+            for idx from 0 <= idx < match_len:
                 ann_idx = lines_idx + idx
-                if ann == par_ann:
+                temp = PyList_GET_ITEM(annotations, ann_idx)
+                ann = <object>temp
+                temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
+                par_ann = <object>temp
+                if (PyObject_RichCompareBool(ann, par_ann, Py_EQ)):
                     # Nothing to change
                     continue
-                if ann == this_annotation:
+                if (PyObject_RichCompareBool(ann, this_annotation, Py_EQ)):
                     # Originally claimed 'this', but it was really in this
                     # parent
-                    annotations[ann_idx] = par_ann
+                    Py_INCREF(par_ann)
+                    PyList_SetItem(annotations, ann_idx, par_ann)
                     continue
                 # Resolve the fact that both sides have a different value for
                 # last modified
-                if ann == last_ann and par_ann == last_parent:
-                    annotations[ann_idx] = last_res
+                if (PyObject_RichCompareBool(ann, last_ann, Py_EQ)
+                    and PyObject_RichCompareBool(par_ann, last_parent, Py_EQ)):
+                    Py_INCREF(last_res)
+                    PyList_SetItem(annotations, ann_idx, last_res)
                 else:
                     new_ann = set(ann)
                     new_ann.update(par_ann)
                     new_ann = tuple(sorted(new_ann))
-                    annotations[ann_idx] = new_ann
+                    Py_INCREF(new_ann)
+                    PyList_SetItem(annotations, ann_idx, new_ann)
                     last_ann = ann
                     last_parent = par_ann
                     last_res = new_ann



More information about the bazaar-commits mailing list