Rev 4527: inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 8 16:14:25 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
------------------------------------------------------------
revno: 4527
revision-id: john at arbash-meinel.com-20090708151421-0mrjr4txzbmofi10
parent: john at arbash-meinel.com-20090707025325-bf2s89nclg19pibu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Wed 2009-07-08 10:14:21 -0500
message:
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
This removes some of the redundancy between implementations, though it seems
to have a small performance impact (7.0s => 7.2s for NEWS).
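The shape of the change is plain subclassing: the compiled class now inherits
from the pure-Python Annotator and only overrides the methods it optimizes.
A rough sketch of the pattern (illustrative names only, not the actual bzrlib
code):

    class PyAnnotator(object):
        """Stands in for _annotator_py.Annotator: shared state and driver logic."""

        def __init__(self, vf):
            self._vf = vf
            self._parent_map = {}
            self._text_cache = {}
            self._annotations_cache = {}

        def _update_from_first_parent(self, key, annotations, lines, parent_key):
            """Readable but slower reannotation against the first parent."""
            # pure-Python line-matching code would live here
            pass

        def annotate(self, key):
            """Driver shared by both implementations."""
            # graph walking, caching, progress reporting, ...
            pass


    class PyxAnnotator(PyAnnotator):
        """Stands in for _annotator_pyx.Annotator: overrides only the hot methods."""

        def _update_from_first_parent(self, key, annotations, lines, parent_key):
            """Optimized (in the real module, Cython) version of the same method."""
            pass

Everything not overridden (__init__, _get_needed_keys, annotate, and so on) now
comes from the Python class, which is presumably where the small 7.0s => 7.2s
slowdown comes from.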
-------------- next part --------------
=== modified file 'bzrlib/_annotator_py.py'
--- a/bzrlib/_annotator_py.py 2009-07-06 21:31:40 +0000
+++ b/bzrlib/_annotator_py.py 2009-07-08 15:14:21 +0000
@@ -37,6 +37,7 @@
self._num_needed_children = {}
self._annotations_cache = {}
self._heads_provider = None
+ self._ann_tuple_cache = {}
def _update_needed_children(self, key, parent_keys):
for parent_key in parent_keys:
@@ -140,7 +141,7 @@
matching_blocks = matcher.get_matching_blocks()
return parent_annotations, matching_blocks
- def _update_from_one_parent(self, key, annotations, lines, parent_key):
+ def _update_from_first_parent(self, key, annotations, lines, parent_key):
"""Reannotate this text relative to its first parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
key, lines, parent_key)
@@ -215,7 +216,8 @@
annotations = [this_annotation] * num_lines
parent_keys = self._parent_map[key]
if parent_keys:
- self._update_from_one_parent(key, annotations, text, parent_keys[0])
+ self._update_from_first_parent(key, annotations, text,
+ parent_keys[0])
for parent in parent_keys[1:]:
self._update_from_other_parents(key, annotations, text,
this_annotation, parent)
@@ -262,8 +264,7 @@
else:
the_heads = heads(annotation)
if len(the_heads) == 1:
- for head in the_heads:
- break
+ for head in the_heads: break # get the item out of the set
else:
# We need to resolve the ambiguity, for now just pick the
# sorted smallest
=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx 2009-07-06 20:21:34 +0000
+++ b/bzrlib/_annotator_pyx.pyx 2009-07-08 15:14:21 +0000
@@ -55,62 +55,10 @@
PyObject *, PyObject *, int opid)
+from bzrlib import _annotator_py
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
-cdef class _NeededTextIterator:
-
- cdef object counter
- cdef object text_cache
- cdef object stream
- cdef object ann_keys
- cdef object stream_len
- cdef object pb
- cdef int stream_is_consumed
- cdef int ann_key_pos
-
- def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
- self.counter = 0
- self.stream = stream
- self.stream_len = stream_len
- self.text_cache = text_cache
- self.stream_len = stream_len
- self.ann_keys = list(ann_keys)
- self.pb = pb
- self.stream_is_consumed = 0
- self.ann_key_pos = 0
-
- def __iter__(self):
- return self
-
- cdef _get_ann_text(self):
- if self.ann_key_pos >= len(self.ann_keys):
- raise StopIteration
- key = self.ann_keys[self.ann_key_pos]
- self.ann_key_pos = self.ann_key_pos + 1
- lines = self.text_cache[key]
- num_lines = len(lines)
- return key, lines, num_lines
-
- def __next__(self):
- if self.stream_is_consumed:
- return self._get_ann_text()
- try:
- record = self.stream.next()
- except StopIteration:
- self.stream_is_consumed = 1
- return self._get_ann_text()
- if self.pb is not None:
- self.pb.update('extracting', self.counter, self.stream_len)
- if record.storage_kind == 'absent':
- raise errors.RevisionNotPresent(record.key, None)
- self.counter = self.counter + 1
- lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
- num_lines = len(lines)
- self.text_cache[record.key] = lines
- return record.key, lines, num_lines
-
-
cdef int _check_annotations_are_lists(annotations,
parent_annotations) except -1:
if not PyList_CheckExact(annotations):
@@ -231,114 +179,10 @@
ann_temp[idx] = par_temp[idx]
-class Annotator:
+class Annotator(_annotator_py.Annotator):
"""Class that drives performing annotations."""
- def __init__(self, vf):
- """Create a new Annotator from a VersionedFile."""
- self._vf = vf
- self._parent_map = {}
- self._text_cache = {}
- # Map from key => number of nexts that will be built from this key
- self._num_needed_children = {}
- self._annotations_cache = {}
- self._heads_provider = None
- self._ann_tuple_cache = {}
-
-
- def _update_needed_children(self, key, parent_keys):
- for parent_key in parent_keys:
- if parent_key in self._num_needed_children:
- self._num_needed_children[parent_key] += 1
- else:
- self._num_needed_children[parent_key] = 1
-
- def _get_needed_keys(self, key):
- """Determine the texts we need to get from the backing vf.
-
- :return: (vf_keys_needed, ann_keys_needed)
- vf_keys_needed These are keys that we need to get from the vf
- ann_keys_needed Texts which we have in self._text_cache but we
- don't have annotations for. We need to yield these
- in the proper order so that we can get proper
- annotations.
- """
- parent_map = self._parent_map
- # We need 1 extra copy of the node we will be looking at when we are
- # done
- self._num_needed_children[key] = 1
- vf_keys_needed = set()
- ann_keys_needed = set()
- needed_keys = set([key])
- while needed_keys:
- parent_lookup = []
- next_parent_map = {}
- for key in needed_keys:
- if key in self._parent_map:
- # We don't need to lookup this key in the vf
- if key not in self._text_cache:
- # Extract this text from the vf
- vf_keys_needed.add(key)
- elif key not in self._annotations_cache:
- # We do need to annotate
- ann_keys_needed.add(key)
- next_parent_map[key] = self._parent_map[key]
- else:
- parent_lookup.append(key)
- vf_keys_needed.add(key)
- needed_keys = set()
- next_parent_map.update(self._vf.get_parent_map(parent_lookup))
- for key, parent_keys in next_parent_map.iteritems():
- if parent_keys is None:
- parent_keys = ()
- next_parent_map[key] = ()
- self._update_needed_children(key, parent_keys)
- for key in parent_keys:
- if key not in parent_map:
- needed_keys.add(key)
- parent_map.update(next_parent_map)
- # _heads_provider does some graph caching, so it is only valid while
- # self._parent_map hasn't changed
- self._heads_provider = None
- return vf_keys_needed, ann_keys_needed
-
- def _get_needed_texts(self, key, pb=None):
- """Get the texts we need to properly annotate key.
-
- :param key: A Key that is present in self._vf
- :return: Yield (this_key, text, num_lines)
- 'text' is an opaque object that just has to work with whatever
- matcher object we are using. Currently it is always 'lines' but
- future improvements may change this to a simple text string.
- """
- keys, ann_keys = self._get_needed_keys(key)
- if pb is not None:
- pb.update('getting stream', 0, len(keys))
- stream = self._vf.get_record_stream(keys, 'topological', True)
- iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
- ann_keys, pb)
- return iterator
-
- def _get_parent_annotations_and_matches(self, key, text, parent_key):
- """Get the list of annotations for the parent, and the matching lines.
-
- :param text: The opaque value given by _get_needed_texts
- :param parent_key: The key for the parent text
- :return: (parent_annotations, matching_blocks)
- parent_annotations is a list as long as the number of lines in
- parent
- matching_blocks is a list of (parent_idx, text_idx, len) tuples
- indicating which lines match between the two texts
- """
- parent_lines = self._text_cache[parent_key]
- parent_annotations = self._annotations_cache[parent_key]
- # PatienceSequenceMatcher should probably be part of Policy
- matcher = patiencediff.PatienceSequenceMatcher(None,
- parent_lines, text)
- matching_blocks = matcher.get_matching_blocks()
- return parent_annotations, matching_blocks
-
- def _update_from_one_parent(self, key, annotations, lines, parent_key):
+ def _update_from_first_parent(self, key, annotations, lines, parent_key):
"""Reannotate this text relative to its first parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
key, lines, parent_key)
@@ -397,55 +241,6 @@
last_parent = par_ann
last_res = new_ann
- def _record_annotation(self, key, parent_keys, annotations):
- self._annotations_cache[key] = annotations
- for parent_key in parent_keys:
- num = self._num_needed_children[parent_key]
- num = num - 1
- if num == 0:
- del self._text_cache[parent_key]
- del self._annotations_cache[parent_key]
- # Do we want to clean up _num_needed_children at this point as
- # well?
- self._num_needed_children[parent_key] = num
-
- def _annotate_one(self, key, text, num_lines):
- this_annotation = (key,)
- # Note: annotations will be mutated by calls to _update_from*
- annotations = [this_annotation] * num_lines
- parent_keys = self._parent_map[key]
- if parent_keys:
- self._update_from_one_parent(key, annotations, text, parent_keys[0])
- for parent in parent_keys[1:]:
- self._update_from_other_parents(key, annotations, text,
- this_annotation, parent)
- self._record_annotation(key, parent_keys, annotations)
-
- def add_special_text(self, key, parent_keys, text):
- """Add a specific text to the graph."""
- self._parent_map[key] = parent_keys
- self._text_cache[key] = osutils.split_lines(text)
- self._heads_provider = None
-
- def annotate(self, key):
- """Return annotated fulltext for the given key."""
- pb = ui.ui_factory.nested_progress_bar()
- try:
- for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
- self._annotate_one(text_key, text, num_lines)
- finally:
- pb.finished()
- try:
- annotations = self._annotations_cache[key]
- except KeyError:
- raise errors.RevisionNotPresent(key, self._vf)
- return annotations, self._text_cache[key]
-
- def _get_heads_provider(self):
- if self._heads_provider is None:
- self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
- return self._heads_provider
-
def annotate_flat(self, key):
"""Determine the single-best-revision to source for each line.
@@ -465,8 +260,7 @@
else:
the_heads = heads(annotation)
if len(the_heads) == 1:
- for head in the_heads:
- break
+ for head in the_heads: break # get the item out of the set
else:
# We need to resolve the ambiguity, for now just pick the
# sorted smallest