Rev 4527: inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 8 16:14:25 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
------------------------------------------------------------
revno: 4527
revision-id: john at arbash-meinel.com-20090708151421-0mrjr4txzbmofi10
parent: john at arbash-meinel.com-20090707025325-bf2s89nclg19pibu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Wed 2009-07-08 10:14:21 -0500
message:
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
This removes some of the redundancy between implementations, though it seems
to have a small performance impact (7.0s => 7.2s for NEWS).
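The shape of the change is plain subclassing: the compiled class now inherits
from the pure-Python Annotator and only overrides the methods it optimizes.
A rough sketch of the pattern (illustrative names only, not the actual bzrlib
code):

    class PyAnnotator(object):
        """Stands in for _annotator_py.Annotator: shared state and driver logic."""

        def __init__(self, vf):
            self._vf = vf
            self._parent_map = {}
            self._text_cache = {}
            self._annotations_cache = {}

        def _update_from_first_parent(self, key, annotations, lines, parent_key):
            """Readable but slower reannotation against the first parent."""
            # pure-Python line-matching code would live here
            pass

        def annotate(self, key):
            """Driver shared by both implementations."""
            # graph walking, caching, progress reporting, ...
            pass


    class PyxAnnotator(PyAnnotator):
        """Stands in for _annotator_pyx.Annotator: overrides only the hot methods."""

        def _update_from_first_parent(self, key, annotations, lines, parent_key):
            """Optimized (in the real module, Cython) version of the same method."""
            pass

Everything not overridden (__init__, _get_needed_keys, annotate, and so on) now
comes from the Python class, which is presumably where the small 7.0s => 7.2s
slowdown comes from.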
-------------- next part --------------
=== modified file 'bzrlib/_annotator_py.py'
--- a/bzrlib/_annotator_py.py 2009-07-06 21:31:40 +0000
+++ b/bzrlib/_annotator_py.py 2009-07-08 15:14:21 +0000
@@ -37,6 +37,7 @@
self._num_needed_children = {}
self._annotations_cache = {}
self._heads_provider = None
+ self._ann_tuple_cache = {}
def _update_needed_children(self, key, parent_keys):
for parent_key in parent_keys:
@@ -140,7 +141,7 @@
matching_blocks = matcher.get_matching_blocks()
return parent_annotations, matching_blocks
- def _update_from_one_parent(self, key, annotations, lines, parent_key):
+ def _update_from_first_parent(self, key, annotations, lines, parent_key):
"""Reannotate this text relative to its first parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
key, lines, parent_key)
@@ -215,7 +216,8 @@
annotations = [this_annotation] * num_lines
parent_keys = self._parent_map[key]
if parent_keys:
- self._update_from_one_parent(key, annotations, text, parent_keys[0])
+ self._update_from_first_parent(key, annotations, text,
+ parent_keys[0])
for parent in parent_keys[1:]:
self._update_from_other_parents(key, annotations, text,
this_annotation, parent)
@@ -262,8 +264,7 @@
else:
the_heads = heads(annotation)
if len(the_heads) == 1:
- for head in the_heads:
- break
+ for head in the_heads: break # get the item out of the set
else:
# We need to resolve the ambiguity, for now just pick the
# sorted smallest
=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx 2009-07-06 20:21:34 +0000
+++ b/bzrlib/_annotator_pyx.pyx 2009-07-08 15:14:21 +0000
@@ -55,62 +55,10 @@
PyObject *, PyObject *, int opid)
+from bzrlib import _annotator_py
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
-cdef class _NeededTextIterator:
-
- cdef object counter
- cdef object text_cache
- cdef object stream
- cdef object ann_keys
- cdef object stream_len
- cdef object pb
- cdef int stream_is_consumed
- cdef int ann_key_pos
-
- def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
- self.counter = 0
- self.stream = stream
- self.stream_len = stream_len
- self.text_cache = text_cache
- self.stream_len = stream_len
- self.ann_keys = list(ann_keys)
- self.pb = pb
- self.stream_is_consumed = 0
- self.ann_key_pos = 0
-
- def __iter__(self):
- return self
-
- cdef _get_ann_text(self):
- if self.ann_key_pos >= len(self.ann_keys):
- raise StopIteration
- key = self.ann_keys[self.ann_key_pos]
- self.ann_key_pos = self.ann_key_pos + 1
- lines = self.text_cache[key]
- num_lines = len(lines)
- return key, lines, num_lines
-
- def __next__(self):
- if self.stream_is_consumed:
- return self._get_ann_text()
- try:
- record = self.stream.next()
- except StopIteration:
- self.stream_is_consumed = 1
- return self._get_ann_text()
- if self.pb is not None:
- self.pb.update('extracting', self.counter, self.stream_len)
- if record.storage_kind == 'absent':
- raise errors.RevisionNotPresent(record.key, None)
- self.counter = self.counter + 1
- lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
- num_lines = len(lines)
- self.text_cache[record.key] = lines
- return record.key, lines, num_lines
-
-
cdef int _check_annotations_are_lists(annotations,
parent_annotations) except -1:
if not PyList_CheckExact(annotations):
@@ -231,114 +179,10 @@
ann_temp[idx] = par_temp[idx]
-class Annotator:
+class Annotator(_annotator_py.Annotator):
"""Class that drives performing annotations."""
- def __init__(self, vf):
- """Create a new Annotator from a VersionedFile."""
- self._vf = vf
- self._parent_map = {}
- self._text_cache = {}
- # Map from key => number of nexts that will be built from this key
- self._num_needed_children = {}
- self._annotations_cache = {}
- self._heads_provider = None
- self._ann_tuple_cache = {}
-
-
- def _update_needed_children(self, key, parent_keys):
- for parent_key in parent_keys:
- if parent_key in self._num_needed_children:
- self._num_needed_children[parent_key] += 1
- else:
- self._num_needed_children[parent_key] = 1
-
- def _get_needed_keys(self, key):
- """Determine the texts we need to get from the backing vf.
-
- :return: (vf_keys_needed, ann_keys_needed)
- vf_keys_needed These are keys that we need to get from the vf
- ann_keys_needed Texts which we have in self._text_cache but we
- don't have annotations for. We need to yield these
- in the proper order so that we can get proper
- annotations.
- """
- parent_map = self._parent_map
- # We need 1 extra copy of the node we will be looking at when we are
- # done
- self._num_needed_children[key] = 1
- vf_keys_needed = set()
- ann_keys_needed = set()
- needed_keys = set([key])
- while needed_keys:
- parent_lookup = []
- next_parent_map = {}
- for key in needed_keys:
- if key in self._parent_map:
- # We don't need to lookup this key in the vf
- if key not in self._text_cache:
- # Extract this text from the vf
- vf_keys_needed.add(key)
- elif key not in self._annotations_cache:
- # We do need to annotate
- ann_keys_needed.add(key)
- next_parent_map[key] = self._parent_map[key]
- else:
- parent_lookup.append(key)
- vf_keys_needed.add(key)
- needed_keys = set()
- next_parent_map.update(self._vf.get_parent_map(parent_lookup))
- for key, parent_keys in next_parent_map.iteritems():
- if parent_keys is None:
- parent_keys = ()
- next_parent_map[key] = ()
- self._update_needed_children(key, parent_keys)
- for key in parent_keys:
- if key not in parent_map:
- needed_keys.add(key)
- parent_map.update(next_parent_map)
- # _heads_provider does some graph caching, so it is only valid while
- # self._parent_map hasn't changed
- self._heads_provider = None
- return vf_keys_needed, ann_keys_needed
-
- def _get_needed_texts(self, key, pb=None):
- """Get the texts we need to properly annotate key.
-
- :param key: A Key that is present in self._vf
- :return: Yield (this_key, text, num_lines)
- 'text' is an opaque object that just has to work with whatever
- matcher object we are using. Currently it is always 'lines' but
- future improvements may change this to a simple text string.
- """
- keys, ann_keys = self._get_needed_keys(key)
- if pb is not None:
- pb.update('getting stream', 0, len(keys))
- stream = self._vf.get_record_stream(keys, 'topological', True)
- iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
- ann_keys, pb)
- return iterator
-
- def _get_parent_annotations_and_matches(self, key, text, parent_key):
- """Get the list of annotations for the parent, and the matching lines.
-
- :param text: The opaque value given by _get_needed_texts
- :param parent_key: The key for the parent text
- :return: (parent_annotations, matching_blocks)
- parent_annotations is a list as long as the number of lines in
- parent
- matching_blocks is a list of (parent_idx, text_idx, len) tuples
- indicating which lines match between the two texts
- """
- parent_lines = self._text_cache[parent_key]
- parent_annotations = self._annotations_cache[parent_key]
- # PatienceSequenceMatcher should probably be part of Policy
- matcher = patiencediff.PatienceSequenceMatcher(None,
- parent_lines, text)
- matching_blocks = matcher.get_matching_blocks()
- return parent_annotations, matching_blocks
-
- def _update_from_one_parent(self, key, annotations, lines, parent_key):
+ def _update_from_first_parent(self, key, annotations, lines, parent_key):
"""Reannotate this text relative to its first parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
key, lines, parent_key)
@@ -397,55 +241,6 @@
last_parent = par_ann
last_res = new_ann
- def _record_annotation(self, key, parent_keys, annotations):
- self._annotations_cache[key] = annotations
- for parent_key in parent_keys:
- num = self._num_needed_children[parent_key]
- num = num - 1
- if num == 0:
- del self._text_cache[parent_key]
- del self._annotations_cache[parent_key]
- # Do we want to clean up _num_needed_children at this point as
- # well?
- self._num_needed_children[parent_key] = num
-
- def _annotate_one(self, key, text, num_lines):
- this_annotation = (key,)
- # Note: annotations will be mutated by calls to _update_from*
- annotations = [this_annotation] * num_lines
- parent_keys = self._parent_map[key]
- if parent_keys:
- self._update_from_one_parent(key, annotations, text, parent_keys[0])
- for parent in parent_keys[1:]:
- self._update_from_other_parents(key, annotations, text,
- this_annotation, parent)
- self._record_annotation(key, parent_keys, annotations)
-
- def add_special_text(self, key, parent_keys, text):
- """Add a specific text to the graph."""
- self._parent_map[key] = parent_keys
- self._text_cache[key] = osutils.split_lines(text)
- self._heads_provider = None
-
- def annotate(self, key):
- """Return annotated fulltext for the given key."""
- pb = ui.ui_factory.nested_progress_bar()
- try:
- for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
- self._annotate_one(text_key, text, num_lines)
- finally:
- pb.finished()
- try:
- annotations = self._annotations_cache[key]
- except KeyError:
- raise errors.RevisionNotPresent(key, self._vf)
- return annotations, self._text_cache[key]
-
- def _get_heads_provider(self):
- if self._heads_provider is None:
- self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
- return self._heads_provider
-
def annotate_flat(self, key):
"""Determine the single-best-revision to source for each line.
@@ -465,8 +260,7 @@
else:
the_heads = heads(annotation)
if len(the_heads) == 1:
- for head in the_heads:
- break
+ for head in the_heads: break # get the item out of the set
else:
# We need to resolve the ambiguity, for now just pick the
# sorted smallest