Rev 4492: Start using left-matching-blocks during the actual annotation, in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

John Arbash Meinel john at arbash-meinel.com
Tue Jun 23 20:38:32 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

------------------------------------------------------------
revno: 4492
revision-id: john at arbash-meinel.com-20090623193827-eyoz3todw0o2dno9
parent: john at arbash-meinel.com-20090623192501-qzf1a07g8gxzkrxl
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Tue 2009-06-23 14:38:27 -0500
message:
  Start using left-matching-blocks during the actual annotation.
  Annotation run time is now down to 11.3s.
-------------- next part --------------
=== modified file 'bzrlib/_annotator_py.py'
--- a/bzrlib/_annotator_py.py	2009-06-18 22:07:42 +0000
+++ b/bzrlib/_annotator_py.py	2009-06-23 19:38:27 +0000
@@ -82,7 +82,7 @@
             self._text_cache[this_key] = lines
             yield this_key, lines, num_lines
 
-    def _get_parent_annotations_and_matches(self, text, parent_key):
+    def _get_parent_annotations_and_matches(self, key, text, parent_key):
         """Get the list of annotations for the parent, and the matching lines.
 
         :param text: The opaque value given by _get_needed_texts
@@ -101,21 +101,21 @@
         matching_blocks = matcher.get_matching_blocks()
         return parent_annotations, matching_blocks
 
-    def _update_from_one_parent(self, annotations, lines, parent_key):
+    def _update_from_one_parent(self, key, annotations, lines, parent_key):
         """Reannotate this text relative to its first parent."""
         parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
-            lines, parent_key)
+            key, lines, parent_key)
 
         for parent_idx, lines_idx, match_len in matching_blocks:
             # For all matching regions we copy across the parent annotations
             annotations[lines_idx:lines_idx + match_len] = \
                 parent_annotations[parent_idx:parent_idx + match_len]
 
-    def _update_from_other_parents(self, annotations, lines, this_annotation,
-                                   parent_key):
+    def _update_from_other_parents(self, key, annotations, lines,
+                                   this_annotation, parent_key):
         """Reannotate this text relative to a second (or more) parent."""
         parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
-            lines, parent_key)
+            key, lines, parent_key)
 
         last_ann = None
         last_parent = None
@@ -172,9 +172,9 @@
         annotations = [this_annotation] * num_lines
         parent_keys = self._parent_map[key]
         if parent_keys:
-            self._update_from_one_parent(annotations, text, parent_keys[0])
+            self._update_from_one_parent(key, annotations, text, parent_keys[0])
             for parent in parent_keys[1:]:
-                self._update_from_other_parents(annotations, text,
+                self._update_from_other_parents(key, annotations, text,
                                                 this_annotation, parent)
         self._record_annotation(key, parent_keys, annotations)
 

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2009-06-23 19:25:01 +0000
+++ b/bzrlib/knit.py	2009-06-23 19:38:27 +0000
@@ -3312,8 +3312,8 @@
         # TODO: handle Nodes which cannot be extracted
         # self._ghosts = set()
 
-        # Map from revision_id => left_matching_blocks, should be 'use once'
-        self._left_matching_blocks = {}
+        # Map from (key, parent_key) => matching_blocks, should be 'use once'
+        self._matching_blocks = {}
 
         # KnitContent objects
         self._content_objects = {}
@@ -3402,7 +3402,7 @@
     def _cache_delta_blocks(self, key, compression_parent, delta, lines):
         parent_lines = self._text_cache[compression_parent]
         blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines))
-        self._left_matching_blocks[key] = blocks
+        self._matching_blocks[(key, compression_parent)] = blocks
 
     def _expand_record(self, key, parent_keys, compression_parent, record,
                        record_details):
@@ -3443,6 +3443,25 @@
             self._cache_delta_blocks(key, compression_parent, delta, lines)
         return lines
 
+    def _get_parent_annotations_and_matches(self, key, text, parent_key):
+        """Get the list of annotations for the parent, and the matching lines.
+
+        :param text: The opaque value given by _get_needed_texts
+        :param parent_key: The key for the parent text
+        :return: (parent_annotations, matching_blocks)
+            parent_annotations is a list as long as the number of lines in
+                parent
+            matching_blocks is a list of (parent_idx, text_idx, len) tuples
+                indicating which lines match between the two texts
+        """
+        block_key = (key, parent_key)
+        if block_key in self._matching_blocks:
+            blocks = self._matching_blocks.pop(block_key)
+            parent_annotations = self._annotations_cache[parent_key]
+            return parent_annotations, blocks
+        return super(_KnitAnnotator, self)._get_parent_annotations_and_matches(
+            key, text, parent_key)
+
     def _process_pending(self, key):
         """The content for 'key' was just processed.
 

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2009-06-23 19:25:01 +0000
+++ b/bzrlib/tests/test_knit.py	2009-06-23 19:38:27 +0000
@@ -1383,14 +1383,30 @@
                            ['line1\n', 'line2\n', 'line3\n'],
                            ('fulltext', False))
         ann._expand_record(rev_key, (parent_key,), parent_key, record, details)
-        self.assertEqual({rev_key: [(1, 1, 1), (3, 3, 0)]},
-                         ann._left_matching_blocks)
+        self.assertEqual({(rev_key, parent_key): [(1, 1, 1), (3, 3, 0)]},
+                         ann._matching_blocks)
         rev2_key = ('rev2-id',)
         record = ['0,1,1\n', 'new-line\n']
         details = ('line-delta', False)
         ann._expand_record(rev2_key, (parent_key,), parent_key, record, details)
         self.assertEqual([(1, 1, 2), (3, 3, 0)],
-                         ann._left_matching_blocks[rev2_key])
+                         ann._matching_blocks[(rev2_key, parent_key)])
+
+    def test__get_parent_ann_uses_matching_blocks(self):
+        ann = self.make_annotator()
+        rev_key = ('rev-id',)
+        parent_key = ('parent-id',)
+        parent_ann = [(parent_key,)]*3
+        block_key = (rev_key, parent_key)
+        ann._annotations_cache[parent_key] = parent_ann
+        ann._matching_blocks[block_key] = [(0, 1, 1), (3, 3, 0)]
+        # We should not try to access any parent_lines content, because we know
+        # we already have the matching blocks
+        par_ann, blocks = ann._get_parent_annotations_and_matches(rev_key,
+                                        ['1\n', '2\n', '3\n'], parent_key)
+        self.assertEqual(parent_ann, par_ann)
+        self.assertEqual([(0, 1, 1), (3, 3, 0)], blocks)
+        self.assertEqual({}, ann._matching_blocks)
 
     def test__process_pending(self):
         ann = self.make_annotator()



More information about the bazaar-commits mailing list