Rev 4490: Only cache the content objects that we will reuse, in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

Tue Jun 23 20:14:35 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

------------------------------------------------------------
revno: 4490
revision-id: john at arbash-meinel.com-20090623191430-jpb00rg2to2spfb3
parent: john at arbash-meinel.com-20090623190924-5jzikwlde7sc0z29
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Tue 2009-06-23 14:14:30 -0500
message:
  Only cache the content objects that we will reuse.
  
  Peak memory is down to 92MB, and annotate time is down to 15.9s.
-------------- next part --------------
=== modified file 'bzrlib/knit.py'

--- a/bzrlib/knit.py	2009-06-23 19:09:24 +0000
+++ b/bzrlib/knit.py	2009-06-23 19:14:30 +0000
@@ -3420,6 +3420,8 @@
             # child object. However, whenever noeol=False,
             # self._text_cache[parent_key] is content._lines. So mutating it
             # gives very bad results.
+            # The alternative is to copy the lines into text cache, but then we
+            # are copying anyway, so just do it here.
             content, _ = self._vf._factory.parse_record(
                 key, record, record_details, base_content,
                 copy_base_content=True)
@@ -3429,7 +3431,8 @@
                 key, record, record_details, None)
         # TODO: Only track the content when there are compression children.
         #       Otherwise we only need the lines
-        self._content_objects[key] = content
+        if self._num_compression_children.get(key, 0) > 0:
+            self._content_objects[key] = content
         lines = content.text()
         self._text_cache[key] = lines
         return lines

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2009-06-23 18:51:10 +0000
+++ b/bzrlib/tests/test_knit.py	2009-06-23 19:14:30 +0000
@@ -1323,6 +1323,7 @@
     def test__expand_fulltext(self):
         ann = self.make_annotator()
         rev_key = ('rev-id',)
+        ann._num_compression_children[rev_key] = 1
         res = ann._expand_record(rev_key, (('parent-id',),), None,
                            ['line1\n', 'line2\n'], ('fulltext', True))
         # The content object and text lines should be cached appropriately
@@ -1367,6 +1368,9 @@
                                  record, details)
         self.assertFalse(parent_key in ann._content_objects)
         self.assertEqual({}, ann._num_compression_children)
+        # We should not cache the content_objects for rev2 and rev, because
+        # they do not have compression children of their own.
+        self.assertEqual({}, ann._content_objects)
 
     def test__process_pending(self):
         ann = self.make_annotator()