Rev 4471: The only caller already knows the content length, so make the api such that in http://bazaar.launchpad.net/~jameinel/bzr/1.17-gc-single-mem
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 22 19:30:47 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-gc-single-mem
------------------------------------------------------------
revno: 4471
revision-id: john at arbash-meinel.com-20090622183008-pofo16w8y3at5jjv
parent: john at arbash-meinel.com-20090622181004-0rsmfqcnhk48fq88
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-gc-single-mem
timestamp: Mon 2009-06-22 13:30:08 -0500
message:
The only caller already knows the content length, so make the API such that
you are required to pass it in.
It isn't particularly more difficult to do so, and it saves the time of calling len()
on thousands of strings during pack.
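For context: the compressor maintains a running output offset as it
appends chunks, which is why its caller already knows the total length.
A minimal standalone sketch of the pattern (the Block/Compressor names
and the _emit helper are simplified stand-ins for illustration, not the
actual bzrlib code):

    class Block(object):
        def set_chunked_content(self, content_chunks, length):
            # The caller supplies the total length, so no per-chunk
            # len() pass is needed here.
            self._content_length = length
            self._content_chunks = content_chunks
            self._content = None

    class Compressor(object):
        def __init__(self):
            self.chunks = []
            self.endpoint = 0  # running total of bytes emitted so far

        def _emit(self, chunk):
            self.chunks.append(chunk)
            self.endpoint += len(chunk)  # length tracked incrementally

        def flush(self):
            block = Block()
            # endpoint already equals sum(map(len, self.chunks)),
            # so no second pass over the chunks is required.
            block.set_chunked_content(self.chunks, self.endpoint)
            return block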
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-06-22 18:10:04 +0000
+++ b/bzrlib/groupcompress.py 2009-06-22 18:30:08 +0000
@@ -278,12 +278,19 @@
         bytes = apply_delta_to_source(self._content, content_start, end)
         return bytes
 
-    def set_chunked_content(self, content_chunks):
+    def set_chunked_content(self, content_chunks, length):
         """Set the content of this block to the given chunks."""
-        self._content_length = sum(map(len, content_chunks))
+        # TODO: if we have *lots* of short lines, it is probably more efficient
+        #       to go ahead and join them up from the start
+        assert length == sum(map(len, content_chunks))
+        self._content_length = length
+        # if self._content_length < 10*1024*1024:
+        #     self._content_chunks = None
+        #     self._content = ''.join(content_chunks)
+        # else:
         self._content_chunks = content_chunks
+        self._content = None
         self._z_content = None
-        self._content = None
 
     def set_content(self, content):
         """Set the content of this block."""
@@ -799,10 +806,9 @@
         #       for 'commit' down to ~1x the size of the largest file, at a
         #       cost of increased complexity within this code. 2x is still <<
         #       3x the size of the largest file, so we are doing ok.
-        content = ''.join(self.chunks)
+        self._block.set_chunked_content(self.chunks, self.endpoint)
         self.chunks = None
         self._delta_index = None
-        self._block.set_content(content)
         return self._block
 
     def pop_last(self):
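The self.endpoint passed above is the compressor's running output
offset, updated each time a chunk is emitted, so by flush() time it
already equals the joined length. A tiny illustration of the invariant
the new assert guards, using hypothetical chunk values; note that the
assert itself still performs the summed-len scan as a sanity check,
though it is compiled out when running under python -O:

    chunks = ['header\n', 'delta bytes', 'more bytes\n']
    endpoint = 0
    for c in chunks:
        endpoint += len(c)                    # maintained incrementally
    assert endpoint == sum(map(len, chunks))  # the check in set_chunked_content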
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-06-22 18:10:04 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-06-22 18:30:08 +0000
@@ -366,7 +366,8 @@
         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'])
+                                 'this content will be compressed\n'],
+                                len(content))
         old_bytes = bytes
         bytes = gcb.to_bytes()
         self.assertEqual(old_bytes, bytes)