Rev 4471: The only caller already knows the content length, so make the api such that in http://bazaar.launchpad.net/~jameinel/bzr/1.17-gc-single-mem
John Arbash Meinel
john at arbash-meinel.com
Mon Jun 22 19:30:47 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-gc-single-mem
------------------------------------------------------------
revno: 4471
revision-id: john at arbash-meinel.com-20090622183008-pofo16w8y3at5jjv
parent: john at arbash-meinel.com-20090622181004-0rsmfqcnhk48fq88
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-gc-single-mem
timestamp: Mon 2009-06-22 13:30:08 -0500
message:
The only caller already knows the content length, so make the API such that
you are required to pass it in.
It isn't particularly more difficult to do so, and it saves the time of calling len()
on thousands of strings during pack.
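For context: the compressor maintains a running output offset as it
appends chunks, which is why its caller already knows the total length.
A minimal standalone sketch of the pattern (the Block/Compressor names
and the _emit helper are simplified stand-ins for illustration, not the
actual bzrlib code):

    class Block(object):
        def set_chunked_content(self, content_chunks, length):
            # The caller supplies the total length, so no per-chunk
            # len() pass is needed here.
            self._content_length = length
            self._content_chunks = content_chunks
            self._content = None

    class Compressor(object):
        def __init__(self):
            self.chunks = []
            self.endpoint = 0  # running total of bytes emitted so far

        def _emit(self, chunk):
            self.chunks.append(chunk)
            self.endpoint += len(chunk)  # length tracked incrementally

        def flush(self):
            block = Block()
            # endpoint already equals sum(map(len, self.chunks)),
            # so no second pass over the chunks is required.
            block.set_chunked_content(self.chunks, self.endpoint)
            return block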
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-06-22 18:10:04 +0000
+++ b/bzrlib/groupcompress.py 2009-06-22 18:30:08 +0000
@@ -278,12 +278,19 @@
         bytes = apply_delta_to_source(self._content, content_start, end)
         return bytes
 
-    def set_chunked_content(self, content_chunks):
+    def set_chunked_content(self, content_chunks, length):
         """Set the content of this block to the given chunks."""
-        self._content_length = sum(map(len, content_chunks))
+        # TODO: if we have *lots* of short lines, it is probably more efficient
+        #       to go ahead and join them up from the start
+        assert length == sum(map(len, content_chunks))
+        self._content_length = length
+        # if self._content_length < 10*1024*1024:
+        #     self._content_chunks = None
+        #     self._content = ''.join(content_chunks)
+        # else:
         self._content_chunks = content_chunks
+        self._content = None
         self._z_content = None
-        self._content = None
 
     def set_content(self, content):
         """Set the content of this block."""
@@ -799,10 +806,9 @@
         #       for 'commit' down to ~1x the size of the largest file, at a
         #       cost of increased complexity within this code. 2x is still <<
         #       3x the size of the largest file, so we are doing ok.
-        content = ''.join(self.chunks)
+        self._block.set_chunked_content(self.chunks, self.endpoint)
         self.chunks = None
         self._delta_index = None
-        self._block.set_content(content)
         return self._block
 
     def pop_last(self):
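The self.endpoint passed above is the compressor's running output
offset, updated each time a chunk is emitted, so by flush() time it
already equals the joined length. A tiny illustration of the invariant
the new assert guards, using hypothetical chunk values; note that the
assert itself still performs the summed-len scan as a sanity check,
though it is compiled out when running under python -O:

    chunks = ['header\n', 'delta bytes', 'more bytes\n']
    endpoint = 0
    for c in chunks:
        endpoint += len(c)                    # maintained incrementally
    assert endpoint == sum(map(len, chunks))  # the check in set_chunked_content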
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-06-22 18:10:04 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-06-22 18:30:08 +0000
@@ -366,7 +366,8 @@
         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'])
+                                 'this content will be compressed\n'],
+                                len(content))
         old_bytes = bytes
         bytes = gcb.to_bytes()
         self.assertEqual(old_bytes, bytes)