Rev 4647: Fix bug 402652 by recompressing all texts that are streamed - slightly slower at fetch, substantially faster and more compact at read. in http://bazaar.launchpad.net/~lifeless/bzr/bug-402652
Robert Collins
robertc at robertcollins.net
Tue Sep 1 07:10:51 BST 2009
At http://bazaar.launchpad.net/~lifeless/bzr/bug-402652
------------------------------------------------------------
revno: 4647
revision-id: robertc at robertcollins.net-20090901061024-qasufbfj7jse2eai
parent: pqm at pqm.ubuntu.com-20090830232250-2oqzti7o30pv7zc5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: bug-402652
timestamp: Tue 2009-09-01 16:10:24 +1000
message:
Fix bug 402652 by recompressing all texts that are streamed - slightly slower at fetch, substantially faster and more compact at read.
=== modified file 'NEWS'
--- a/NEWS 2009-08-30 22:02:45 +0000
+++ b/NEWS 2009-09-01 06:10:24 +0000
@@ -20,6 +20,13 @@
   revisions that are in the fallback repository. (Regressed in 2.0rc1).
   (John Arbash Meinel, #419241)
 
+* Fetches from 2a to 2a are now again requested in 'groupcompress' order,
+  and all texts are recombined appropriately. This doesn't reuse existing
+  groups, which will be measurable in some specific circumstances - an
+  approximately 25% overhead. However, doing this ensures high performance
+  reads subsequent to the fetch operation, which is the most common
+  operation: write once read many. (Robert Collins, #402652)
+
 * Fix a segmentation fault when computing the ``merge_sort`` of a graph
   that has a ghost in the mainline ancestry.
   (John Arbash Meinel, #419241)
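
As a rough illustration of the trade-off this entry describes - a minimal
sketch, where fetch_and_recompress, source_vf, target_vf and keys are
hypothetical stand-ins for the repositories' VersionedFiles objects (e.g.
repo.texts), not bzrlib's actual fetch pipeline:

    def fetch_and_recompress(source_vf, target_vf, keys):
        # Ask the source for records sorted in 'groupcompress' order so
        # that related texts arrive adjacent and compress well together.
        stream = source_vf.get_record_stream(keys, 'groupcompress', True)
        # Inserting recompresses everything into fresh groups: the ~25%
        # fetch-time cost that buys compact, fast-to-read groups later.
        target_vf.insert_record_stream(stream)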
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-08-26 16:47:51 +0000
+++ b/bzrlib/groupcompress.py 2009-09-01 06:10:24 +0000
@@ -1516,7 +1516,8 @@
         # test_insert_record_stream_existing_keys fail for groupcompress and
         # groupcompress-nograph, this needs to be revisited while addressing
         # 'bzr branch' performance issues.
-        for _ in self._insert_record_stream(stream, random_id=False):
+        for _ in self._insert_record_stream(stream, random_id=False,
+            reuse_blocks=False):
             pass
 
     def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
@@ -1580,7 +1581,7 @@
                         ' but then inserted %r two times', record.key)
                     continue
                 inserted_keys.add(record.key)
-            if reuse_blocks:
+            if not inserted_keys and reuse_blocks:
                 # If the reuse_blocks flag is set, check to see if we can just
                 # copy a groupcompress block as-is.
                 if record.storage_kind == 'groupcompress-block':
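
Taken together, the two hunks above make recompression the default for this
code path: insert_record_stream() now calls _insert_record_stream() with
reuse_blocks=False. A hedged sketch of the resulting control flow (only the
shape of the guard, not the full method body):

    def _insert_record_stream(self, stream, random_id=False,
                              nostore_sha=None, reuse_blocks=True):
        inserted_keys = set()
        for record in stream:
            if random_id:
                if record.key in inserted_keys:
                    continue  # duplicate despite the random_id promise
                inserted_keys.add(record.key)
            # inserted_keys only grows on the random_id=True path, so for
            # random_id=False callers the new guard reduces to plain
            # 'if reuse_blocks:'; random_id=True streams never reuse blocks.
            if not inserted_keys and reuse_blocks:
                pass  # try to copy the source's compressed block as-is
            # ...otherwise fall through and recompress the record.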
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-08-24 19:34:13 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-09-01 06:10:24 +0000
@@ -932,7 +932,7 @@
         super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
         self._revision_keys = None
         self._text_keys = None
-        # self._text_fetch_order = 'unordered'
+        self._text_fetch_order = 'groupcompress'
         self._chk_id_roots = None
         self._chk_p_id_roots = None
@@ -949,7 +949,7 @@
         p_id_roots_set = set()
         source_vf = self.from_repository.inventories
         stream = source_vf.get_record_stream(inventory_keys,
-            'unordered', True)
+            'groupcompress', True)
         for record in stream:
             if record.storage_kind == 'absent':
                 if allow_absent:
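
Both changes here steer the stream source towards 'groupcompress' ordering.
A hedged illustration of what that ordering means to any consumer of
get_record_stream (consume, vf, keys and handle are illustrative stand-ins,
not names from this diff):

    def consume(vf, keys, handle):
        # 'groupcompress' ordering delivers records grouped by compression
        # locality instead of the arbitrary delivery of 'unordered', so a
        # receiving compressor can build tight groups in a single pass.
        for record in vf.get_record_stream(keys, 'groupcompress', True):
            if record.storage_kind == 'absent':
                continue  # caller-specific; the code above raises or skips
            handle(record)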
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2009-08-17 23:15:55 +0000
+++ b/bzrlib/tests/test_repository.py 2009-09-01 06:10:24 +0000
@@ -683,6 +683,27 @@
 class Test2a(TestCaseWithTransport):
 
+    def test_fetch_combines_groups(self):
+        builder = self.make_branch_builder('source', format='2a')
+        builder.start_series()
+        builder.build_snapshot('1', None, [
+            ('add', ('', 'root-id', 'directory', '')),
+            ('add', ('file', 'file-id', 'file', 'content\n'))])
+        builder.build_snapshot('2', ['1'], [
+            ('modify', ('file-id', 'content-2\n'))])
+        builder.finish_series()
+        source = builder.get_branch()
+        target = self.make_repository('target', format='2a')
+        target.fetch(source.repository)
+        target.lock_read()
+        details = target.texts._index.get_build_details(
+            [('file-id', '1',), ('file-id', '2',)])
+        file_1_details = details[('file-id', '1')]
+        file_2_details = details[('file-id', '2')]
+        # The index, and what to read off disk, should be the same for both
+        # versions of the file.
+        self.assertEqual(file_1_details[0][:3], file_2_details[0][:3])
+
     def test_format_pack_compresses_True(self):
         repo = self.make_repository('repo', format='2a')
         self.assertTrue(repo._format.pack_compresses)
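
To spell out what the assertion checks: get_build_details is assumed here to
map each key to (index_memo, compression_parent, parents, record_details),
with the first three index_memo fields identifying which compressed block a
text lives in. A hedged one-liner of that reading (in_same_group is a
hypothetical helper, not part of the test):

    def in_same_group(details_a, details_b):
        # Equal (index, group start, group end) prefixes mean both file
        # versions were packed into a single groupcompress block.
        return details_a[0][:3] == details_b[0][:3]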