Rev 3791: (jam) 'bzr pack' now passes optimization flags down to the index builder in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Oct 22 21:18:23 BST 2008
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 3791
revision-id: pqm at pqm.ubuntu.com-20081022201819-s0a7gbf7wrsgn2q7
parent: pqm at pqm.ubuntu.com-20081022194407-i8gphy9hg1sj48ib
parent: john at arbash-meinel.com-20081022192642-3llptswnqir54glz
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2008-10-22 21:18:19 +0100
message:
(jam) 'bzr pack' now passes optimization flags down to the index
builder
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/chunk_writer.py chunk_writer.py-20080630234519-6ggn4id17nipovny-1
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_chunk_writer.py test_chunk_writer.py-20080630234519-6ggn4id17nipovny-2
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 3777.5.7
revision-id: john at arbash-meinel.com-20081022192642-3llptswnqir54glz
parent: john at arbash-meinel.com-20081022192527-t9vrlcebnylxyv4f
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-22 14:26:42 -0500
message:
NEWS entry about index optimization.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
------------------------------------------------------------
revno: 3777.5.6
revision-id: john at arbash-meinel.com-20081022192527-t9vrlcebnylxyv4f
parent: john at arbash-meinel.com-20081022192446-gh0f5zijpmurmdmg
parent: pqm at pqm.ubuntu.com-20081021231845-k119hl1icewguq50
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-22 14:25:27 -0500
message:
Merge bzr.dev 3789
added:
bzrlib/tests/fake_command.py fake_command.py-20081021195002-r9v65tgxx63c25v9-1
doc/developers/cycle.txt cycle.txt-20081017031739-rw24r0cywm2ok3xu-1
tools/packaging/lp-upload-release lpuploadrelease-20081020075647-56zdf9z6yav1bx81-1
modified:
Makefile Makefile-20050805140406-d96e3498bb61c5bb
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/branch.py branch.py-20050309040759-e4baf4e0d046576e
bzrlib/commands.py bzr.py-20050309040720-d10f4714595cf8c3
bzrlib/config.py config.py-20051011043216-070c74f4e9e338e8
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/patches.py patches.py-20050727183609-378c1cc5972ce908
bzrlib/plugins/launchpad/account.py account.py-20071011033320-50y6vfftywf4yllw-1
bzrlib/plugins/launchpad/lp_directory.py lp_indirect.py-20070126012204-de5rugwlt22c7u7e-1
bzrlib/plugins/launchpad/test_account.py test_account.py-20071011033320-50y6vfftywf4yllw-2
bzrlib/plugins/launchpad/test_lp_directory.py test_lp_indirect.py-20070126002743-oyle362tzv9cd8mi-1
bzrlib/tests/blackbox/test_command_encoding.py test_command_encoding.py-20060106032110-45431fd2ce9ff21f
bzrlib/tests/test_branch.py test_branch.py-20060116013032-97819aa07b8ab3b5
bzrlib/tests/test_commands.py test_command.py-20051019190109-3b17be0f52eaa7a8
bzrlib/tests/test_config.py testconfig.py-20051011041908-742d0c15d8d8c8eb
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
bzrlib/tests/test_patches.py test_patches.py-20051231203844-f4974d20f6aea09c
bzrlib/tests/test_plugins.py plugins.py-20050622075746-32002b55e5e943e9
bzrlib/tests/test_remote.py test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
bzrlib/tests/test_sftp_transport.py testsftp.py-20051027032739-247570325fec7e7e
bzrlib/tests/test_store.py teststore.py-20050826022702-f6caadb647395769
bzrlib/tests/test_transform.py test_transaction.py-20060105172520-b3ffb3946550e6c4
bzrlib/transform.py transform.py-20060105172343-dd99e54394d91687
bzrlib/transport/ftp/__init__.py ftp.py-20051116161804-58dc9506548c2a53
bzrlib/transport/remote.py ssh.py-20060608202016-c25gvf1ob7ypbus6-1
bzrlib/transport/sftp.py sftp.py-20051019050329-ab48ce71b7e32dfe
bzrlib/transport/ssh.py ssh.py-20060824042150-0s9787kng6zv1nwq-1
doc/developers/HACKING.txt HACKING-20050805200004-2a5dc975d870f78c
doc/developers/index.txt index.txt-20070508041241-qznziunkg0nffhiw-1
doc/developers/releasing.txt releasing.txt-20080502015919-fnrcav8fwy8ccibu-1
setup.py setup.py-20050314065409-02f8a0a6e3f9bc70
------------------------------------------------------------
revno: 3777.5.5
revision-id: john at arbash-meinel.com-20081022192446-gh0f5zijpmurmdmg
parent: john at arbash-meinel.com-20081016185822-3zwdbkphgacdz9s5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-22 14:24:46 -0500
message:
Up-call to the parent as suggested by Andrew.
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
------------------------------------------------------------
revno: 3777.5.4
revision-id: john at arbash-meinel.com-20081016185822-3zwdbkphgacdz9s5
parent: john at arbash-meinel.com-20081015214003-n96hr05ylrwlgdvi
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Thu 2008-10-16 13:58:22 -0500
message:
OptimisingPacker now sets the optimize flags for the indexes being built.
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 3777.5.3
revision-id: john at arbash-meinel.com-20081015214003-n96hr05ylrwlgdvi
parent: john at arbash-meinel.com-20081015213410-g19sy2rpgxcl2sew
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-15 16:40:03 -0500
message:
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
------------------------------------------------------------
revno: 3777.5.2
revision-id: john at arbash-meinel.com-20081015213410-g19sy2rpgxcl2sew
parent: john at arbash-meinel.com-20081015212739-ap2uunpg6rjkypc1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-15 16:34:10 -0500
message:
Change the name to ChunkWriter.set_optimize()
Also allow it to be passed during __init__ and pass it in from
BTreeBuilder.
modified:
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/chunk_writer.py chunk_writer.py-20080630234519-6ggn4id17nipovny-1
bzrlib/tests/test_chunk_writer.py test_chunk_writer.py-20080630234519-6ggn4id17nipovny-2
------------------------------------------------------------
revno: 3777.5.1
revision-id: john at arbash-meinel.com-20081015212739-ap2uunpg6rjkypc1
parent: pqm at pqm.ubuntu.com-20081014031836-0pn8u98igc7gvtv0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree_optimize
timestamp: Wed 2008-10-15 16:27:39 -0500
message:
Add ChunkWriter.optimize(for_size=True)
modified:
bzrlib/chunk_writer.py chunk_writer.py-20080630234519-6ggn4id17nipovny-1
bzrlib/tests/test_chunk_writer.py test_chunk_writer.py-20080630234519-6ggn4id17nipovny-2
=== modified file 'NEWS'
--- a/NEWS 2008-10-22 19:09:16 +0000
+++ b/NEWS 2008-10-22 20:18:19 +0000
@@ -19,6 +19,10 @@
* ``bzr dump-btree`` is a hidden command introduced to allow dumping
the contents of a compressed btree file. (John Arbash Meinel)
+ * ``bzr pack`` now tells the index builders to optimize for size. For
+ btree index repositories, this can save 25% of the index size
+ (mostly in the text indexes). (John Arbash Meinel)
+
* default username for bzr+ssh and sftp can be configured in
authentication.conf. (Aaron Bentley)
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2008-09-26 07:09:50 +0000
+++ b/bzrlib/btree_index.py 2008-10-15 21:34:10 +0000
@@ -139,6 +139,7 @@
self._nodes = {}
# Indicate it hasn't been built yet
self._nodes_by_key = None
+ self._optimize_for_size = False
def add_node(self, key, value, references=()):
"""Add a node to the index.
@@ -276,7 +277,8 @@
length = _PAGE_SIZE
if internal_row.nodes == 0:
length -= _RESERVED_HEADER_BYTES # padded
- internal_row.writer = chunk_writer.ChunkWriter(length, 0)
+ internal_row.writer = chunk_writer.ChunkWriter(length, 0,
+ optimize_for_size=self._optimize_for_size)
internal_row.writer.write(_INTERNAL_FLAG)
internal_row.writer.write(_INTERNAL_OFFSET +
str(rows[pos + 1].nodes) + "\n")
@@ -284,7 +286,8 @@
length = _PAGE_SIZE
if rows[-1].nodes == 0:
length -= _RESERVED_HEADER_BYTES # padded
- rows[-1].writer = chunk_writer.ChunkWriter(length)
+ rows[-1].writer = chunk_writer.ChunkWriter(length,
+ optimize_for_size=self._optimize_for_size)
rows[-1].writer.write(_LEAF_FLAG)
if rows[-1].writer.write(line):
# this key did not fit in the node:
@@ -313,7 +316,8 @@
# This will be padded, hence the -100
new_row.writer = chunk_writer.ChunkWriter(
_PAGE_SIZE - _RESERVED_HEADER_BYTES,
- reserved_bytes)
+ reserved_bytes,
+ optimize_for_size=self._optimize_for_size)
new_row.writer.write(_INTERNAL_FLAG)
new_row.writer.write(_INTERNAL_OFFSET +
str(rows[1].nodes - 1) + "\n")
=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py 2008-08-28 20:13:31 +0000
+++ b/bzrlib/chunk_writer.py 2008-10-15 21:34:10 +0000
@@ -47,51 +47,53 @@
# In testing, some values for bzr.dev::
# repack time MB max full
# 1 7.5 4.6 1140 0
- # 2 8.4 4.2 1036 1 6.8
+ # 2 8.4 4.2 1036 1
# 3 9.8 4.1 1012 278
# 4 10.8 4.1 728 945
# 20 11.1 4.1 0 1012
# repack = 0
- # zsync time MB repack max_z time w/ add_node
- # 0 6.7 24.7 0 6270 5.0
- # 1 6.5 13.2 0 3342 4.3
- # 2 6.6 9.6 0 2414 4.9
- # 5 6.5 6.2 0 1549 4.8
- # 6 6.5 5.8 1 1435 4.8
- # 7 6.6 5.5 19 1337 4.8
- # 8 6.7 5.3 81 1220 4.4
- # 10 6.8 5.0 260 967 5.3
- # 11 6.8 4.9 366 839 5.3
- # 12 6.9 4.8 454 731 5.1
- # 15 7.2 4.7 704 450 5.8
- # 20 7.7 4.6 1133 7 5.8
+ # zsync time MB repack stop_for_z
+ # 0 5.0 24.7 0 6270
+ # 1 4.3 13.2 0 3342
+ # 2 4.9 9.6 0 2414
+ # 5 4.8 6.2 0 1549
+ # 6 4.8 5.8 1 1435
+ # 7 4.8 5.5 19 1337
+ # 8 4.4 5.3 81 1220
+ # 10 5.3 5.0 260 967
+ # 11 5.3 4.9 366 839
+ # 12 5.1 4.8 454 731
+ # 15 5.8 4.7 704 450
+ # 20 5.8 4.6 1133 7
# In testing, some values for mysql-unpacked::
# next_bytes estim
- # repack time MB hit_max full
- # 1 51.7 15.4 3913 0
- # 2 54.4 13.7 3467 0 35.4
- # 20 67.0 13.4 0 3380 46.7
+ # repack time MB full stop_for_repack
+ # 1 15.4 0 3913
+ # 2 35.4 13.7 0 346
+ # 20 46.7 13.4 3380 0
# repack=0
- # zsync time w/ add_node
- # 0 47.7 116.5 0 29782 29.5
- # 1 48.5 60.2 0 15356 27.8
- # 2 48.1 42.4 0 10822 27.8
- # 5 48.3 25.5 0 6491 26.8
- # 6 48.0 23.2 13 5896 27.3
- # 7 48.1 21.6 29 5451 27.5
- # 8 48.1 20.3 52 5108 27.1
- # 10 46.9 18.6 195 4526 29.4
- # 11 48.8 18.0 421 4143 29.2
- # 12 47.4 17.5 702 3738 28.0
- # 15 49.6 16.5 1223 2969 28.9
- # 20 48.9 15.7 2182 1810 29.6
- # 30 15.4 3891 23 31.4
-
- _max_repack = 0
- _max_zsync = 8
-
- def __init__(self, chunk_size, reserved=0):
+ # zsync stop_for_z
+ # 0 29.5 116.5 0 29782
+ # 1 27.8 60.2 0 15356
+ # 2 27.8 42.4 0 10822
+ # 5 26.8 25.5 0 6491
+ # 6 27.3 23.2 13 5896
+ # 7 27.5 21.6 29 5451
+ # 8 27.1 20.3 52 5108
+ # 10 29.4 18.6 195 4526
+ # 11 29.2 18.0 421 4143
+ # 12 28.0 17.5 702 3738
+ # 15 28.9 16.5 1223 2969
+ # 20 29.6 15.7 2182 1810
+ # 30 31.4 15.4 3891 23
+
+ # Tuple of (num_repack_attempts, num_zsync_attempts)
+ # num_zsync_attempts only has meaning if num_repack_attempts is 0.
+ _repack_opts_for_speed = (0, 8)
+ _repack_opts_for_size = (20, 0)
+
+ def __init__(self, chunk_size, reserved=0, optimize_for_size=False):
"""Create a ChunkWriter to write chunk_size chunks.
:param chunk_size: The total byte count to emit at the end of the
@@ -110,6 +112,8 @@
self.num_zsync = 0
self.unused_bytes = None
self.reserved_size = reserved
+ # Default is to make building fast rather than compact
+ self.set_optimize(for_size=optimize_for_size)
def finish(self):
"""Finish the chunk.
@@ -141,6 +145,19 @@
self.bytes_list.append("\x00" * nulls_needed)
return self.bytes_list, self.unused_bytes, nulls_needed
+ def set_optimize(self, for_size=True):
+ """Change how we optimize our writes.
+
+ :param for_size: If True, optimize for minimum space usage, otherwise
+ optimize for fastest writing speed.
+ :return: None
+ """
+ if for_size:
+ opts = ChunkWriter._repack_opts_for_size
+ else:
+ opts = ChunkWriter._repack_opts_for_speed
+ self._max_repack, self._max_zsync = opts
+
def _recompress_all_bytes_in(self, extra_bytes=None):
"""Recompress the current bytes_in, and optionally more.
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2008-09-21 14:48:37 +0000
+++ b/bzrlib/index.py 2008-10-15 21:40:03 +0000
@@ -84,6 +84,7 @@
self._nodes = {}
self._nodes_by_key = None
self._key_length = key_elements
+ self._optimize_for_size = False
def _check_key(self, key):
"""Raise BadIndexKey if key is not a valid key for this index."""
@@ -278,6 +279,17 @@
(len(result.getvalue()), expected_bytes))
return result
+ def set_optimize(self, for_size=True):
+ """Change how the builder tries to optimize the result.
+
+ :param for_size: Tell the builder to try and make the index as small as
+ possible.
+ :return: None
+ """
+ # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
+ # other builders do.
+ self._optimize_for_size = for_size
+
class GraphIndex(object):
"""An index for data with embedded graphs.
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-10-01 05:40:45 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-10-22 19:24:46 +0000
@@ -971,6 +971,16 @@
# TODO: combine requests in the same index that are in ascending order.
return total, requests
+ def open_pack(self):
+ """Open a pack for the pack we are creating."""
+ new_pack = super(OptimisingPacker, self).open_pack()
+ # Turn on the optimization flags for all the index builders.
+ new_pack.revision_index.set_optimize(for_size=True)
+ new_pack.inventory_index.set_optimize(for_size=True)
+ new_pack.text_index.set_optimize(for_size=True)
+ new_pack.signature_index.set_optimize(for_size=True)
+ return new_pack
+
class ReconcilePacker(Packer):
"""A packer which regenerates indices etc as it copies.
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-08-28 20:13:31 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-10-15 21:40:03 +0000
@@ -434,6 +434,13 @@
self.assertEqual(sorted(nodes), nodes)
self.assertEqual(16, len(nodes))
+ def test_set_optimize(self):
+ builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
+ builder.set_optimize(for_size=True)
+ self.assertTrue(builder._optimize_for_size)
+ builder.set_optimize(for_size=False)
+ self.assertFalse(builder._optimize_for_size)
+
def test_spill_index_stress_2_2(self):
# test that references and longer keys don't confuse things.
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
=== modified file 'bzrlib/tests/test_chunk_writer.py'
--- a/bzrlib/tests/test_chunk_writer.py 2008-08-22 05:54:44 +0000
+++ b/bzrlib/tests/test_chunk_writer.py 2008-10-15 21:34:10 +0000
@@ -39,6 +39,24 @@
# Only a zlib header.
self.assertEqual(4088, padding)
+ def test_optimize_for_speed(self):
+ writer = chunk_writer.ChunkWriter(4096)
+ writer.set_optimize(for_size=False)
+ self.assertEqual(chunk_writer.ChunkWriter._repack_opts_for_speed,
+ (writer._max_repack, writer._max_zsync))
+ writer = chunk_writer.ChunkWriter(4096, optimize_for_size=False)
+ self.assertEqual(chunk_writer.ChunkWriter._repack_opts_for_speed,
+ (writer._max_repack, writer._max_zsync))
+
+ def test_optimize_for_size(self):
+ writer = chunk_writer.ChunkWriter(4096)
+ writer.set_optimize(for_size=True)
+ self.assertEqual(chunk_writer.ChunkWriter._repack_opts_for_size,
+ (writer._max_repack, writer._max_zsync))
+ writer = chunk_writer.ChunkWriter(4096, optimize_for_size=True)
+ self.assertEqual(chunk_writer.ChunkWriter._repack_opts_for_size,
+ (writer._max_repack, writer._max_zsync))
+
def test_some_data(self):
writer = chunk_writer.ChunkWriter(4096)
writer.write("foo bar baz quux\n")
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2008-09-02 17:52:00 +0000
+++ b/bzrlib/tests/test_index.py 2008-10-15 21:40:03 +0000
@@ -350,6 +350,13 @@
builder.add_node(('k', 'ey'), 'data', ([('reference', 'tokey')], ))
builder.add_node(('reference', 'tokey'), 'data', ([],))
+ def test_set_optimize(self):
+ builder = GraphIndexBuilder(reference_lists=1, key_elements=2)
+ builder.set_optimize(for_size=True)
+ self.assertTrue(builder._optimize_for_size)
+ builder.set_optimize(for_size=False)
+ self.assertFalse(builder._optimize_for_size)
+
class TestGraphIndex(TestCaseWithMemoryTransport):
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2008-09-29 07:03:55 +0000
+++ b/bzrlib/tests/test_repository.py 2008-10-16 18:58:22 +0000
@@ -998,6 +998,24 @@
# thus there are not yet any tests.
+class TestOptimisingPacker(TestCaseWithTransport):
+ """Tests for the OptimisingPacker class."""
+
+ def get_pack_collection(self):
+ repo = self.make_repository('.')
+ return repo._pack_collection
+
+ def test_open_pack_will_optimise(self):
+ packer = pack_repo.OptimisingPacker(self.get_pack_collection(),
+ [], '.test')
+ new_pack = packer.open_pack()
+ self.assertIsInstance(new_pack, pack_repo.NewPack)
+ self.assertTrue(new_pack.revision_index._optimize_for_size)
+ self.assertTrue(new_pack.inventory_index._optimize_for_size)
+ self.assertTrue(new_pack.text_index._optimize_for_size)
+ self.assertTrue(new_pack.signature_index._optimize_for_size)
+
+
class TestInterDifferingSerializer(TestCaseWithTransport):
def test_progress_bar(self):
More information about the bazaar-commits mailing list