Rev 2750: Merge pack writer tweaks. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Mon Sep 3 05:43:05 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2750
revision-id: robertc at robertcollins.net-20070903044253-bg0bbr8sgy4ivsl0
parent: robertc at robertcollins.net-20070903032716-agc2gd18ogf35i3f
parent: robertc at robertcollins.net-20070903043134-k1w3zs0se7psbuoh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-09-03 14:42:53 +1000
message:
Merge pack writer tweaks.
added:
bzrlib/benchmarks/bench_pack.py bench_pack.py-20070903042947-0wphp878xr6wkw7t-1
modified:
bzrlib/benchmarks/__init__.py __init__.py-20060516064526-eb0d37c78e86065d
bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.31.2.1
revision-id: robertc at robertcollins.net-20070903043134-k1w3zs0se7psbuoh
parent: pqm at pqm.ubuntu.com-20070901160444-hcr66zejwyy0jezc
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack
timestamp: Mon 2007-09-03 14:31:34 +1000
message:
25 percent time reduction in pack write logic.
added:
bzrlib/benchmarks/bench_pack.py bench_pack.py-20070903042947-0wphp878xr6wkw7t-1
modified:
bzrlib/benchmarks/__init__.py __init__.py-20060516064526-eb0d37c78e86065d
bzrlib/pack.py container.py-20070607160755-tr8zc26q18rn0jnb-1
=== added file 'bzrlib/benchmarks/bench_pack.py'
--- a/bzrlib/benchmarks/bench_pack.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/benchmarks/bench_pack.py 2007-09-03 04:31:34 +0000
@@ -0,0 +1,54 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Benchmarks for pack performance"""
+
+import os
+
+from bzrlib import (
+ pack,
+ )
+from bzrlib.benchmarks import Benchmark
+
+
+class BenchPack(Benchmark):
+ """Benchmark pack performance."""
+
+ def test_insert_one_gig_1k_chunks_no_names_disk(self):
+ # test real disk writing of many small chunks.
+ # useful for testing whether buffer sizes are right
+ transport = self.get_transport()
+ stream = transport.open_write_stream('pack.pack')
+ writer = pack.ContainerWriter(stream.write)
+ self.write_1_gig(writer)
+ stream.close()
+
+ def test_insert_one_gig_1k_chunks_no_names_null(self):
+ # write to /dev/null so we test the pack processing.
+ transport = self.get_transport()
+ dev_null = open('/dev/null', 'wb')
+ writer = pack.ContainerWriter(dev_null.write)
+ self.write_1_gig(writer)
+ dev_null.close()
+
+ def write_1_gig(self, writer):
+ one_k = "A" * 1024
+ writer.begin()
+ def write_1g():
+ for hunk in xrange(1024 * 1024):
+ writer.add_bytes_record(one_k, [])
+ self.time(write_1g)
+ writer.end()
=== modified file 'bzrlib/benchmarks/__init__.py'
--- a/bzrlib/benchmarks/__init__.py 2007-08-29 04:43:31 +0000
+++ b/bzrlib/benchmarks/__init__.py 2007-09-03 04:31:34 +0000
@@ -185,6 +185,7 @@
'bzrlib.benchmarks.bench_inventory',
'bzrlib.benchmarks.bench_knit',
'bzrlib.benchmarks.bench_log',
+ 'bzrlib.benchmarks.bench_pack',
'bzrlib.benchmarks.bench_osutils',
'bzrlib.benchmarks.bench_rocks',
'bzrlib.benchmarks.bench_startup',
=== modified file 'bzrlib/pack.py'
--- a/bzrlib/pack.py 2007-08-15 01:12:57 +0000
+++ b/bzrlib/pack.py 2007-09-03 04:31:34 +0000
@@ -104,20 +104,29 @@
"""
current_offset = self.current_offset
# Kind marker
- self.write_func("B")
+ byte_sections = ["B"]
# Length
- self.write_func(str(len(bytes)) + "\n")
+ byte_sections.append(str(len(bytes)) + "\n")
# Names
for name_tuple in names:
# Make sure we're writing valid names. Note that we will leave a
# half-written record if a name is bad!
for name in name_tuple:
_check_name(name)
- self.write_func('\x00'.join(name_tuple) + "\n")
+ byte_sections.append('\x00'.join(name_tuple) + "\n")
# End of headers
- self.write_func("\n")
+ byte_sections.append("\n")
# Finally, the contents.
- self.write_func(bytes)
+ byte_sections.append(bytes)
+ # XXX: This causes a memory copy the size of bytes, but is usually
+ # faster than two write calls (12 vs 13 seconds to output a gig of
+ # 1k records). Results may differ on significantly larger records
+ # like .iso's, but those should be rare and thus are not likely to
+ # be the common case. The biggest issue is causing extreme memory
+ # pressure in that case. One possible improvement here is to check
+ # the size of the content before deciding whether to join here or
+ # call write twice.
+ self.write_func(''.join(byte_sections))
self.records_written += 1
# return a memo of where we wrote data to allow random access.
return current_offset, self.current_offset - current_offset
More information about the bazaar-commits
mailing list