Rev 2750: Merge pack writer tweaks. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Sep 3 05:43:05 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2750
revision-id: robertc at robertcollins.net-20070903044253-bg0bbr8sgy4ivsl0
parent: robertc at robertcollins.net-20070903032716-agc2gd18ogf35i3f
parent: robertc at robertcollins.net-20070903043134-k1w3zs0se7psbuoh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-09-03 14:42:53 +1000
message:
  Merge pack writer tweaks.
added:
  bzrlib/benchmarks/bench_pack.py bench_pack.py-20070903042947-0wphp878xr6wkw7t-1
modified:
  bzrlib/benchmarks/__init__.py  __init__.py-20060516064526-eb0d37c78e86065d
  bzrlib/pack.py                 container.py-20070607160755-tr8zc26q18rn0jnb-1
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.3.74.1.31.2.1
    revision-id: robertc at robertcollins.net-20070903043134-k1w3zs0se7psbuoh
    parent: pqm at pqm.ubuntu.com-20070901160444-hcr66zejwyy0jezc
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: pack
    timestamp: Mon 2007-09-03 14:31:34 +1000
    message:
      25 percent time reduction in pack write logic.
    added:
      bzrlib/benchmarks/bench_pack.py bench_pack.py-20070903042947-0wphp878xr6wkw7t-1
    modified:
      bzrlib/benchmarks/__init__.py  __init__.py-20060516064526-eb0d37c78e86065d
      bzrlib/pack.py                 container.py-20070607160755-tr8zc26q18rn0jnb-1
=== added file 'bzrlib/benchmarks/bench_pack.py'
--- a/bzrlib/benchmarks/bench_pack.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/benchmarks/bench_pack.py	2007-09-03 04:31:34 +0000
@@ -0,0 +1,54 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Benchmarks for pack performance"""
+
+import os
+
+from bzrlib import (
+    pack,
+    )
+from bzrlib.benchmarks import Benchmark
+
+
+class BenchPack(Benchmark):
+    """Benchmark pack performance."""
+
+    def test_insert_one_gig_1k_chunks_no_names_disk(self):
+        # test real disk writing of many small chunks. 
+        # useful for testing whether buffer sizes are right 
+        transport = self.get_transport()
+        stream = transport.open_write_stream('pack.pack')
+        writer = pack.ContainerWriter(stream.write)
+        self.write_1_gig(writer)
+        stream.close()
+
+    def test_insert_one_gig_1k_chunks_no_names_null(self):
+        # write to /dev/null so we test the pack processing.
+        transport = self.get_transport()
+        dev_null = open('/dev/null', 'wb')
+        writer = pack.ContainerWriter(dev_null.write)
+        self.write_1_gig(writer)
+        dev_null.close()
+
+    def write_1_gig(self, writer):
+        one_k = "A" * 1024
+        writer.begin()
+        def write_1g():
+            for hunk in xrange(1024 * 1024):
+                writer.add_bytes_record(one_k, [])
+        self.time(write_1g)
+        writer.end()

=== modified file 'bzrlib/benchmarks/__init__.py'
--- a/bzrlib/benchmarks/__init__.py	2007-08-29 04:43:31 +0000
+++ b/bzrlib/benchmarks/__init__.py	2007-09-03 04:31:34 +0000
@@ -185,6 +185,7 @@
                    'bzrlib.benchmarks.bench_inventory',
                    'bzrlib.benchmarks.bench_knit',
                    'bzrlib.benchmarks.bench_log',
+                   'bzrlib.benchmarks.bench_pack',
                    'bzrlib.benchmarks.bench_osutils',
                    'bzrlib.benchmarks.bench_rocks',
                    'bzrlib.benchmarks.bench_startup',

=== modified file 'bzrlib/pack.py'
--- a/bzrlib/pack.py	2007-08-15 01:12:57 +0000
+++ b/bzrlib/pack.py	2007-09-03 04:31:34 +0000
@@ -104,20 +104,29 @@
         """
         current_offset = self.current_offset
         # Kind marker
-        self.write_func("B")
+        byte_sections = ["B"]
         # Length
-        self.write_func(str(len(bytes)) + "\n")
+        byte_sections.append(str(len(bytes)) + "\n")
         # Names
         for name_tuple in names:
             # Make sure we're writing valid names.  Note that we will leave a
             # half-written record if a name is bad!
             for name in name_tuple:
                 _check_name(name)
-            self.write_func('\x00'.join(name_tuple) + "\n")
+            byte_sections.append('\x00'.join(name_tuple) + "\n")
         # End of headers
-        self.write_func("\n")
+        byte_sections.append("\n")
         # Finally, the contents.
-        self.write_func(bytes)
+        byte_sections.append(bytes)
+        # XXX: This copies all of bytes in memory, but is usually faster
+        # than two write calls (12 vs 13 seconds to output a gig of 1k
+        # records). Results may differ on significantly larger records
+        # like .iso's, but those should be rare and so are unlikely to be
+        # the common case. The biggest issue is that the copy causes extreme
+        # memory pressure in that case. One possible improvement here is to
+        # check the size of the content before deciding whether to join here
+        # or call write twice.
+        self.write_func(''.join(byte_sections))
         self.records_written += 1
         # return a memo of where we wrote data to allow random access.
         return current_offset, self.current_offset - current_offset



More information about the bazaar-commits mailing list