Rev 5: Add git and beta dulwich compressor support. in http://people.ubuntu.com/~robertc/baz2.0/plugins/compressbench/trunk
Robert Collins
robertc at robertcollins.net
Fri Jan 23 06:29:06 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/plugins/compressbench/trunk
------------------------------------------------------------
revno: 5
revision-id: robertc at robertcollins.net-20090123062854-a2oezj8kln5m0aql
parent: robertc at robertcollins.net-20090122010818-htr2chqbsa5l13y3
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2009-01-23 17:28:54 +1100
message:
Add git and beta dulwich compressor support.
=== modified file 'bench.py'
--- a/bench.py 2009-01-22 01:08:18 +0000
+++ b/bench.py 2009-01-23 06:28:54 +0000
@@ -94,11 +94,113 @@
source.close()
+from subprocess import Popen, PIPE
+from bzrlib.btree_index import BTreeBuilder
+from bzrlib.versionedfile import FulltextContentFactory, VersionedFiles
+class GitVersionedFiles(VersionedFiles):
+
+    def __init__(self, transport):
+        self._index = BTreeBuilder()
+        self._transport = transport
+        self._path = transport.local_abspath('.')
+        self._run_git(['init'])
+        transport.put_bytes('content', '')
+        self._run_git(['add', 'content'])
+
+    def _run_git(self, params):
+        process = Popen(['git'] + params, cwd=self._path, stdin=PIPE,
+            stdout=PIPE, stderr=PIPE)
+        out, err = process.communicate()
+        rc = process.returncode
+        assert rc == 0, 'bad result %d %r %r' % (rc, out, err)
+        return out, err
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        for key in keys:
+            blob_id = self._index.iter_entries([key]).next()[2]
+            content, _ = self._run_git(['cat-file', '-p', blob_id])
+            yield FulltextContentFactory(key, (), None, content)
+
+    def insert_record_stream(self, stream):
+        for record in stream:
+            self._transport.put_bytes('content', record.get_bytes_as('fulltext'))
+            # Save the content
+            out, _ = self._run_git(['hash-object', 'content'])
+            blob_id = out[:-1]
+            # record a tree so pack will work as normal with ordering etc.
+            # NB: note that this is double-hashing the file content probably.
+            self._run_git(['commit', '-m', 'foo', 'content'])
+            self._index.add_node(record.key, blob_id)
+
+    def keys(self):
+        result = set()
+        for node in self._index.iter_all_entries():
+            result.add(node[1])
+        return result
+
+    def _pack(self):
+        self._run_git(['repack', '-a', '-d', '--depth=200', '--window=200'])
+
+    def _pack_name(self):
+        paths = self._transport.list_dir('.git/objects/pack')
+        for path in paths:
+            if path.endswith('.pack'):
+                return '.git/objects/pack/' + path
+
+    def size(self):
+        return self._transport.stat(self._pack_name()).st_size
+
+
+class GitWriterThunk(object):
+
+    def __init__(self, vf):
+        self.vf = vf
+
+    def close(self):
+        pass
+
+    def end(self):
+        self.vf._pack()
+
+
+def make_git_factory(delta, graph, keylength):
+    """Make a VF factory based on invoking a git commit for each step."""
+    def factory(transport):
+        result = GitVersionedFiles(transport)
+        result.writer = GitWriterThunk(result)
+        result.stream = result.writer
+        return result
+    return factory
+
+
+def make_dulwich_factory(delta, graph, keylength):
+    """Make a dulwich-using git backed VF."""
+    from dulwich.pack import Pack
+    class DulwichVersionedFiles(GitVersionedFiles):
+        """A dulwich using VF implementation."""
+        def __init__(self, transport):
+            GitVersionedFiles.__init__(self, transport)
+            self._pack_obj = None
+        def get_record_stream(self, keys, ordering, include_delta_closure):
+            if self._pack_obj is None:
+                self._pack_obj = Pack(self._transport.local_abspath(self._pack_name()[:-5]))
+            for key in keys:
+                blob_id = self._index.iter_entries([key]).next()[2]
+                content = self._pack_obj.get_raw(blob_id)
+                yield FulltextContentFactory(key, (), None, content)
+
+    def factory(transport):
+        result = DulwichVersionedFiles(transport)
+        result.writer = GitWriterThunk(result)
+        result.stream = result.writer
+        return result
+    return factory
+
class cmd_compressbench(Command):
    """Benchmark compression performance."""
    takes_options = [
-        ListOption('delta', type=str, help='Compressors to test. (gc, knit)'),
+        ListOption('delta', type=str, help='Compressors to test. (gc, knit, git, dulwich)'),
        Option('limit', type=int, help='Cap the corpus at limit bytes'),
        ]
@@ -122,14 +224,19 @@
            factory.parents = parents
            parents = (factory.key,)
        # Now for each compression type benchmark it:
-        factories = {'knit':make_pack_factory, 'gc':make_gc_factory}
+        factories = {'knit':make_pack_factory, 'gc':make_gc_factory,
+            'git':make_git_factory, 'dulwich':make_dulwich_factory}
        for label in delta:
            factory = factories[label]
            print "Testing", label, "compression"
            vf, vf_t, vf_cleanup = self.test_compressor(stream, factory)
            try:
                # decompress
-                print "Compressed size", vf_t.stat('newpack').st_size
+                try:
+                    size = vf.size()
+                except AttributeError:
+                    size = vf_t.stat('newpack').st_size
+                print "Compressed size", size
                self.test_decompress(vf)
            finally:
                vf_cleanup()
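
For readers unfamiliar with the git plumbing the new backend drives, here is a minimal standalone sketch (not part of this commit) of the store-and-fetch round-trip that GitVersionedFiles builds on. It assumes 'git' is on the PATH and uses a throwaway repository; the 'hash-object -w' shortcut stands in for the commit-per-record approach the plugin takes, so it sidesteps the double hashing noted in the code above.

# A minimal sketch, not part of the plugin: the blob round-trip that
# GitVersionedFiles relies on.  Assumes 'git' is available on PATH; the
# throwaway repository and the 'hash-object -w' shortcut are assumptions
# made for this example only.
import os
import tempfile
from subprocess import Popen, PIPE


def run_git(params, cwd):
    # Run a git command and fail loudly on a non-zero exit code.
    process = Popen(['git'] + params, cwd=cwd, stdin=PIPE, stdout=PIPE,
        stderr=PIPE)
    out, err = process.communicate()
    assert process.returncode == 0, 'bad result %d %r %r' % (
        process.returncode, out, err)
    return out


repo = tempfile.mkdtemp()
run_git(['init'], repo)
f = open(os.path.join(repo, 'content'), 'wb')
f.write('a fulltext to store\n')
f.close()
# 'hash-object -w' writes the blob into the object store and prints its sha1.
blob_id = run_git(['hash-object', '-w', 'content'], repo).strip()
# 'cat-file -p' returns the stored bytes, which is all get_record_stream needs.
assert run_git(['cat-file', '-p', blob_id], repo) == 'a fulltext to store\n'

With the new factories registered, the extra compressors should be selectable much like the existing ones, e.g. something along the lines of 'bzr compressbench --delta=git --delta=dulwich' (the exact way ListOption values are supplied is an assumption here, not something this diff shows).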