Rev 39: Have the GroupCompressBlock decide how to compress the header and content. in http://bzr.arbash-meinel.com/plugins/groupcompress
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 4 21:22:56 GMT 2009
At http://bzr.arbash-meinel.com/plugins/groupcompress
------------------------------------------------------------
revno: 39
revision-id: john at arbash-meinel.com-20090304212250-xcvwt1yx4zt76pev
parent: john at arbash-meinel.com-20090304210622-ur7wz2dz0w4lhzn3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress
timestamp: Wed 2009-03-04 15:22:50 -0600
message:
Have the GroupCompressBlock decide how to compress the header and content.
It can now decide whether they should be compressed together or not.
As long as we make the to_bytes() function match the from_bytes() one, we should be fine.
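The core idea is that a single zlib compressobj can compress the header, do a
Z_SYNC_FLUSH so the header's compressed bytes end on a byte boundary, and then
keep going with the content in the same stream. Below is a minimal sketch of
the encoding side under that assumption; the function and variable names are
illustrative only (not the plugin's API), it uses Python 3 bytes syntax, and it
omits the GCB_HEADER magic marker that the real to_bytes() writes first.

import zlib

def pack_block(header_bytes, content_bytes):
    # One shared compressobj: header and content become a single zlib
    # stream, so the content still benefits from the window built up
    # while compressing the header.
    c = zlib.compressobj()
    z_chunks = [c.compress(header_bytes)]
    # Z_SYNC_FLUSH forces out everything compressed so far, aligned on a
    # byte boundary, without ending the stream; the size of the flushed
    # output is what gets recorded as the compressed header length.
    z_chunks.append(c.flush(zlib.Z_SYNC_FLUSH))
    z_header_len = sum(len(chunk) for chunk in z_chunks)
    z_chunks.append(c.compress(content_bytes))
    z_chunks.append(c.flush())
    prefix = ('%d\n%d\n' % (z_header_len, len(header_bytes))).encode('ascii')
    return prefix + b''.join(z_chunks)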
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-04 21:06:22 +0000
+++ b/groupcompress.py 2009-03-04 21:22:50 +0000
@@ -169,7 +169,8 @@
pos2 = pos + z_header_length
z_header_bytes = bytes[pos:pos2]
assert len(z_header_bytes) == z_header_length
- header_bytes = zlib.decompress(z_header_bytes)
+ d = zlib.decompressobj()
+ header_bytes = d.decompress(z_header_bytes)
assert len(header_bytes) == header_length
del z_header_bytes
lines = header_bytes.split('\n')
@@ -193,7 +194,8 @@
info_dict[key] = value
zcontent = bytes[pos2:]
if zcontent:
- out._content = zlib.decompress(zcontent)
+ out._content = d.decompress(zcontent)
+ assert d.flush() == ''
out._size = header_len + len(out._content)
return out
@@ -228,7 +230,7 @@
self._entries[key] = entry
return entry
- def to_bytes(self):
+ def to_bytes(self, content=''):
"""Encode the information into a byte stream."""
chunks = []
for key in sorted(self._entries):
@@ -248,11 +250,21 @@
chunks.append(chunk)
bytes = ''.join(chunks)
info_len = len(bytes)
- z_bytes = zlib.compress(bytes)
+ c = zlib.compressobj()
+ z_bytes = []
+ z_bytes.append(c.compress(bytes))
del bytes
- z_len = len(z_bytes)
- chunks = [self.GCB_HEADER, '%d\n' % (z_len,), '%d\n' % (info_len,),
- z_bytes]
+ z_bytes.append(c.flush(zlib.Z_SYNC_FLUSH))
+ z_len = sum(map(len, z_bytes))
+ c_len = len(content)
+ z_bytes.append(c.compress(content))
+ z_bytes.append(c.flush())
+ chunks = [self.GCB_HEADER,
+ '%d\n' % (z_len,),
+ '%d\n' % (info_len,),
+ #'%d\n' % (c_len,),
+ ]
+ chunks.extend(z_bytes)
return ''.join(chunks)
@@ -725,11 +737,10 @@
# label in the header is duplicated in the text.
# For chk pages and real bytes, I would guess this is not
# true.
- header = self._compressor._block.to_bytes()
- compressed = zlib.compress(''.join(self._compressor.lines))
- out = header + compressed
+ bytes = self._compressor._block.to_bytes(
+ ''.join(self._compressor.lines))
index, start, length = self._access.add_raw_records(
- [(None, len(out))], out)[0]
+ [(None, len(bytes))], bytes)[0]
nodes = []
for key, reads, refs in keys_to_add:
nodes.append((key, "%d %d %s" % (start, length, reads), refs))
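The decoding side mirrors this: because the encoder sync-flushed after the
header, feeding exactly the recorded number of compressed header bytes to a
decompressobj yields exactly the header, and the same object then continues on
the content, which is what the from_bytes() hunk above relies on. A
self-contained round-trip sketch of that property (illustrative names, Python 3
bytes syntax; the plugin itself uses plain str):

import zlib

# Encode a toy header and content as one stream, sync-flushed in between.
c = zlib.compressobj()
z_header = c.compress(b'label\nsha1\n') + c.flush(zlib.Z_SYNC_FLUSH)
z_content = c.compress(b'some delta or fulltext bytes') + c.flush()

# Decode with a single decompressobj, reused across both halves.
d = zlib.decompressobj()
header = d.decompress(z_header)
assert header == b'label\nsha1\n'
content = d.decompress(z_content) + d.flush()
assert content == b'some delta or fulltext bytes'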