Rev 47: experiment with removing the label and sha1 fields (seems to shrink texts by 10-30%), in http://bazaar.launchpad.net/%7Ejameinel/bzr-groupcompress/experimental
John Arbash Meinel
john at arbash-meinel.com
Thu Feb 26 21:58:31 GMT 2009
At http://bazaar.launchpad.net/%7Ejameinel/bzr-groupcompress/experimental
------------------------------------------------------------
revno: 47
revision-id: john at arbash-meinel.com-20090226215757-0rpcnz9vf6z3eikn
parent: john at arbash-meinel.com-20090225221429-l0is3qxy1hvzuhes
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: experimental
timestamp: Thu 2009-02-26 15:57:57 -0600
message:
Experiment with removing the label and sha1 fields. This seems to shrink texts by 10-30%.
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-02-20 03:52:05 +0000
+++ b/groupcompress.py 2009-02-26 21:57:57 +0000
@@ -57,13 +57,13 @@
result = []
lines = iter(line_list)
next = lines.next
- label_line = lines.next()
- sha1_line = lines.next()
- if (not label_line.startswith('label: ') or
- not sha1_line.startswith('sha1: ')):
- raise AssertionError("bad text record %r" % lines)
- label = tuple(label_line[7:-1].split('\x00'))
- sha1 = sha1_line[6:-1]
+ ## label_line = lines.next()
+ ## sha1_line = lines.next()
+ ## if (not label_line.startswith('label: ') or
+ ## not sha1_line.startswith('sha1: ')):
+ ## raise AssertionError("bad text record %r" % lines)
+ ## label = tuple(label_line[7:-1].split('\x00'))
+ ## sha1 = sha1_line[6:-1]
for header in lines:
op = header[0]
numbers = header[2:]
@@ -73,7 +73,8 @@
else:
contents = [next() for i in xrange(numbers[0])]
result.append((op, None, numbers[0], contents))
- return label, sha1, result
+ return result
+ ## return label, sha1, result
def apply_delta(basis, delta):
@@ -221,9 +222,9 @@
key = key[:-1] + ('sha1:' + sha1,)
label = '\x00'.join(key)
new_lines = []
- new_lines.append('label: %s\n' % label)
- new_lines.append('sha1: %s\n' % sha1)
- index_lines = [False, False]
+ # new_lines.append('label: %s\n' % label)
+ # new_lines.append('sha1: %s\n' % sha1)
+ index_lines = []
# setup good encoding for trailing \n support.
if not lines or lines[-1].endswith('\n'):
lines.append('\n')
@@ -274,9 +275,10 @@
"""
delta_details = self.labels_deltas[key]
delta_lines = self.lines[delta_details[0][1]:delta_details[1][1]]
- label, sha1, delta = parse(delta_lines)
- if label != key:
- raise AssertionError("wrong key: %r, wanted %r" % (label, key))
+ ## label, sha1, delta = parse(delta_lines)
+ delta = parse(delta_lines)
+ ## if label != key:
+ ## raise AssertionError("wrong key: %r, wanted %r" % (label, key))
# Perhaps we want to keep the line offsets too in memory at least?
lines = apply_delta(''.join(self.lines), delta)
sha1 = sha_strings(lines)
@@ -618,12 +620,14 @@
else:
index_memo, _, parents, (method, _) = locations[key]
plain, delta_lines = self._get_group_and_delta_lines(index_memo)
- label, sha1, delta = parse(delta_lines)
- if label != key:
- raise AssertionError("wrong key: %r, wanted %r" % (label, key))
+ delta = parse(delta_lines)
+ # label, sha1, delta = parse(delta_lines)
+ # if label != key:
+ # raise AssertionError("wrong key: %r, wanted %r" % (label, key))
lines = apply_delta(plain, delta)
- if sha_strings(lines) != sha1:
- raise AssertionError('sha1 sum did not match')
+ sha1 = sha_strings(lines)
+ # if sha_strings(lines) != sha1:
+ # raise AssertionError('sha1 sum did not match')
yield ChunkedContentFactory(key, parents, sha1, lines)
def get_sha1s(self, keys):
More information about the bazaar-commits mailing list