Rev 100: Use the max_delta flag. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
John Arbash Meinel
john at arbash-meinel.com
Tue Mar 3 22:03:10 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
------------------------------------------------------------
revno: 100
revision-id: john at arbash-meinel.com-20090303220215-1luhz4zfr9vrdmud
parent: john at arbash-meinel.com-20090303214221-ea1e84bkmi22yfgk
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Tue 2009-03-03 16:02:15 -0600
message:
Use the max_delta flag.
Prefer to extract and compress bytes rather than chunks/lines.
This has a fairly positive impact on 'bzr pack' times.
We still do a ''.join([bytes]), but we know a single-element join
doesn't have to do any memory copying.
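
(For reference, the no-copy claim is a CPython fast path, not something
the language spec promises: joining a one-element list returns the
element itself rather than building a new string. A quick check:

    # CPython special-cases ''.join() over a one-element list and
    # returns the element unchanged, so no bytes are copied.
    # This is an implementation detail, not a language guarantee.
    data = 'x' * (1024 * 1024)
    assert ''.join([data]) is data  # same object in CPython
)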
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-03 20:35:26 +0000
+++ b/groupcompress.py 2009-03-03 22:02:15 +0000
@@ -150,12 +150,14 @@
:return: The sha1 of lines, and the number of bytes accumulated in
the group output so far.
"""
- target_text = ''.join(chunks)
- sha1 = sha_string(target_text)
+ # TODO: Change this to a bytes interface, since the output is now a
+ # bytes interface anyway.
+ bytes = ''.join(chunks)
+ sha1 = sha_string(bytes)
if key[-1] is None:
key = key[:-1] + ('sha1:' + sha1,)
label = '\x00'.join(key)
- input_len = len(target_text)
+ input_len = len(bytes)
# By having action/label/sha1/len, we can parse the group if the index
# was ever destroyed, we have the key in 'label', we know the final
# bytes are valid from sha1, and we know where to find the end of this
@@ -172,9 +174,9 @@
raise AssertionError('_source_offset != endpoint'
' somehow the DeltaIndex got out of sync with'
' the output lines')
- delta = self._delta_index.make_delta(target_text)
- if (delta is None
- or len(delta) > len(target_text) / 2):
+ max_delta_size = len(bytes) / 2
+ delta = self._delta_index.make_delta(bytes, max_delta_size)
+ if (delta is None):
# We can't delta (perhaps source_text is empty)
# so mark this as an insert
if _NO_LABELS:
@@ -183,8 +185,8 @@
new_chunks.insert(0, 'fulltext\n')
new_chunks.append('len:%s\n' % (input_len,))
unadded_bytes = sum(map(len, new_chunks))
- self._delta_index.add_source(target_text, unadded_bytes)
- new_chunks.append(target_text)
+ self._delta_index.add_source(bytes, unadded_bytes)
+ new_chunks.append(bytes)
else:
if _NO_LABELS:
new_chunks = ['d']
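
The point of the new max_delta argument above is that make_delta() can
give up as soon as the delta grows past the cap, instead of building the
whole delta only for the caller to discard it. A pure-Python sketch of
that contract (the real work happens in the compiled DeltaIndex;
_generate_delta_ops here is a hypothetical stand-in for the rabin
matcher, stubbed out so the sketch runs):

    class DeltaIndexSketch(object):
        """Illustration of the make_delta(bytes, max_delta_size) contract."""

        def _generate_delta_ops(self, bytes):
            # Stand-in for the rabin-fingerprint matching done in C:
            # just emit fixed-size insert ops so the sketch is runnable.
            for i in xrange(0, len(bytes), 255):
                chunk = bytes[i:i + 255]
                yield chr(len(chunk)) + chunk

        def make_delta(self, bytes, max_delta_size=0):
            delta_chunks = []
            delta_len = 0
            for op in self._generate_delta_ops(bytes):
                delta_chunks.append(op)
                delta_len += len(op)
                if max_delta_size and delta_len > max_delta_size:
                    # Bail out early; the caller falls back to
                    # inserting a fulltext, as in the hunk above.
                    return None
            return ''.join(delta_chunks)

    idx = DeltaIndexSketch()
    assert idx.make_delta('a' * 1000, max_delta_size=500) is None

With max_delta_size = len(bytes) / 2, the "is this delta worth it" test
moves inside the generator, so an oversized delta costs at most half a
fulltext of output before we stop.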
@@ -605,12 +607,11 @@
if record.storage_kind == 'absent':
raise errors.RevisionNotPresent(record.key, self)
try:
- lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+ bytes = record.get_bytes_as('fulltext')
except errors.UnavailableRepresentation:
adapter_key = record.storage_kind, 'fulltext'
adapter = get_adapter(adapter_key)
bytes = adapter.get_bytes(record)
- lines = osutils.split_lines(bytes)
soft = False
if len(record.key) > 1:
prefix = record.key[0]
@@ -625,7 +626,7 @@
groups += 1
last_prefix = prefix
found_sha1, end_point = self._compressor.compress(record.key,
- lines, record.sha1, soft=soft)
+ [bytes], record.sha1, soft=soft)
if record.key[-1] is None:
key = record.key[:-1] + ('sha1:' + found_sha1,)
else:
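
For context on why the bytes path above is cheaper: the old code split
every record into lines (chunks_to_lines / split_lines) only for the
compressor to join them straight back together. A rough way to see the
overhead being removed (illustrative only; the sizes and workload are
made up):

    import timeit

    setup = "text = ('x' * 79 + '\\n') * 50000"
    # Old shape: split into lines, then join them again to hash/delta.
    split_join = timeit.timeit(
        "''.join(text.splitlines(True))", setup=setup, number=100)
    # New shape: pass the fulltext through as a one-element chunk list.
    passthrough = timeit.timeit(
        "''.join([text])", setup=setup, number=100)
    print 'split+join: %.3fs  passthrough: %.3fs' % (
        split_join, passthrough)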