Rev 3879: Change .compress() to return the start-point. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack3
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 19 23:16:02 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack3
------------------------------------------------------------
revno: 3879
revision-id: john at arbash-meinel.com-20090319225110-hfslu08ridcsc5xi
parent: john at arbash-meinel.com-20090319223546-odxel2nktm700d7e
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack3
timestamp: Thu 2009-03-19 17:51:10 -0500
message:
Change .compress() to return the start-point.
Now that .compress() can return an entry pointing at an existing entry,
we need to record that start/end range correctly in the index.
The same scheme also handles the 'null' (empty) entries.
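In brief, the shape of the API change (a minimal sketch: the tuple
layouts follow the diff below, while the surrounding variable names are
illustrative only, not taken verbatim from bzrlib):

    # Before this revision, compress() returned a 4-tuple, and every
    # caller had to track where the previous record ended:
    #   sha1, end_point, kind, length = compressor.compress(key, bytes, None)
    #   index_value = '%d %d' % (basis_end, end_point)
    #   basis_end = end_point
    #
    # After it, compress() returns a 5-tuple that includes the
    # start-point, so a deduped entry can point back at an existing byte
    # range and callers no longer need to track basis_end:
    #   (sha1, start_point, end_point, kind,
    #    length) = compressor.compress(key, bytes, None)
    #   index_value = '%d %d' % (start_point, end_point)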
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-19 22:35:46 +0000
+++ b/bzrlib/groupcompress.py 2009-03-19 22:51:10 +0000
@@ -339,8 +339,8 @@
:param sha1: TODO (should we validate only when sha1 is supplied?)
:return: The bytes for the content
"""
- if start == end == 0:
- return ''
+ if start == end == 0: # NULL entry
+ return None, ''
# Make sure we have enough bytes for this record
# TODO: if we didn't want to track the end of this entry, we could
# _ensure_content(start+enough_bytes_for_type_and_length), and
@@ -376,7 +376,8 @@
else:
if end != content_start + content_len:
raise ValueError('end != len according to field header'
- ' %s != %s' % (end, content_start + content_len))
+ ' %s != %s, %s' % (end, content_start + content_len,
+ (start, content_len)))
entry = GroupCompressBlockEntry(key, type, sha1=None,
start=start, length=end-start)
content = self._content[content_start:end]
@@ -764,6 +765,7 @@
self._delta_index = _groupcompress_pyx.DeltaIndex()
self._block = GroupCompressBlock()
self._entries_by_sha1 = {}
+ self._empty_entries = 0
self._deduped_entries = 0
def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
@@ -786,7 +788,11 @@
:seealso VersionedFiles.add_lines:
"""
if not bytes: # empty, like a dir entry, etc
- return None, 0, 'fulltext', 0
+ self._empty_entries += 1
+ self._block.add_entry(key, type='empty',
+ sha1=None, start=0,
+ length=0)
+ return None, 0, 0, 'fulltext', 0
sha1 = None
# we assume someone knew what they were doing when they passed it in
if expected_sha is not None:
@@ -822,7 +828,7 @@
self._block.add_entry(key, type=type,
sha1=sha1, start=start,
length=length)
- return sha1, self.endpoint, 'sha1_dupe', 0
+ return sha1, start, start+length, 'sha1_dupe', length
max_delta_size = len(bytes) / 2
delta = self._delta_index.make_delta(bytes, max_delta_size)
if (delta is None):
@@ -846,6 +852,7 @@
start=self.endpoint, length=length)
if self._check_for_dupes:
self._entries_by_sha1[sha1] = (type, self.endpoint, length)
+ start = self.endpoint
delta_start = (self.endpoint, len(self.lines))
self.num_keys += 1
self.output_chunks(new_chunks)
@@ -856,7 +863,7 @@
raise AssertionError('the delta index is out of sync'
'with the output lines %s != %s'
% (self._delta_index._source_offset, self.endpoint))
- return sha1, self.endpoint, type, length
+ return sha1, start, self.endpoint, type, length
def extract(self, key):
"""Extract a key previously added to the compressor.
@@ -1424,14 +1431,16 @@
self._compressor = GroupCompressor(self._check_for_dupes)
self._unadded_refs = {}
keys_to_add = []
- basis_end = 0
- total_deduped = [0]
+ total_deduped = [0, 0]
def flush():
- if self._compressor._deduped_entries > 0:
+ if (self._compressor._deduped_entries > 0
+ or self._compressor._empty_entries > 0):
total_deduped[0] += self._compressor._deduped_entries
- trace.note('Dedupped %d out of %d entries',
+ total_deduped[1] += self._compressor._empty_entries
+ trace.note('Deduped %d out of %d entries, %d empty',
self._compressor._deduped_entries,
- len(self._compressor._block._entries))
+ len(self._compressor._block._entries),
+ self._compressor._empty_entries)
bytes = self._compressor.flush().to_bytes()
index, start, length = self._access.add_raw_records(
[(None, len(bytes))], bytes)[0]
@@ -1491,7 +1500,7 @@
if max_fulltext_len < len(bytes):
max_fulltext_len = len(bytes)
max_fulltext_prefix = prefix
- (found_sha1, end_point, type,
+ (found_sha1, start_point, end_point, type,
length) = self._compressor.compress(record.key,
bytes, record.sha1, soft=soft,
nostore_sha=nostore_sha)
@@ -1539,9 +1548,8 @@
if start_new_block:
self._compressor.pop_last()
flush()
- basis_end = 0
max_fulltext_len = len(bytes)
- (found_sha1, end_point, type,
+ (found_sha1, start_point, end_point, type,
length) = self._compressor.compress(record.key,
bytes, record.sha1)
last_fulltext_len = length
@@ -1551,17 +1559,14 @@
key = record.key
self._unadded_refs[key] = record.parents
yield found_sha1
- if length == 0:
- keys_to_add.append((key, '0 0', (record.parents,)))
- else:
- keys_to_add.append((key, '%d %d' % (basis_end, end_point),
- (record.parents,)))
- basis_end = end_point
+ keys_to_add.append((key, '%d %d' % (start_point, end_point),
+ (record.parents,)))
if len(keys_to_add):
flush()
self._compressor = None
- if total_deduped[0] > 0:
- trace.note('Total deduped = %d\n', total_deduped[0])
+ if total_deduped[0] > 0 or total_deduped[1] > 0:
+ trace.note('Total deduped = %d, total empty = %d\n',
+ total_deduped[0], total_deduped[1])
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
"""Iterate over the lines in the versioned files from keys.