Rev 81: Fix a bug when handling multiple large-range copies, in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
John Arbash Meinel
john at arbash-meinel.com
Mon Mar 2 22:38:49 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
------------------------------------------------------------
revno: 81
revision-id: john at arbash-meinel.com-20090302223828-hyb4crn4w28sgvmc
parent: john at arbash-meinel.com-20090302210223-9ixutqay7sx8c1n3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Mon 2009-03-02 16:38:28 -0600
message:
Fix a bug when handling multiple large-range copies.
We were adjusting moff multiple times without adjusting it back.
-------------- next part --------------
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx 2009-03-02 21:02:23 +0000
+++ b/_groupcompress_pyx.pyx 2009-03-02 22:38:28 +0000
@@ -249,7 +249,8 @@
# handling, and to avoid double allocating memory
if (delta_size < DELTA_SIZE_MIN):
# XXX: Invalid delta block
- return None
+ raise RuntimeError('delta_size %d smaller than min delta size %d'
+ % (delta_size, DELTA_SIZE_MIN))
data = <unsigned char *>delta
top = data + delta_size
@@ -259,7 +260,8 @@
size = get_delta_hdr_size(&data, top)
if (size > source_size):
# XXX: mismatched source size
- return None
+ raise RuntimeError('source size %d < expected source size %d'
+ % (source_size, size))
source_size = size
# now the result size
@@ -302,13 +304,17 @@
if (cp_off + cp_size < cp_size or
cp_off + cp_size > source_size or
cp_size > size):
- break
+ raise RuntimeError('Something wrong with:'
+ ' cp_off = %s, cp_size = %s'
+ ' source_size = %s, size = %s'
+ % (cp_off, cp_size, source_size, size))
memcpy(out, source + cp_off, cp_size)
out = out + cp_size
size = size - cp_size
elif (cmd):
if (cmd > size):
- break
+ raise RuntimeError('Insert instruction longer than remaining'
+ ' bytes: %d > %d' % (cmd, size))
memcpy(out, data, cmd)
out = out + cmd
data = data + cmd
@@ -320,11 +326,14 @@
# * encountering them (might be data corruption).
# */
## /* XXX: error("unexpected delta opcode 0"); */
- return None
+ raise RuntimeError('Got delta opcode: 0, not supported')
# /* sanity check */
if (data != top or size != 0):
## /* XXX: error("delta replay has gone wild"); */
+ raise RuntimeError('Did not extract the number of bytes we expected'
+ ' we were left with %d bytes in "size", and top - data = %d'
+ % (size, <int>(top - data)))
return None
# *dst_size = out - dst_buf;
=== modified file 'diff-delta.c'
--- a/diff-delta.c 2009-03-02 21:02:23 +0000
+++ b/diff-delta.c 2009-03-02 22:38:28 +0000
@@ -446,8 +446,8 @@
if (msize < ref - entry->ptr) {
/* this is our best match so far */
msize = ref - entry->ptr;
- moff = entry->ptr - ref_data;
mindex = index;
+ moff = entry->ptr - ref_data + mindex->agg_src_offset;
if (msize >= 4096) /* good enough */
break;
}
@@ -477,10 +477,17 @@
unsigned char *op;
if (inscnt) {
+ unsigned int local_moff;
+
+ /* moff is the offset in the global structure, we only want the
+ * offset in the local source.
+ */
+ local_moff = moff - mindex->agg_src_offset;
ref_data = mindex->src_buf;
- while (moff && ref_data[moff-1] == data[-1]) {
+ while (local_moff && ref_data[local_moff-1] == data[-1]) {
/* we can match one byte back */
msize++;
+ local_moff--;
moff--;
data--;
outpos--;
@@ -501,10 +508,6 @@
op = out + outpos++;
i = 0x80;
- /* so far, moff has been the offset in a single source, however,
- * now we encode it as the offset in the aggregate source
- */
- moff = moff + mindex->agg_src_offset;
if (moff & 0x000000ff)
out[outpos++] = moff >> 0, i |= 0x01;
if (moff & 0x0000ff00)
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-02 21:02:23 +0000
+++ b/groupcompress.py 2009-03-02 22:38:28 +0000
@@ -167,6 +167,10 @@
new_chunks = []
else:
new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
+ if self._delta_index._source_offset != self.endpoint:
+ raise AssertionError('_source_offset != endpoint'
+ ' somehow the DeltaIndex got out of sync with'
+ ' the output lines')
delta = self._delta_index.make_delta(target_text)
if (delta is None
or len(delta) > len(target_text) / 2):
@@ -178,10 +182,6 @@
new_chunks.insert(0, 'fulltext\n')
new_chunks.append('len:%s\n' % (input_len,))
unadded_bytes = sum(map(len, new_chunks))
- deltas_unadded = (self.endpoint - self._delta_index._source_offset)
- if deltas_unadded != 0:
- import pdb; pdb.set_trace()
- unadded_bytes += deltas_unadded
self._delta_index.add_source(target_text, unadded_bytes)
new_chunks.append(target_text)
else:
@@ -190,6 +190,7 @@
else:
new_chunks.insert(0, 'delta\n')
new_chunks.append('len:%s\n' % (len(delta),))
+ # unadded_bytes = sum(map(len, new_chunks))
# self._delta_index.add_source(delta, unadded_bytes)
new_chunks.append(delta)
unadded_bytes = sum(map(len, new_chunks))
More information about the bazaar-commits
mailing list