Rev 81: Fix a bug when handling multiple large-range copies, in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin

John Arbash Meinel john at arbash-meinel.com
Mon Mar 2 22:38:49 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin

------------------------------------------------------------
revno: 81
revision-id: john at arbash-meinel.com-20090302223828-hyb4crn4w28sgvmc
parent: john at arbash-meinel.com-20090302210223-9ixutqay7sx8c1n3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Mon 2009-03-02 16:38:28 -0600
message:
  Fix a bug when handling multiple large-range copies.
  
  We were adjusting moff multiple times without adjusting it back.
-------------- next part --------------
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx	2009-03-02 21:02:23 +0000
+++ b/_groupcompress_pyx.pyx	2009-03-02 22:38:28 +0000
@@ -249,7 +249,8 @@
     # handling, and to avoid double allocating memory
     if (delta_size < DELTA_SIZE_MIN):
         # XXX: Invalid delta block
-        return None
+        raise RuntimeError('delta_size %d smaller than min delta size %d'
+                           % (delta_size, DELTA_SIZE_MIN))
 
     data = <unsigned char *>delta
     top = data + delta_size
@@ -259,7 +260,8 @@
     size = get_delta_hdr_size(&data, top)
     if (size > source_size):
         # XXX: mismatched source size
-        return None
+        raise RuntimeError('source size %d < expected source size %d'
+                           % (source_size, size))
     source_size = size
 
     # now the result size
@@ -302,13 +304,17 @@
             if (cp_off + cp_size < cp_size or
                 cp_off + cp_size > source_size or
                 cp_size > size):
-                break
+                raise RuntimeError('Something wrong with:'
+                    ' cp_off = %s, cp_size = %s'
+                    ' source_size = %s, size = %s'
+                    % (cp_off, cp_size, source_size, size))
             memcpy(out, source + cp_off, cp_size)
             out = out + cp_size
             size = size - cp_size
         elif (cmd):
             if (cmd > size):
-                break
+                raise RuntimeError('Insert instruction longer than remaining'
+                    ' bytes: %d > %d' % (cmd, size))
             memcpy(out, data, cmd)
             out = out + cmd
             data = data + cmd
@@ -320,11 +326,14 @@
             #  * encountering them (might be data corruption).
             #  */
             ## /* XXX: error("unexpected delta opcode 0"); */
-            return None
+            raise RuntimeError('Got delta opcode: 0, not supported')
 
     # /* sanity check */
     if (data != top or size != 0):
         ## /* XXX: error("delta replay has gone wild"); */
+        raise RuntimeError('Did not extract the number of bytes we expected'
+            ' we were left with %d bytes in "size", and top - data = %d'
+            % (size, <int>(top - data)))
         return None
 
     # *dst_size = out - dst_buf;

=== modified file 'diff-delta.c'
--- a/diff-delta.c	2009-03-02 21:02:23 +0000
+++ b/diff-delta.c	2009-03-02 22:38:28 +0000
@@ -446,8 +446,8 @@
 					if (msize < ref - entry->ptr) {
 						/* this is our best match so far */
 						msize = ref - entry->ptr;
-						moff = entry->ptr - ref_data;
 						mindex = index;
+						moff = entry->ptr - ref_data + mindex->agg_src_offset;
 						if (msize >= 4096) /* good enough */
 							break;
 					}
@@ -477,10 +477,17 @@
 			unsigned char *op;
 
 			if (inscnt) {
+				unsigned int local_moff;
+
+				/* moff is the offset in the global structure, we only want the
+				 * offset in the local source.
+				 */
+				local_moff = moff - mindex->agg_src_offset;
 				ref_data = mindex->src_buf;
-				while (moff && ref_data[moff-1] == data[-1]) {
+				while (local_moff && ref_data[local_moff-1] == data[-1]) {
 					/* we can match one byte back */
 					msize++;
+					local_moff--;
 					moff--;
 					data--;
 					outpos--;
@@ -501,10 +508,6 @@
 			op = out + outpos++;
 			i = 0x80;
 
-			/* so far, moff has been the offset in a single source, however,
-			 * now we encode it as the offset in the aggregate source
-			 */
-			moff = moff + mindex->agg_src_offset;
 			if (moff & 0x000000ff)
 				out[outpos++] = moff >> 0,  i |= 0x01;
 			if (moff & 0x0000ff00)

=== modified file 'groupcompress.py'
--- a/groupcompress.py	2009-03-02 21:02:23 +0000
+++ b/groupcompress.py	2009-03-02 22:38:28 +0000
@@ -167,6 +167,10 @@
             new_chunks = []
         else:
             new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
+        if self._delta_index._source_offset != self.endpoint:
+            raise AssertionError('_source_offset != endpoint'
+                ' somehow the DeltaIndex got out of sync with'
+                ' the output lines')
         delta = self._delta_index.make_delta(target_text)
         if (delta is None
             or len(delta) > len(target_text) / 2):
@@ -178,10 +182,6 @@
                 new_chunks.insert(0, 'fulltext\n')
                 new_chunks.append('len:%s\n' % (input_len,))
             unadded_bytes = sum(map(len, new_chunks))
-            deltas_unadded = (self.endpoint - self._delta_index._source_offset)
-            if deltas_unadded != 0:
-                import pdb; pdb.set_trace()
-            unadded_bytes += deltas_unadded
             self._delta_index.add_source(target_text, unadded_bytes)
             new_chunks.append(target_text)
         else:
@@ -190,6 +190,7 @@
             else:
                 new_chunks.insert(0, 'delta\n')
                 new_chunks.append('len:%s\n' % (len(delta),))
+            # unadded_bytes = sum(map(len, new_chunks))
             # self._delta_index.add_source(delta, unadded_bytes)
             new_chunks.append(delta)
             unadded_bytes = sum(map(len, new_chunks))



More information about the bazaar-commits mailing list