Rev 36: Allow writing negative offsets. Turns out not to actually compress better. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/trunk

John Arbash Meinel john at arbash-meinel.com
Wed Feb 18 22:14:43 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/trunk

------------------------------------------------------------
revno: 36
revision-id: john at arbash-meinel.com-20090218221422-tsu9pw3gmdrovqes
parent: john at arbash-meinel.com-20090218204046-yyqr5q4tza4v4xug
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-02-18 16:14:22 -0600
message:
  Allow writing negative offsets. Turns out not to actually compress better.
  After zlib compression, negative offsets are a net loss, presumably because
  the absolute bytes-since-start offsets contain redundancy that zlib can
  factor out.
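
For context, a minimal sketch of the two encodings being compared (the
function names here are illustrative, not from the branch; only the
"c,%d,%d\n" instruction format comes from the diff below):

    def encode_copy_absolute(start_byte, num_bytes):
        # Copy num_bytes from the group, starting at an offset measured
        # from the start of the group (bytes-since-start).
        return "c,%d,%d\n" % (start_byte, num_bytes)

    def encode_copy_negative(start_byte, num_bytes, endpoint):
        # The same copy, with the offset expressed relative to the current
        # end of the group (endpoint), so it is always negative and often
        # needs fewer digits when it refers to recently written bytes.
        return "c,%d,%d\n" % (start_byte - endpoint, num_bytes)

For example, encode_copy_absolute(123456, 10) gives "c,123456,10\n" (12
bytes) while encode_copy_negative(123456, 10, 123500) gives "c,-44,10\n"
(9 bytes), three bytes shorter before zlib sees it. On the decode side, the
patch rebases a negative start against delta_bytes_start, the position in
the group where the delta's own bytes begin (passed in as index_memo[3]).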
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py	2009-02-17 22:17:24 +0000
+++ b/groupcompress.py	2009-02-18 22:14:22 +0000
@@ -53,6 +53,8 @@
     )
 
 
+total_copy_neg = 0
+
 def parse(line_list):
     result = []
     lines = iter(line_list)
@@ -75,7 +77,7 @@
             result.append((op, None, numbers[0], contents))
     return label, sha1, result
 
-def apply_delta(basis, delta):
+def apply_delta(basis, delta, delta_bytes_start):
     """Apply delta to this object to become new_version_id."""
     lines = []
     last_offset = 0
@@ -83,6 +85,8 @@
     # start, end refer to offsets in basis
     for op, start, count, delta_lines in delta:
         if op == 'c':
+            if start < 0:
+                start = delta_bytes_start + start
             lines.append(basis[start:start+count])
         else:
             lines.extend(delta_lines)
@@ -237,7 +241,14 @@
             else:
                 start_byte = self.line_offsets[copy_start - 1]
             bytes = stop_byte - start_byte
+            neg_offset = start_byte - self.endpoint
             copy_control_instruction = "c,%d,%d\n" % (start_byte, bytes)
+            assert neg_offset < 0
+            copy_neg = "c,%d,%d\n" % (neg_offset, bytes)
+            if len(copy_neg) < len(copy_control_instruction):
+                global total_copy_neg
+                total_copy_neg += len(copy_neg) - len(copy_control_instruction)
+                # copy_control_instruction = copy_neg
             if (bytes + len(insert_instruction) >
                 len(copy_control_instruction)):
                 new_lines.append(copy_control_instruction)
@@ -519,7 +530,7 @@
                 label, sha1, delta = parse(delta_lines)
                 if label != key:
                     raise AssertionError("wrong key: %r, wanted %r" % (label, key))
-                lines = apply_delta(plain, delta)
+                lines = apply_delta(plain, delta, index_memo[3])
             yield ChunkedContentFactory(key, parents, sha1, lines)
 
     def get_sha1s(self, keys):
@@ -578,6 +589,7 @@
             nodes = []
             for key, reads, refs in keys_to_add:
                 nodes.append((key, "%d %d %s" % (start, length, reads), refs))
+            print '\ntotal neg %s' % (total_copy_neg,)
             self._index.add_records(nodes, random_id=random_id)
         for record in stream:
             # Raise an error when a record is missing.

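The commit message's conclusion (negative offsets lose after zlib) can be
sanity-checked in rough form with zlib directly. The sketch below is
hypothetical, not code from the branch: the instruction streams are
synthetic, and the branch's real delta streams may compress differently.

    import zlib

    def compressed_size(instructions):
        # Size of the concatenated control instructions after zlib.
        return len(zlib.compress("".join(instructions).encode("ascii")))

    # Absolute offsets grow monotonically and share long digit prefixes;
    # the commit message's hypothesis is that zlib's LZ77 matching can
    # factor that redundancy out.
    absolute = ["c,%d,%d\n" % (off, 10) for off in range(100000, 101000, 7)]
    # Negative offsets are shorter per instruction, but their digit
    # patterns vary more, giving zlib less to match against.
    negative = ["c,%d,%d\n" % (off - 101000, 10)
                for off in range(100000, 101000, 7)]

    print("absolute: %d raw, %d compressed"
          % (len("".join(absolute)), compressed_size(absolute)))
    print("negative: %d raw, %d compressed"
          % (len("".join(negative)), compressed_size(negative)))

Note that the diff leaves the switch disabled (the assignment to
copy_control_instruction stays commented out), so total_copy_neg only
tallies the raw bytes a negative encoding would have saved; the absolute
encoding is still what gets written.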

