Rev 59: Insert a fulltext if the delta is more than half the total size. in http://bzr.arbash-meinel.com/plugins/groupcompress_rabin

John Arbash Meinel john at arbash-meinel.com
Fri Feb 27 20:40:03 GMT 2009


At http://bzr.arbash-meinel.com/plugins/groupcompress_rabin

------------------------------------------------------------
revno: 59
revision-id: john at arbash-meinel.com-20090227204002-fdzk52zc3frd4ddi
parent: john at arbash-meinel.com-20090227201847-181ruulj0worz3ra
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress_rabin
timestamp: Fri 2009-02-27 14:40:02 -0600
message:
  Insert a fulltext if the delta is more than half the total size.
  Also, gcr deltas are more compact; they are probably about the same size
  after compression, but decrease the range limits since the copy
  instructions are effectively pre-compressed.
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py	2009-02-27 20:18:47 +0000
+++ b/groupcompress.py	2009-02-27 20:40:02 +0000
@@ -142,8 +142,15 @@
         input_len = len(target_text)
         new_chunks = ['label: %s\nsha1: %s\n' % (label, sha1)]
         source_text = ''.join(self.lines)
+        # XXX: We have a few possibilities here. We could consider a few
+        #      different 'previous' windows, such as only the initial text;
+        #      we could do something with the 'just inserted' text; or
+        #      we could try a delta against whatever the last delta we
+        #      computed was (the idea being we just computed the delta_index,
+        #      so we re-use it here and see if that is good enough, etc.)
         delta = _groupcompress_c.make_delta(source_text, target_text)
-        if delta is None:
+        if (delta is None
+            or len(delta) > len(target_text) / 2):
             # We can't delta (perhaps source_text is empty)
             # so mark this as an insert
             new_chunks.insert(0, 'fulltext\n')
@@ -558,7 +565,7 @@
                 prefix = record.key[0]
                 if (last_prefix is not None and prefix != last_prefix):
                     soft = True
-                    if basis_end > 1024 * 1024 * 4:
+                    if basis_end > 1024 * 1024 * 2:
                         flush()
                         self._compressor = GroupCompressor(self._delta)
                         self._unadded_refs = {}
@@ -577,7 +584,7 @@
             keys_to_add.append((key, '%d %d' % (basis_end, end_point),
                 (record.parents,)))
             basis_end = end_point
-            if basis_end > 1024 * 1024 * 8:
+            if basis_end > 1024 * 1024 * 4:
                 flush()
                 self._compressor = GroupCompressor(self._delta)
                 self._unadded_refs = {}

=== modified file 'setup.py'
--- a/setup.py	2009-02-27 17:36:23 +0000
+++ b/setup.py	2009-02-27 20:40:02 +0000
@@ -74,7 +74,8 @@
     else:
         source = [c_name]
     source.extend(extra_source)
-    ext_modules.append(Extension(module_name, source))
+    ext_modules.append(Extension(module_name, source,
+        extra_compile_args = ['-O3']))
 
 add_pyrex_extension('_groupcompress_c',
                     extra_source=['diff-delta.c', 'patch-delta.c'])



More information about the bazaar-commits mailing list