Rev 37: Prototype using LZMA as the secondary compressor, rather than zlib (in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/lzma)

Thu Mar 5 18:43:49 GMT 2009

At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/lzma

------------------------------------------------------------
revno: 37
revision-id: john at arbash-meinel.com-20090305184029-05aqk336dekq5h7z
parent: john at arbash-meinel.com-20090305181021-dsjdgu54gva425r7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lzma
timestamp: Thu 2009-03-05 12:40:29 -0600
message:
  Prototype using LZMA as the secondary compressor, rather than zlib.
-------------- next part --------------
=== modified file 'groupcompress.py'

--- a/groupcompress.py	2009-03-05 18:10:21 +0000
+++ b/groupcompress.py	2009-03-05 18:40:29 +0000
@@ -21,6 +21,7 @@
 from cStringIO import StringIO
 import struct
 import zlib
+import pylzma
 
 from bzrlib import (
     annotate,
@@ -167,15 +168,14 @@
             assert header_length == 0
             zcontent = bytes[pos2+1:]
             if zcontent:
-                out._content = zlib.decompress(zcontent)
+                out._content = pylzma.decompress(zcontent)
                 out._size = len(out._content)
             return out
         pos = pos2 + 1
         pos2 = pos + z_header_length
         z_header_bytes = bytes[pos:pos2]
         assert len(z_header_bytes) == z_header_length
-        d = zlib.decompressobj()
-        header_bytes = d.decompress(z_header_bytes)
+        header_bytes = pylzma.decompress(z_header_bytes)
         assert len(header_bytes) == header_length
         del z_header_bytes
         lines = header_bytes.split('\n')
@@ -199,8 +199,7 @@
             info_dict[key] = value
         zcontent = bytes[pos2:]
         if zcontent:
-            out._content = d.decompress(zcontent)
-            assert d.flush() == ''
+            out._content = pylzma.decompress(zcontent)
             out._size = header_len + len(out._content)
         return out
 
@@ -233,9 +232,9 @@
             elif entry.type == 'delta':
                 assert c == 'd'
         content_len, len_len = decode_base128_int(
-                                self._content[start + 1:start + 11])
+                            self._content[entry.start + 1:entry.start + 11])
         assert entry.length == content_len + 1 + len_len
-        content_start = start + 1 + len_len
+        content_start = entry.start + 1 + len_len
         end = entry.start + entry.length
         content = self._content[content_start:end]
         if c == 'f':
@@ -281,9 +280,8 @@
             chunks.append(chunk)
         bytes = ''.join(chunks)
         info_len = len(bytes)
-        c = zlib.compressobj()
         z_bytes = []
-        z_bytes.append(c.compress(bytes))
+        z_bytes.append(pylzma.compress(bytes))
         del bytes
         # TODO: we may want to have the header compressed in the same chain
         #       as the data, or we may not, evaulate it
@@ -292,16 +290,13 @@
         #       label in the header is duplicated in the text.
         #       For chk pages and real bytes, I would guess this is not
         #       true.
-        z_bytes.append(c.flush(zlib.Z_SYNC_FLUSH))
         z_len = sum(map(len, z_bytes))
         c_len = len(content)
         if _NO_LABELS:
             z_bytes = []
             z_len = 0
             info_len = 0
-            c = zlib.compressobj()
-        z_bytes.append(c.compress(content))
-        z_bytes.append(c.flush())
+        z_bytes.append(pylzma.compress(content))
         chunks = [self.GCB_HEADER,
                   '%d\n' % (z_len,),
                   '%d\n' % (info_len,),