Rev 2488: Handle the extra fields. in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/gzip_reader

Wed May 9 06:06:58 BST 2007

At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/gzip_reader

------------------------------------------------------------
revno: 2488
revision-id: john at arbash-meinel.com-20070509050629-72vivj9la5gn959g
parent: john at arbash-meinel.com-20070509043821-utn042iu0k4xyojw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: gzip_reader
timestamp: Wed 2007-05-09 00:06:29 -0500
message:
  Handle the extra fields.
modified:
  bzrlib/tests/test_tuned_gzip.py test_tuned_gzip.py-20060418042056-c576dfc708984968
  bzrlib/tuned_gzip.py           tuned_gzip.py-20060407014720-5aadc518e928e8d2
-------------- next part --------------
=== modified file 'bzrlib/tests/test_tuned_gzip.py'

--- a/bzrlib/tests/test_tuned_gzip.py	2007-05-09 04:38:21 +0000
+++ b/bzrlib/tests/test_tuned_gzip.py	2007-05-09 05:06:29 +0000
@@ -137,6 +137,10 @@
     def assertDecompress(self, expected, gzip_hunk):
         """Check that tuned_gzip.decompress_gzip_hunk() returns valid text.
         """
+        # Make sure the gzip_hunk is correct
+        plain_text = tuned_gzip.GzipFile(mode='rb',
+                                         fileobj=StringIO(gzip_hunk)).read()
+        self.assertEqual(expected, plain_text)
         plain_text = tuned_gzip.decompress_gzip_hunk(gzip_hunk)
         self.assertEqual(expected, plain_text)
 
@@ -198,8 +202,75 @@
             '\x01\x00\x00\x00'  # size
             )
 
-    # TODO: Test that decompress_gzip_hunk supports the extra flags in the gzip
-    # header. FEXTRA, FCOMMENT, FNAME, FHCRC
+    def test_decompress_fextra(self):
+        self.assertDecompress('a',
+            '\x1f\x8b'          # magic num
+            '\x08'              # compression 
+            '\x04'              # flag | FEXTRA
+            '\x00\x00\x00\x00'  # timestamp
+            '\x02\xff'          # XFL (extra flags), OS (0xff = unknown)
+            '\x01\x00'          # extra length
+            '\xff'              # extra
+            'K\x04\x00'         # zlib
+            'C\xbe\xb7\xe8'     # crc32
+            '\x01\x00\x00\x00'  # size
+            )
+
+    def test_decompress_fname(self):
+        self.assertDecompress('a',
+            '\x1f\x8b'          # magic num
+            '\x08'              # compression 
+            '\x08'              # flag | FNAME
+            '\x00\x00\x00\x00'  # timestamp
+            '\x02\xff'          # XFL (extra flags), OS (0xff = unknown)
+            'this is a name\x00'# null terminated filename
+            'K\x04\x00'         # zlib
+            'C\xbe\xb7\xe8'     # crc32
+            '\x01\x00\x00\x00'  # size
+            )
+
+    def test_decompress_fcomment(self):
+        self.assertDecompress('a',
+            '\x1f\x8b'          # magic num
+            '\x08'              # compression 
+            '\x10'              # flag | FCOMMENT
+            '\x00\x00\x00\x00'  # timestamp
+            '\x02\xff'          # XFL (extra flags), OS (0xff = unknown)
+            'comment\x00'       # null terminated comment
+            'K\x04\x00'         # zlib
+            'C\xbe\xb7\xe8'     # crc32
+            '\x01\x00\x00\x00'  # size
+            )
+
+    def test_decompress_fcrc(self):
+        self.assertDecompress('a',
+            '\x1f\x8b'          # magic num
+            '\x08'              # compression 
+            '\x02'              # flag | FHCRC
+            '\x00\x00\x00\x00'  # timestamp
+            '\x02\xff'          # XFL (extra flags), OS (0xff = unknown)
+            '\xff\xff'          # 16-bit header crc
+            'K\x04\x00'         # zlib
+            'C\xbe\xb7\xe8'     # crc32
+            '\x01\x00\x00\x00'  # size
+            )
+
+    def test_decompress_f_all(self):
+        self.assertDecompress('a',
+            '\x1f\x8b'          # magic num
+            '\x08'              # compression 
+            '\x1e'              # flag | FHCRC | FCOMMENT | FNAME | FEXTRA
+            '\x00\x00\x00\x00'  # timestamp
+            '\x02\xff'          # XFL (extra flags), OS (0xff = unknown)
+            '\x02\x00'          # extra length
+            '\xff\xaa'          # extra
+            'this is a name\x00'# null terminated filename
+            'comment\x00'       # null terminated comment
+            '\xff\xff'          # 16-bit header crc
+            'K\x04\x00'         # zlib
+            'C\xbe\xb7\xe8'     # crc32
+            '\x01\x00\x00\x00'  # size
+            )
 
     def test_decompress_not_a_gzip_string(self):
         self.assertRaises(IOError, tuned_gzip.decompress_gzip_hunk,

=== modified file 'bzrlib/tuned_gzip.py'
--- a/bzrlib/tuned_gzip.py	2007-05-09 04:38:21 +0000
+++ b/bzrlib/tuned_gzip.py	2007-05-09 05:06:29 +0000
@@ -381,8 +381,29 @@
         # os = self.fileobj.read(1) (remaining[9:10])
 
         # TODO: handle the flags field
+        pos = 10
+        if flag & FEXTRA:
+            # skip past the extra field, if present
+            xlen = struct.unpack('<h', remaining[pos:pos+2])[0]
+            pos += 2 + xlen
+        if flag & FNAME:
+            # Read and discard a null-terminated string containing the filename
+            end = remaining.find('\x00', pos)
+            if end == -1:
+                pos = len(remaining)
+            else:
+                pos = end+1
+        if flag & FCOMMENT:
+            # Read and discard a null-terminated string containing the comment
+            end = remaining.find('\x00', pos)
+            if end == -1:
+                pos = len(remaining)
+            else:
+                pos = end+1
+        if flag & FHCRC:
+            pos += 2 # discard the 16-bit header CRC
 
-        remaining = remaining[10:]
+        remaining = remaining[pos:]
 
         crc = zlib.crc32('')
         decompressor = zlib.decompressobj(-zlib.MAX_WBITS)