Rev 6580: (vila) Fix test failure for tuned_gzip. (Vincent Ladeuil) in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

Sat Jul 13 19:33:30 UTC 2013

At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6580 [merge]
revision-id: pqm at pqm.ubuntu.com-20130713193329-toxd5u7s4tk19fb0
parent: pqm at pqm.ubuntu.com-20130624130533-51c8htekjd8mix3u
parent: v.ladeuil+lp at free.fr-20130713190524-3bclzq4hpwkd6hkw
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sat 2013-07-13 19:33:29 +0000
message:
  (vila) Fix test failure for tuned_gzip. (Vincent Ladeuil)
modified:
  bzrlib/tests/test_tuned_gzip.py test_tuned_gzip.py-20060418042056-c576dfc708984968
  bzrlib/tuned_gzip.py           tuned_gzip.py-20060407014720-5aadc518e928e8d2
  doc/en/release-notes/bzr-2.6.txt bzr2.6.txt-20120116134316-8w1xxom1c7vcu1t5-1
=== modified file 'bzrlib/tests/test_tuned_gzip.py'

--- a/bzrlib/tests/test_tuned_gzip.py	2011-05-13 12:51:05 +0000
+++ b/bzrlib/tests/test_tuned_gzip.py	2013-07-09 07:58:10 +0000
@@ -106,14 +106,17 @@
 class TestToGzip(tests.TestCase):
 
     def assertToGzip(self, chunks):
-        bytes = ''.join(chunks)
+        raw_bytes = ''.join(chunks)
         gzfromchunks = tuned_gzip.chunks_to_gzip(chunks)
-        gzfrombytes = tuned_gzip.bytes_to_gzip(bytes)
+        gzfrombytes = tuned_gzip.bytes_to_gzip(raw_bytes)
         self.assertEqual(gzfrombytes, gzfromchunks)
         decoded = self.applyDeprecated(
             symbol_versioning.deprecated_in((2, 3, 0)),
             tuned_gzip.GzipFile, fileobj=StringIO(gzfromchunks)).read()
-        self.assertEqual(bytes, decoded)
+        lraw, ldecoded = len(raw_bytes), len(decoded)
+        self.assertEqual(lraw, ldecoded,
+                         'Expecting data length %d, got %d' % (lraw, ldecoded))
+        self.assertEqual(raw_bytes, decoded)
 
     def test_single_chunk(self):
         self.assertToGzip(['a modest chunk\nwith some various\nbits\n'])

=== modified file 'bzrlib/tuned_gzip.py'
--- a/bzrlib/tuned_gzip.py	2011-12-19 13:23:58 +0000
+++ b/bzrlib/tuned_gzip.py	2013-07-13 19:05:24 +0000
@@ -127,15 +127,28 @@
             DeprecationWarning, stacklevel=2)
         gzip.GzipFile.__init__(self, *args, **kwargs)
 
-    def _add_read_data(self, data):
-        # 4169 calls in 183
-        # temp var for len(data) and switch to +='s.
-        # 4169 in 139
-        len_data = len(data)
-        self.crc = zlib.crc32(data, self.crc)
-        self.extrabuf += data
-        self.extrasize += len_data
-        self.size += len_data
+    if sys.version_info >= (2, 7, 4):
+        def _add_read_data(self, data):
+            # 4169 calls in 183
+            # temp var for len(data) and switch to +='s.
+            # 4169 in 139
+            len_data = len(data)
+            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+            offset = self.offset - self.extrastart
+            self.extrabuf = self.extrabuf[offset:] + data
+            self.extrasize = self.extrasize + len_data
+            self.extrastart = self.offset
+            self.size = self.size + len_data
+    else:
+        def _add_read_data(self, data):
+            # 4169 calls in 183
+            # temp var for len(data) and switch to +='s.
+            # 4169 in 139
+            len_data = len(data)
+            self.crc = zlib.crc32(data, self.crc)
+            self.extrabuf += data
+            self.extrasize += len_data
+            self.size += len_data
 
     def _write_gzip_header(self):
         """A tuned version of gzip._write_gzip_header
@@ -161,97 +174,98 @@
             ''          #     self.fileobj.write(fname + '\000')
             )
 
-    def _read(self, size=1024):
-        # various optimisations:
-        # reduces lsprof count from 2500 to
-        # 8337 calls in 1272, 365 internal
-        if self.fileobj is None:
-            raise EOFError, "Reached EOF"
-
-        if self._new_member:
-            # If the _new_member flag is set, we have to
-            # jump to the next member, if there is one.
-            #
-            # First, check if we're at the end of the file;
-            # if so, it's time to stop; no more members to read.
-            next_header_bytes = self.fileobj.read(10)
-            if next_header_bytes == '':
+    if sys.version_info < (2, 7, 4):
+        def _read(self, size=1024):
+            # various optimisations:
+            # reduces lsprof count from 2500 to
+            # 8337 calls in 1272, 365 internal
+            if self.fileobj is None:
                 raise EOFError, "Reached EOF"
 
-            self._init_read()
-            self._read_gzip_header(next_header_bytes)
-            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
-            self._new_member = False
-
-        # Read a chunk of data from the file
-        buf = self.fileobj.read(size)
-
-        # If the EOF has been reached, flush the decompression object
-        # and mark this object as finished.
-
-        if buf == "":
-            self._add_read_data(self.decompress.flush())
-            if len(self.decompress.unused_data) < 8:
-                raise AssertionError("what does flush do?")
-            self._gzip_tail = self.decompress.unused_data[0:8]
-            self._read_eof()
-            # tell the driving read() call we have stuffed all the data
-            # in self.extrabuf
-            raise EOFError, 'Reached EOF'
-
-        self._add_read_data(self.decompress.decompress(buf))
-
-        if self.decompress.unused_data != "":
-            # Ending case: we've come to the end of a member in the file,
-            # so seek back to the start of the data for the next member which
-            # is the length of the decompress objects unused data - the first
-            # 8 bytes for the end crc and size records.
-            #
-            # so seek back to the start of the unused data, finish up
-            # this member, and read a new gzip header.
-            # (The number of bytes to seek back is the length of the unused
-            # data, minus 8 because those 8 bytes are part of this member.
-            seek_length = len (self.decompress.unused_data) - 8
-            if seek_length > 0:
-                # we read too much data
-                self.fileobj.seek(-seek_length, 1)
+            if self._new_member:
+                # If the _new_member flag is set, we have to
+                # jump to the next member, if there is one.
+                #
+                # First, check if we're at the end of the file;
+                # if so, it's time to stop; no more members to read.
+                next_header_bytes = self.fileobj.read(10)
+                if next_header_bytes == '':
+                    raise EOFError, "Reached EOF"
+
+                self._init_read()
+                self._read_gzip_header(next_header_bytes)
+                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+                self._new_member = False
+
+            # Read a chunk of data from the file
+            buf = self.fileobj.read(size)
+
+            # If the EOF has been reached, flush the decompression object
+            # and mark this object as finished.
+
+            if buf == "":
+                self._add_read_data(self.decompress.flush())
+                if len(self.decompress.unused_data) < 8:
+                    raise AssertionError("what does flush do?")
                 self._gzip_tail = self.decompress.unused_data[0:8]
-            elif seek_length < 0:
-                # we haven't read enough to check the checksum.
-                if not (-8 < seek_length):
-                    raise AssertionError("too great a seek")
-                buf = self.fileobj.read(-seek_length)
-                self._gzip_tail = self.decompress.unused_data + buf
-            else:
-                self._gzip_tail = self.decompress.unused_data
-
-            # Check the CRC and file size, and set the flag so we read
-            # a new member on the next call
-            self._read_eof()
-            self._new_member = True
-
-    def _read_eof(self):
-        """tuned to reduce function calls and eliminate file seeking:
-        pass 1:
-        reduces lsprof count from 800 to 288
-        4168 in 296
-        avoid U32 call by using struct format L
-        4168 in 200
-        """
-        # We've read to the end of the file, so we should have 8 bytes of
-        # unused data in the decompressor. If we don't, there is a corrupt file.
-        # We use these 8 bytes to calculate the CRC and the recorded file size.
-        # We then check the that the computed CRC and size of the
-        # uncompressed data matches the stored values.  Note that the size
-        # stored is the true file size mod 2**32.
-        if not (len(self._gzip_tail) == 8):
-            raise AssertionError("gzip trailer is incorrect length.")
-        crc32, isize = struct.unpack("<LL", self._gzip_tail)
-        # note that isize is unsigned - it can exceed 2GB
-        if crc32 != U32(self.crc):
-            raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
-        elif isize != LOWU32(self.size):
-            raise IOError, "Incorrect length of data produced"
+                self._read_eof()
+                # tell the driving read() call we have stuffed all the data
+                # in self.extrabuf
+                raise EOFError, 'Reached EOF'
+
+            self._add_read_data(self.decompress.decompress(buf))
+
+            if self.decompress.unused_data != "":
+                # Ending case: we've come to the end of a member in the file,
+                # so seek back to the start of the data for the next member
+                # which is the length of the decompress object's unused data -
+                # the first 8 bytes for the end crc and size records.
+                #
+                # so seek back to the start of the unused data, finish up
+                # this member, and read a new gzip header.
+                # (The number of bytes to seek back is the length of the unused
+                # data, minus 8 because those 8 bytes are part of this member.)
+                seek_length = len (self.decompress.unused_data) - 8
+                if seek_length > 0:
+                    # we read too much data
+                    self.fileobj.seek(-seek_length, 1)
+                    self._gzip_tail = self.decompress.unused_data[0:8]
+                elif seek_length < 0:
+                    # we haven't read enough to check the checksum.
+                    if not (-8 < seek_length):
+                        raise AssertionError("too great a seek")
+                    buf = self.fileobj.read(-seek_length)
+                    self._gzip_tail = self.decompress.unused_data + buf
+                else:
+                    self._gzip_tail = self.decompress.unused_data
+
+                # Check the CRC and file size, and set the flag so we read
+                # a new member on the next call
+                self._read_eof()
+                self._new_member = True
+
+        def _read_eof(self):
+            """tuned to reduce function calls and eliminate file seeking:
+            pass 1:
+            reduces lsprof count from 800 to 288
+            4168 in 296
+            avoid U32 call by using struct format L
+            4168 in 200
+            """
+            # We've read to the end of the file, so we should have 8 bytes of
+            # unused data in the decompressor. If we don't, there is a corrupt
+            # file.  We use these 8 bytes to calculate the CRC and the recorded
+            # file size.  We then check that the computed CRC and size of
+            # the uncompressed data match the stored values.  Note that the
+            # size stored is the true file size mod 2**32.
+            if not (len(self._gzip_tail) == 8):
+                raise AssertionError("gzip trailer is incorrect length.")
+            crc32, isize = struct.unpack("<LL", self._gzip_tail)
+            # note that isize is unsigned - it can exceed 2GB
+            if crc32 != U32(self.crc):
+                raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
+            elif isize != LOWU32(self.size):
+                raise IOError, "Incorrect length of data produced"
 
     def _read_gzip_header(self, bytes=None):
         """Supply bytes if the minimum header size is already read.

=== modified file 'doc/en/release-notes/bzr-2.6.txt'
--- a/doc/en/release-notes/bzr-2.6.txt	2013-05-27 09:13:55 +0000
+++ b/doc/en/release-notes/bzr-2.6.txt	2013-07-09 07:47:49 +0000
@@ -103,6 +103,11 @@
 * The launchpad plugin now requires API 1.6.0 or later.  This version shipped
   with Ubuntu 9.10.  (Aaron Bentley)
 
+* Better align with upstream gzip.py in tuned_gzip.py. We may lose a bit of
+  performance, but that is only for the knit and weave formats, which are
+  already partly deprecated; better to keep compatibility than to fail fast ;)
+  (Vincent Ladeuil, #1116079)
+
 Testing
 *******