Rev 6580: (vila) Fix test failure for tuned_gzip. (Vincent Ladeuil) in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
Patch Queue Manager
pqm at pqm.ubuntu.com
Sat Jul 13 19:33:30 UTC 2013
At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 6580 [merge]
revision-id: pqm at pqm.ubuntu.com-20130713193329-toxd5u7s4tk19fb0
parent: pqm at pqm.ubuntu.com-20130624130533-51c8htekjd8mix3u
parent: v.ladeuil+lp at free.fr-20130713190524-3bclzq4hpwkd6hkw
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sat 2013-07-13 19:33:29 +0000
message:
(vila) Fix test failure for tuned_gzip. (Vincent Ladeuil)
modified:
bzrlib/tests/test_tuned_gzip.py test_tuned_gzip.py-20060418042056-c576dfc708984968
bzrlib/tuned_gzip.py tuned_gzip.py-20060407014720-5aadc518e928e8d2
doc/en/release-notes/bzr-2.6.txt bzr2.6.txt-20120116134316-8w1xxom1c7vcu1t5-1
=== modified file 'bzrlib/tests/test_tuned_gzip.py'
--- a/bzrlib/tests/test_tuned_gzip.py 2011-05-13 12:51:05 +0000
+++ b/bzrlib/tests/test_tuned_gzip.py 2013-07-09 07:58:10 +0000
@@ -106,14 +106,17 @@
class TestToGzip(tests.TestCase):
def assertToGzip(self, chunks):
- bytes = ''.join(chunks)
+ raw_bytes = ''.join(chunks)
gzfromchunks = tuned_gzip.chunks_to_gzip(chunks)
- gzfrombytes = tuned_gzip.bytes_to_gzip(bytes)
+ gzfrombytes = tuned_gzip.bytes_to_gzip(raw_bytes)
self.assertEqual(gzfrombytes, gzfromchunks)
decoded = self.applyDeprecated(
symbol_versioning.deprecated_in((2, 3, 0)),
tuned_gzip.GzipFile, fileobj=StringIO(gzfromchunks)).read()
- self.assertEqual(bytes, decoded)
+ lraw, ldecoded = len(raw_bytes), len(decoded)
+ self.assertEqual(lraw, ldecoded,
+ 'Expecting data length %d, got %d' % (lraw, ldecoded))
+ self.assertEqual(raw_bytes, decoded)
def test_single_chunk(self):
self.assertToGzip(['a modest chunk\nwith some various\nbits\n'])
=== modified file 'bzrlib/tuned_gzip.py'
--- a/bzrlib/tuned_gzip.py 2011-12-19 13:23:58 +0000
+++ b/bzrlib/tuned_gzip.py 2013-07-13 19:05:24 +0000
@@ -127,15 +127,28 @@
DeprecationWarning, stacklevel=2)
gzip.GzipFile.__init__(self, *args, **kwargs)
- def _add_read_data(self, data):
- # 4169 calls in 183
- # temp var for len(data) and switch to +='s.
- # 4169 in 139
- len_data = len(data)
- self.crc = zlib.crc32(data, self.crc)
- self.extrabuf += data
- self.extrasize += len_data
- self.size += len_data
+ if sys.version_info >= (2, 7, 4):
+ def _add_read_data(self, data):
+ # 4169 calls in 183
+ # temp var for len(data) and switch to +='s.
+ # 4169 in 139
+ len_data = len(data)
+ self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+ offset = self.offset - self.extrastart
+ self.extrabuf = self.extrabuf[offset:] + data
+ self.extrasize = self.extrasize + len_data
+ self.extrastart = self.offset
+ self.size = self.size + len_data
+ else:
+ def _add_read_data(self, data):
+ # 4169 calls in 183
+ # temp var for len(data) and switch to +='s.
+ # 4169 in 139
+ len_data = len(data)
+ self.crc = zlib.crc32(data, self.crc)
+ self.extrabuf += data
+ self.extrasize += len_data
+ self.size += len_data
def _write_gzip_header(self):
"""A tuned version of gzip._write_gzip_header
@@ -161,97 +174,98 @@
'' # self.fileobj.write(fname + '\000')
)
- def _read(self, size=1024):
- # various optimisations:
- # reduces lsprof count from 2500 to
- # 8337 calls in 1272, 365 internal
- if self.fileobj is None:
- raise EOFError, "Reached EOF"
-
- if self._new_member:
- # If the _new_member flag is set, we have to
- # jump to the next member, if there is one.
- #
- # First, check if we're at the end of the file;
- # if so, it's time to stop; no more members to read.
- next_header_bytes = self.fileobj.read(10)
- if next_header_bytes == '':
+ if sys.version_info < (2, 7, 4):
+ def _read(self, size=1024):
+ # various optimisations:
+ # reduces lsprof count from 2500 to
+ # 8337 calls in 1272, 365 internal
+ if self.fileobj is None:
raise EOFError, "Reached EOF"
- self._init_read()
- self._read_gzip_header(next_header_bytes)
- self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
- self._new_member = False
-
- # Read a chunk of data from the file
- buf = self.fileobj.read(size)
-
- # If the EOF has been reached, flush the decompression object
- # and mark this object as finished.
-
- if buf == "":
- self._add_read_data(self.decompress.flush())
- if len(self.decompress.unused_data) < 8:
- raise AssertionError("what does flush do?")
- self._gzip_tail = self.decompress.unused_data[0:8]
- self._read_eof()
- # tell the driving read() call we have stuffed all the data
- # in self.extrabuf
- raise EOFError, 'Reached EOF'
-
- self._add_read_data(self.decompress.decompress(buf))
-
- if self.decompress.unused_data != "":
- # Ending case: we've come to the end of a member in the file,
- # so seek back to the start of the data for the next member which
- # is the length of the decompress objects unused data - the first
- # 8 bytes for the end crc and size records.
- #
- # so seek back to the start of the unused data, finish up
- # this member, and read a new gzip header.
- # (The number of bytes to seek back is the length of the unused
- # data, minus 8 because those 8 bytes are part of this member.
- seek_length = len (self.decompress.unused_data) - 8
- if seek_length > 0:
- # we read too much data
- self.fileobj.seek(-seek_length, 1)
+ if self._new_member:
+ # If the _new_member flag is set, we have to
+ # jump to the next member, if there is one.
+ #
+ # First, check if we're at the end of the file;
+ # if so, it's time to stop; no more members to read.
+ next_header_bytes = self.fileobj.read(10)
+ if next_header_bytes == '':
+ raise EOFError, "Reached EOF"
+
+ self._init_read()
+ self._read_gzip_header(next_header_bytes)
+ self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+ self._new_member = False
+
+ # Read a chunk of data from the file
+ buf = self.fileobj.read(size)
+
+ # If the EOF has been reached, flush the decompression object
+ # and mark this object as finished.
+
+ if buf == "":
+ self._add_read_data(self.decompress.flush())
+ if len(self.decompress.unused_data) < 8:
+ raise AssertionError("what does flush do?")
self._gzip_tail = self.decompress.unused_data[0:8]
- elif seek_length < 0:
- # we haven't read enough to check the checksum.
- if not (-8 < seek_length):
- raise AssertionError("too great a seek")
- buf = self.fileobj.read(-seek_length)
- self._gzip_tail = self.decompress.unused_data + buf
- else:
- self._gzip_tail = self.decompress.unused_data
-
- # Check the CRC and file size, and set the flag so we read
- # a new member on the next call
- self._read_eof()
- self._new_member = True
-
- def _read_eof(self):
- """tuned to reduce function calls and eliminate file seeking:
- pass 1:
- reduces lsprof count from 800 to 288
- 4168 in 296
- avoid U32 call by using struct format L
- 4168 in 200
- """
- # We've read to the end of the file, so we should have 8 bytes of
- # unused data in the decompressor. If we don't, there is a corrupt file.
- # We use these 8 bytes to calculate the CRC and the recorded file size.
- # We then check the that the computed CRC and size of the
- # uncompressed data matches the stored values. Note that the size
- # stored is the true file size mod 2**32.
- if not (len(self._gzip_tail) == 8):
- raise AssertionError("gzip trailer is incorrect length.")
- crc32, isize = struct.unpack("<LL", self._gzip_tail)
- # note that isize is unsigned - it can exceed 2GB
- if crc32 != U32(self.crc):
- raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
- elif isize != LOWU32(self.size):
- raise IOError, "Incorrect length of data produced"
+ self._read_eof()
+ # tell the driving read() call we have stuffed all the data
+ # in self.extrabuf
+ raise EOFError, 'Reached EOF'
+
+ self._add_read_data(self.decompress.decompress(buf))
+
+ if self.decompress.unused_data != "":
+ # Ending case: we've come to the end of a member in the file,
+ # so seek back to the start of the data for the next member
+ # which is the length of the decompress objects unused data -
+ # the first 8 bytes for the end crc and size records.
+ #
+ # so seek back to the start of the unused data, finish up
+ # this member, and read a new gzip header.
+ # (The number of bytes to seek back is the length of the unused
+ # data, minus 8 because those 8 bytes are part of this member.)
+ seek_length = len (self.decompress.unused_data) - 8
+ if seek_length > 0:
+ # we read too much data
+ self.fileobj.seek(-seek_length, 1)
+ self._gzip_tail = self.decompress.unused_data[0:8]
+ elif seek_length < 0:
+ # we haven't read enough to check the checksum.
+ if not (-8 < seek_length):
+ raise AssertionError("too great a seek")
+ buf = self.fileobj.read(-seek_length)
+ self._gzip_tail = self.decompress.unused_data + buf
+ else:
+ self._gzip_tail = self.decompress.unused_data
+
+ # Check the CRC and file size, and set the flag so we read
+ # a new member on the next call
+ self._read_eof()
+ self._new_member = True
+
+ def _read_eof(self):
+ """tuned to reduce function calls and eliminate file seeking:
+ pass 1:
+ reduces lsprof count from 800 to 288
+ 4168 in 296
+ avoid U32 call by using struct format L
+ 4168 in 200
+ """
+ # We've read to the end of the file, so we should have 8 bytes of
+ # unused data in the decompressor. If we don't, there is a corrupt
+ # file. We use these 8 bytes to calculate the CRC and the recorded
+ # file size. We then check that the computed CRC and size of
+ # the uncompressed data matches the stored values. Note that the
+ # size stored is the true file size mod 2**32.
+ if not (len(self._gzip_tail) == 8):
+ raise AssertionError("gzip trailer is incorrect length.")
+ crc32, isize = struct.unpack("<LL", self._gzip_tail)
+ # note that isize is unsigned - it can exceed 2GB
+ if crc32 != U32(self.crc):
+ raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
+ elif isize != LOWU32(self.size):
+ raise IOError, "Incorrect length of data produced"
def _read_gzip_header(self, bytes=None):
"""Supply bytes if the minimum header size is already read.
=== modified file 'doc/en/release-notes/bzr-2.6.txt'
--- a/doc/en/release-notes/bzr-2.6.txt 2013-05-27 09:13:55 +0000
+++ b/doc/en/release-notes/bzr-2.6.txt 2013-07-09 07:47:49 +0000
@@ -103,6 +103,11 @@
* The launchpad plugin now requires API 1.6.0 or later. This version shipped
with Ubuntu 9.10. (Aaron Bentley)
+* Better align with upstream gzip.py in tuned_gzip.py. We may lose a bit of
+ performance, but this only affects the knit and weave formats, which are
+ already partly deprecated; better to keep compatibility than to fail fast ;)
+ (Vincent Ladeuil, #1116079)
+
Testing
*******
More information about the bazaar-commits mailing list