Rev 2841: tuned_gzip.bytes_to_gzip() for faster commits (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Fri Sep 21 01:50:27 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2841
revision-id: pqm at pqm.ubuntu.com-20070921005024-anlkzk5nrdtujta4
parent: pqm at pqm.ubuntu.com-20070920235505-6w61gqyajy9i0ioj
parent: ian.clatworthy at internode.on.net-20070920224415-92hsfa28iflycxvz
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2007-09-21 01:50:24 +0100
message:
  tuned_gzip.bytes_to_gzip() for faster commits (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tuned_gzip.py           tuned_gzip.py-20060407014720-5aadc518e928e8d2
    ------------------------------------------------------------
    revno: 2839.1.1
    merged: ian.clatworthy at internode.on.net-20070920224415-92hsfa28iflycxvz
    parent: pqm at pqm.ubuntu.com-20070920125023-upjqmzln7mjtvj1h
    parent: robertc at robertcollins.net-20070913031607-fit1cj291o8yu1z2
    committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
    branch nick: ianc-integration2
    timestamp: Fri 2007-09-21 08:44:15 +1000
    message:
      tuned_gzip.bytes_to_gzip() for faster commits (Robert Collins)
    ------------------------------------------------------------
    revno: 2817.3.1
    merged: robertc at robertcollins.net-20070913031607-fit1cj291o8yu1z2
    parent: pqm at pqm.ubuntu.com-20070912222627-zvqit350mf6gvrbh
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: knits
    timestamp: Thu 2007-09-13 13:16:07 +1000
    message:
      * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string
        and returns a gzipped version of the same. This is used to avoid a bunch
        of api friction during adding of knit hunks. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS	2007-09-20 01:18:47 +0000
+++ b/NEWS	2007-09-20 22:44:15 +0000
@@ -91,6 +91,10 @@
    * New method on xml serialisers, write_inventory_to_lines, which matches the
      API used by knits for adding content. (Robert Collins)
 
+   * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string
+     and returns a gzipped version of the same. This is used to avoid a bunch
+     of api friction during adding of knit hunks. (Robert Collins)
+
   TESTING:
 
 

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-09-20 06:12:51 +0000
+++ b/bzrlib/knit.py	2007-09-21 00:50:24 +0000
@@ -100,7 +100,7 @@
     RevisionNotPresent,
     RevisionAlreadyPresent,
     )
-from bzrlib.tuned_gzip import GzipFile
+from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
 from bzrlib.osutils import (
     contains_whitespace,
     contains_linebreaks,
@@ -1978,22 +1978,15 @@
         
         :return: (len, a StringIO instance with the raw data ready to read.)
         """
-        sio = StringIO()
-        data_file = GzipFile(None, mode='wb', fileobj=sio,
-            compresslevel=Z_DEFAULT_COMPRESSION)
-
-        assert isinstance(version_id, str)
-        data_file.writelines(chain(
+        bytes = (''.join(chain(
             ["version %s %d %s\n" % (version_id,
                                      len(lines),
                                      digest)],
             lines,
-            ["end %s\n" % version_id]))
-        data_file.close()
-        length= sio.tell()
-
-        sio.seek(0)
-        return length, sio
+            ["end %s\n" % version_id])))
+        assert bytes.__class__ == str
+        compressed_bytes = bytes_to_gzip(bytes)
+        return len(compressed_bytes), compressed_bytes
 
     def add_raw_records(self, sizes, raw_data):
         """Append a prepared record to the data file.
@@ -2011,10 +2004,10 @@
         
         Returns index data for retrieving it later, as per add_raw_records.
         """
-        size, sio = self._record_to_data(version_id, digest, lines)
-        result = self.add_raw_records([size], sio.getvalue())
+        size, bytes = self._record_to_data(version_id, digest, lines)
+        result = self.add_raw_records([size], bytes)
         if self._do_cache:
-            self._cache[version_id] = sio.getvalue()
+            self._cache[version_id] = bytes
         return result[0]
 
     def _parse_record_header(self, version_id, raw_data):

=== modified file 'bzrlib/tuned_gzip.py'
--- a/bzrlib/tuned_gzip.py	2006-10-11 23:08:27 +0000
+++ b/bzrlib/tuned_gzip.py	2007-09-13 03:16:07 +0000
@@ -29,7 +29,37 @@
 # we want a \n preserved, break on \n only splitlines.
 import bzrlib
 
-__all__ = ["GzipFile"]
+__all__ = ["GzipFile", "bytes_to_gzip"]
+
+
+def bytes_to_gzip(bytes, factory=zlib.compressobj,
+    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
+    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
+    crc32=zlib.crc32):
+    """Create a gzip file containing bytes and return its content."""
+    result = [
+        '\037\213'  # self.fileobj.write('\037\213')  # magic header
+        '\010'      # self.fileobj.write('\010')      # compression method
+                    # fname = self.filename[:-3]
+                    # flags = 0
+                    # if fname:
+                    #     flags = FNAME
+        '\x00'      # self.fileobj.write(chr(flags))
+        '\0\0\0\0'  # write32u(self.fileobj, long(time.time()))
+        '\002'      # self.fileobj.write('\002')
+        '\377'      # self.fileobj.write('\377')
+                    # if fname:
+        ''          #     self.fileobj.write(fname + '\000')
+        ]
+    # using a compressobj avoids a small header and trailer that the compress()
+    # utility function adds.
+    compress = factory(level, method, width, mem, 0)
+    result.append(compress.compress(bytes))
+    result.append(compress.flush())
+    result.append(struct.pack("<L", LOWU32(crc32(bytes))))
+    # size may exceed 2GB, or even 4GB
+    result.append(struct.pack("<L", LOWU32(len(bytes))))
+    return ''.join(result)
 
 
 class GzipFile(gzip.GzipFile):




More information about the bazaar-commits mailing list