Rev 2818: * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string [...] (in http://people.ubuntu.com/~robertc/baz2.0/knits)
Robert Collins
robertc at robertcollins.net
Thu Sep 13 04:16:30 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/knits
------------------------------------------------------------
revno: 2818
revision-id: robertc at robertcollins.net-20070913031607-fit1cj291o8yu1z2
parent: pqm at pqm.ubuntu.com-20070912222627-zvqit350mf6gvrbh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: knits
timestamp: Thu 2007-09-13 13:16:07 +1000
message:
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string
and returns a gzipped version of the same. This is used to avoid a bunch
of api friction during adding of knit hunks. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tuned_gzip.py tuned_gzip.py-20060407014720-5aadc518e928e8d2
=== modified file 'NEWS'
--- a/NEWS 2007-09-12 21:27:42 +0000
+++ b/NEWS 2007-09-13 03:16:07 +0000
@@ -22,7 +22,13 @@
* The ``VersionedFile`` interface now allows content checks to be bypassed
by supplying check_content=False. This saves nearly 30% of the minimum
cost to store a version of a file. (Robert Collins)
-
+
+ INTERNALS:
+
+ * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string
+ and returns a gzipped version of the same. This is used to avoid a bunch
+ of api friction during adding of knit hunks. (Robert Collins)
+
TESTING:
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-09-12 04:21:51 +0000
+++ b/bzrlib/knit.py 2007-09-13 03:16:07 +0000
@@ -100,7 +100,7 @@
RevisionNotPresent,
RevisionAlreadyPresent,
)
-from bzrlib.tuned_gzip import GzipFile
+from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
from bzrlib.osutils import (
contains_whitespace,
contains_linebreaks,
@@ -1983,22 +1983,15 @@
:return: (len, a StringIO instance with the raw data ready to read.)
"""
- sio = StringIO()
- data_file = GzipFile(None, mode='wb', fileobj=sio,
- compresslevel=Z_DEFAULT_COMPRESSION)
-
- assert isinstance(version_id, str)
- data_file.writelines(chain(
+ bytes = (''.join(chain(
["version %s %d %s\n" % (version_id,
len(lines),
digest)],
lines,
- ["end %s\n" % version_id]))
- data_file.close()
- length= sio.tell()
-
- sio.seek(0)
- return length, sio
+ ["end %s\n" % version_id])))
+ assert bytes.__class__ == str
+ compressed_bytes = bytes_to_gzip(bytes)
+ return len(compressed_bytes), compressed_bytes
def add_raw_records(self, sizes, raw_data):
"""Append a prepared record to the data file.
@@ -2016,10 +2009,10 @@
Returns index data for retrieving it later, as per add_raw_records.
"""
- size, sio = self._record_to_data(version_id, digest, lines)
- result = self.add_raw_records([size], sio.getvalue())
+ size, bytes = self._record_to_data(version_id, digest, lines)
+ result = self.add_raw_records([size], bytes)
if self._do_cache:
- self._cache[version_id] = sio.getvalue()
+ self._cache[version_id] = bytes
return result[0]
def _parse_record_header(self, version_id, raw_data):
=== modified file 'bzrlib/tuned_gzip.py'
--- a/bzrlib/tuned_gzip.py 2006-10-11 23:08:27 +0000
+++ b/bzrlib/tuned_gzip.py 2007-09-13 03:16:07 +0000
@@ -29,7 +29,37 @@
# we want a \n preserved, break on \n only splitlines.
import bzrlib
-__all__ = ["GzipFile"]
+__all__ = ["GzipFile", "bytes_to_gzip"]
+
+
+def bytes_to_gzip(bytes, factory=zlib.compressobj,
+ level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
+ width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
+ crc32=zlib.crc32):
+ """Create a gzip file containing bytes and return its content."""
+ result = [
+ '\037\213' # self.fileobj.write('\037\213') # magic header
+ '\010' # self.fileobj.write('\010') # compression method
+ # fname = self.filename[:-3]
+ # flags = 0
+ # if fname:
+ # flags = FNAME
+ '\x00' # self.fileobj.write(chr(flags))
+ '\0\0\0\0' # write32u(self.fileobj, long(time.time()))
+ '\002' # self.fileobj.write('\002')
+ '\377' # self.fileobj.write('\377')
+ # if fname:
+ '' # self.fileobj.write(fname + '\000')
+ ]
+ # using a compressobj avoids a small header and trailer that the compress()
+ # utility function adds.
+ compress = factory(level, method, width, mem, 0)
+ result.append(compress.compress(bytes))
+ result.append(compress.flush())
+ result.append(struct.pack("<L", LOWU32(crc32(bytes))))
+ # size may exceed 2GB, or even 4GB
+ result.append(struct.pack("<L", LOWU32(len(bytes))))
+ return ''.join(result)
class GzipFile(gzip.GzipFile):
More information about the bazaar-commits mailing list