Rev 2914: Switch to using raw zlib instead of gzip. in http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents
John Arbash Meinel
john at arbash-meinel.com
Sat Oct 27 00:00:14 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents
------------------------------------------------------------
revno: 2914
revision-id:john at arbash-meinel.com-20071026225938-ownnbnj3nng3pfes
parent: john at arbash-meinel.com-20071026220603-ks85ogcjuw13cw1k
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parents
timestamp: Fri 2007-10-26 17:59:38 -0500
message:
Switch to using raw zlib instead of gzip.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-10-26 22:06:03 +0000
+++ b/bzrlib/knit.py 2007-10-26 22:59:38 +0000
@@ -67,7 +67,7 @@
import os
import sys
import warnings
-from zlib import Z_DEFAULT_COMPRESSION
+import zlib
import bzrlib
from bzrlib.lazy_import import lazy_import
@@ -740,7 +740,7 @@
# produce.
raw_data = reader_callable(length)
my_fulltext_sha1 = self.get_sha1(version_id)
- df, rec = self._data._parse_record_header(version_id, raw_data)
+ rec = self._data._parse_record_header(version_id, raw_data)
stream_fulltext_sha1 = rec[3]
if my_fulltext_sha1 != stream_fulltext_sha1:
# Actually, we don't know if it's this knit that's corrupt,
@@ -2088,7 +2088,8 @@
raise KnitCorrupt(self._access,
"While reading {%s} got %s(%s)"
% (version_id, e.__class__.__name__, str(e)))
- return df, rec
+ df.close()
+ return rec
def _check_header(self, version_id, line):
rec = line.split()
@@ -2162,8 +2163,7 @@
self._cache[version_id] = data
# validate the header
- df, rec = self._parse_record_header(version_id, data)
- df.close()
+ rec = self._parse_record_header(version_id, data)
yield version_id, data
def read_records_iter(self, records):
@@ -2261,7 +2261,7 @@
dense_lines or lines,
["end %s\n" % version_id]))
assert bytes.__class__ == str
- compressed_bytes = bytes_to_gzip(bytes)
+ compressed_bytes = zlib.compress(bytes)
return len(compressed_bytes), compressed_bytes
def _check_header(self, version_id, line):
@@ -2343,17 +2343,69 @@
parents = rec[8:8+num_parents]
return (rec, num_lines, rec[3], has_eol, text_kind, basis, parents)
+ def _parse_record_header(self, version_id, raw_data):
+ """Parse a record header for consistency.
+
+ :return: the parsed header record, after checking it for consistency.
+ """
+ try:
+ # TODO: use a decompressobj instead so we don't have to decompress
+ # the whole stream
+ data = zlib.decompress(raw_data)
+ header_newline = data.find('\n')
+ assert header_newline != -1
+ header = data[:header_newline+1]
+ rec = self._check_header(version_id, header)
+ except Exception, e:
+ raise KnitCorrupt(self._access,
+ "While reading {%s} got %s(%s)"
+ % (version_id, e.__class__.__name__, str(e)))
+ return rec
+
+ def _parse_record(self, version_id, data):
+ # profiling notes:
+ # 4168 calls in 2880 217 internal
+ # 4168 calls to _parse_record_header in 2121
+ # 4168 calls to readlines in 330
+ data = zlib.decompress(data)
+ df = StringIO(data)
+ try:
+ record_contents = df.readlines()
+ except Exception, e:
+ raise KnitCorrupt(self._access,
+ "While reading {%s} got %s(%s)"
+ % (version_id, e.__class__.__name__, str(e)))
+ df.close()
+ header = record_contents.pop(0)
+ (rec, num_lines, digest, has_eol, text_kind, basis,
+ parents) = self._check_info_header(version_id, header)
+
+ last_line = record_contents.pop()
+ if len(record_contents) != num_lines:
+ raise KnitCorrupt(self._access,
+ 'incorrect number of lines %s != %s'
+ ' for version {%s}'
+ % (len(record_contents), num_lines,
+ version_id))
+ if last_line != 'end %s\n' % (version_id,):
+ raise KnitCorrupt(self._access,
+ 'unexpected version end line %r, wanted %r'
+ % (last_line, version_id))
+ return record_contents, digest
+
def read_info_record(self, version_id, index_memo):
"""Read records"""
raw_data = list(self._access.get_raw_records([index_memo]))[0]
- df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
+ data = zlib.decompress(raw_data)
+ df = StringIO(data)
lines = df.readlines()
df.close()
header_line = lines.pop(0)
last_line = lines.pop()
if last_line != 'end %s\n' % (version_id,):
raise KnitCorrupt(self._access,
- 'unexpected version end line %r, wanted %r'
+ 'unexpected version end line %r, wanted %r'
% (last_line, version_id))
try:
(rec, num_lines, digest, has_eol, text_kind, basis,
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2007-10-26 21:29:59 +0000
+++ b/bzrlib/tests/test_knit.py 2007-10-26 22:59:38 +0000
@@ -20,6 +20,7 @@
import difflib
import sha
import sys
+import zlib
from bzrlib import (
errors,
@@ -509,50 +510,49 @@
def test_read_info_record(self):
sha1sum = sha.new('foo\nbar\n').hexdigest()
- gz_txt = tuned_gzip.bytes_to_gzip('version rev-id-1 2 %s eol full 0\n'
- 'foo\n'
- 'bar\n'
- 'end rev-id-1\n'
- % (sha1sum,))
- transport = MockTransport([gz_txt])
+ z_data = zlib.compress(
+ 'version rev-id-1 2 %s eol full 0\n'
+ 'foo\n'
+ 'bar\n'
+ 'end rev-id-1\n'
+ % (sha1sum,))
+ transport = MockTransport([z_data])
access = _KnitAccess(transport, 'filename', None, None, False, False)
data = knit._KnitDataV2(access=access)
- index_memo = (None, 0, len(gz_txt))
+ index_memo = (None, 0, len(z_data))
self.assertEqual((['foo\n', 'bar\n'], sha1sum, True, 'full', None, []),
data.read_info_record('rev-id-1', index_memo))
def test__record_to_data_basic(self):
knit_data = knit._KnitDataV2(None)
- data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ data_len, z_data = knit_data._record_to_data('version-id', 'digest',
['two\n', 'lines\n'])
- self.assertEqual(data_len, len(gz_data))
- data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data),
- mode='rb').read()
+ self.assertEqual(data_len, len(z_data))
+ data = zlib.decompress(z_data)
self.assertEqualDiff('version version-id 2 digest eol full 0\n'
'two\n'
'lines\n'
'end version-id\n',
data)
- transport = MockTransport([gz_data])
+ transport = MockTransport([z_data])
access = _KnitAccess(transport, 'filename', None, None, False, False)
knit_data = knit._KnitDataV2(access=access)
- index_memo = (None, 0, len(gz_data))
+ index_memo = (None, 0, len(z_data))
self.assertEqual((['two\n', 'lines\n'], 'digest', True, 'full', None, []),
knit_data.read_info_record('version-id', index_memo))
def test__record_to_data_extra(self):
knit_data = knit._KnitDataV2(None)
- data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ data_len, z_data = knit_data._record_to_data('version-id', 'digest',
['two\n', 'lines\n'],
['dense\nlines\n'], # Intentionally different
parents=['parent-version-id'],
has_eol=False,
delta_parent='delta-version-id',
)
- self.assertEqual(data_len, len(gz_data))
- data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data),
- mode='rb').read()
+ self.assertEqual(data_len, len(z_data))
+ data = zlib.decompress(z_data)
self.assertEqualDiff('version version-id 2 digest no-eol'
' linedelta delta-version-id'
' 1 parent-version-id\n'
@@ -560,10 +560,10 @@
'lines\n'
'end version-id\n',
data)
- transport = MockTransport([gz_data])
+ transport = MockTransport([z_data])
access = _KnitAccess(transport, 'filename', None, None, False, False)
knit_data = knit._KnitDataV2(access=access)
- index_memo = (None, 0, len(gz_data))
+ index_memo = (None, 0, len(z_data))
self.assertEqual((['dense\n', 'lines\n'], 'digest', False,
'linedelta', 'delta-version-id',
['parent-version-id']),
@@ -571,16 +571,16 @@
def test__check_header_v1(self):
# This is the v1 format
- gz_data = tuned_gzip.bytes_to_gzip(
+ z_data = zlib.compress(
'version rev-id-1 2 digest\n'
'foo\n'
'bar\n'
'end rev-id-1\n'
)
- transport = MockTransport([gz_data])
+ transport = MockTransport([z_data])
access = _KnitAccess(transport, 'filename', None, None, False, False)
knit_data = knit._KnitDataV2(access=access)
- index_memo = (None, 0, len(gz_data))
+ index_memo = (None, 0, len(z_data))
self.assertRaises(errors.KnitCorrupt,
knit_data.read_info_record,
'rev-id-1', index_memo)
More information about the bazaar-commits
mailing list