Rev 2680: Lots of tests that exercise the error handling. in http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 3 17:23:58 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract
------------------------------------------------------------
revno: 2680
revision-id: john at arbash-meinel.com-20070803161657-imr9sa6de3htjdok
parent: john at arbash-meinel.com-20070803023207-emqm0vc641no4lps
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pyrex_knit_extract
timestamp: Fri 2007-08-03 11:16:57 -0500
message:
Lots of tests that exercise the error handling.
Even uncovered a couple small bugs, these have been fixed.
modified:
bzrlib/_knit_helpers_c.pyx knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
bzrlib/_knit_helpers_py.py _knit_load_data_py.p-20070629000948-9a0nh4s118bi5y8n-1
bzrlib/tests/test__knit_helpers.py test__knit_helpers.p-20070724214127-vkhtjicq8lrlotz3-1
-------------- next part --------------
=== modified file 'bzrlib/_knit_helpers_c.pyx'
--- a/bzrlib/_knit_helpers_c.pyx 2007-08-03 02:32:07 +0000
+++ b/bzrlib/_knit_helpers_c.pyx 2007-08-03 16:16:57 +0000
@@ -110,6 +110,7 @@
char *PyString_AsString(object p)
object PyString_FromStringAndSize(char *, int)
+ object PyString_FromString(char *)
int PyString_Size(object p)
void Py_INCREF(object)
@@ -149,6 +150,16 @@
return 0
+cdef object string_from_pointer(char *s):
+ """Convert a C pointer into a string.
+
+ Just watch out for NULL.
+ """
+ if s == NULL:
+ return None
+ return PyString_FromString(s)
+
+
cdef class KnitIndexReader:
cdef object kndx
@@ -398,6 +409,7 @@
cdef z_stream strm
cdef int stream_finished
+ cdef int stream_initialized
# We use a few pointers into our decompression buffer, to track what has
# been processed, and what we need to decompress. Specifically, we have:
@@ -421,8 +433,6 @@
cdef char *cur_line
cdef ssize_t cur_line_size
- cdef int processing
-
cdef object knit_data_name
cdef char *version_id
cdef ssize_t version_id_size
@@ -436,8 +446,6 @@
memset(&self.strm, 0, sizeof(z_stream))
self.decompress_buf_size = _decompress_buffer_size
self.decompress_buffer = <char *>PyMem_Malloc(self.decompress_buf_size)
- self.processing = 0
-
self.set_knit_data_name(kd_name)
self.set_version_id(version_id)
self.set_is_annotated(is_annotated)
@@ -446,6 +454,10 @@
def __dealloc__(self):
PyMem_Free(self.decompress_buffer)
self.decompress_buffer = NULL
+ if self.stream_initialized:
+ # TODO: What return codes do we expect here?
+ inflateEnd(&self.strm)
+ stream_initialized = 0
cdef int set_knit_data_name(self, name) except -1:
self.knit_data_name = name
@@ -468,17 +480,14 @@
c_data = PyString_AsString(data)
data_size = PyString_Size(data)
- assert self.processing == 0, (
- "use_gzip_hunk called while processing.")
-
self.strm.next_in = <Bytef *>c_data
self.strm.avail_in = data_size
self.strm.avail_out = 0 # Avail out will be set when we start
# decompressing
- self.processing = 1
self.stream_finished = 0
self.bytes_available = 0
inflateInit2(&self.strm, 15+16)
+ self.stream_initialized = 1
cdef int _extract_from_stream(self) except -1:
"""Extract some more data from the compressed stream.
@@ -489,6 +498,9 @@
cdef uInt avail_out
cdef int retval
+ # Check if we have already closed this stream
+ if not self.stream_initialized:
+ return 0
# TODO: handle self.strm.avail_out == 0, meaning the output buffer
# has been filled, and we need to reset to extract more data.
# basically, we just need to memmove() the current unprocessed data
@@ -510,14 +522,20 @@
retval = inflate(&self.strm, Z_NO_FLUSH)
if retval == Z_STREAM_END:
self.stream_finished = 1 # True
+ retval = inflateEnd(&self.strm)
+ if retval != Z_OK:
+ raise errors.KnitCorrupt(self.knit_data_name,
+ "While closing the stream, we got %d not Z_OK: %s"
+ % (retval, string_from_pointer(self.strm.msg)))
+ self.stream_initialized = 0
elif retval != Z_OK:
# We were able to decompress some data. good
raise errors.KnitCorrupt(self.knit_data_name,
"We expected to get Z_OK or Z_STREAM_END, but we got:"
" %d %s"
- % (retval, self.strm.msg))
+ % (retval, string_from_pointer(self.strm.msg)))
- assert avail_out != self.strm.avail_out
+ assert avail_out >= self.strm.avail_out
# We have more bytes available to process
self.bytes_available = self.bytes_available + (avail_out -
self.strm.avail_out)
@@ -602,8 +620,14 @@
# return 0 to indicate we have nothing more to process
# (we should probably also cleanup self.cur*)
return 0
- else: # No bytes extracted, but we haven't finished reading the stream
- assert False, 'not implemented yet'
+ else:
+ # No bytes extracted, but we haven't finished reading the stream
+ if self.strm.avail_in == 0:
+ # It seems we've read all we've been given, complain
+ raise errors.KnitCorrupt(self.knit_data_name,
+ "Reached the end of the data stream")
+ assert False, ("I don't know how we could get an incomplete"
+ " stream with bytes left, but no output")
return 0
cdef int _check_header(self) except -1:
@@ -710,8 +734,8 @@
if next_retval == 0:
raise errors.KnitCorrupt(self.knit_data_name,
"Not enough lines in knit data."
- " Expected %d, only got %d"
- % (self.num_lines, i))
+ " Expected %d"
+ % (self.num_lines,))
if self.is_annotated:
# Skip until the first ' ' character
text = <char *>memchr(self.cur_line, c' ', self.cur_line_size)
@@ -732,8 +756,8 @@
if next_retval == 0:
raise errors.KnitCorrupt(self.knit_data_name,
"Not enough lines in knit data."
- " Expected %d, only got %d"
- % (self.num_lines, i))
+ " Expected %d"
+ % (self.num_lines,))
# TODO: jam 20070802 We might think about resizing the string in
# place, or something like that, but for now, we just create
# a new string by combining the two.
=== modified file 'bzrlib/_knit_helpers_py.py'
--- a/bzrlib/_knit_helpers_py.py 2007-08-03 00:00:35 +0000
+++ b/bzrlib/_knit_helpers_py.py 2007-08-03 16:16:57 +0000
@@ -116,6 +116,9 @@
if len(rec) != 4:
raise errors.KnitCorrupt(kd_name,
'unexpected number of elements in record header')
+ if rec[0] != 'version':
+ raise errors.KnitCorrupt(kd_name,
+ 'knit header did not start with "version"')
if rec[1] != version_id:
raise errors.KnitCorrupt(kd_name,
'unexpected version, wanted %r, got %r'
@@ -139,7 +142,12 @@
raise errors.KnitCorrupt(kd_name,
"While reading {%s} got %s(%s)"
% (version_id, e.__class__.__name__, str(e)))
- header = record_contents.pop(0)
+ try:
+ header = record_contents.pop(0)
+ except IndexError, e:
+ raise errors.KnitCorrupt(kd_name,
+ "No header line for {%s}"
+ % (version_id,))
rec = _check_header(version_id, header, kd_name)
last_line = record_contents.pop()
=== modified file 'bzrlib/tests/test__knit_helpers.py'
--- a/bzrlib/tests/test__knit_helpers.py 2007-08-02 23:32:53 +0000
+++ b/bzrlib/tests/test__knit_helpers.py 2007-08-03 16:16:57 +0000
@@ -19,6 +19,7 @@
from cStringIO import StringIO
from bzrlib import (
+ errors,
knit,
_knit_helpers_py,
tests,
@@ -107,6 +108,179 @@
self.fail('%s did not extract the expected text'
% (extract_fulltext.__name__,))
+ def test_empty_hunk(self):
+ extract_fulltext = self.get_extract_knit_fulltext_from_gzip()
+ self.assertRaises(errors.KnitCorrupt,
+ extract_fulltext, 'rev-id', '', 'fname', False)
+ self.assertRaises(errors.KnitCorrupt,
+ extract_fulltext, 'rev-id', '', 'fname', True)
+
+ def test_bad_header(self):
+ extract_fulltext = self.get_extract_knit_fulltext_from_gzip()
+ # not gzip data
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ 'foo rev-id 1 12345\n'
+ 'a b\n'
+ 'end rev-id\n',
+ 'fname', False)
+ # doesn't start with 'version'
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('foo rev-id 1 12345\n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+ # incomplete first line
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('f'),
+ 'fname', False)
+ # incomplete first line
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('f\n'),
+ 'fname', False)
+ # incomplete first line
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('f\0\n'),
+ 'fname', False)
+ # incomplete gzip stream
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'a b\n'
+ 'end rev-id\n')[:10],
+ 'fname', False)
+ # corrupted gzip stream
+ gz_txt = gzip_compress('version rev-id 1 12345\n'
+ 'a b\n'
+ 'end rev-id\n')
+
+ # All nulls
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ '\0'*len(gz_txt), 'fname', False)
+ # nulls in the middle of the valid stream
+ corrupt_gz_txt = gz_txt[:10] + '\0\0' + gz_txt[12:]
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ corrupt_gz_txt, 'fname', False)
+
+ # too many fields
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345 6\n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ # missing sha1sum
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1\n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+ # missing sha1 and line count
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id\n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version \n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ # blank entries
+ e = self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version \n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ # Non integer line count
+ # Both code paths raise ValueError, it isn't perfect, but it works
+ self.assertRaises(ValueError, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id x 12345\n'
+ 'a b\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ def test_bad_contents(self):
+ extract_fulltext = self.get_extract_knit_fulltext_from_gzip()
+ # Too many lines
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'baz bing\n'
+ 'end rev-id\n'),
+ 'fname', False)
+ # Too few lines
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 2 12345\n'
+ 'foo bar\n'
+ 'end rev-id\n'),
+ 'fname', False)
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 3 12345\n'
+ 'foo bar\n'
+ 'end rev-id\n'),
+ 'fname', False)
+ # Trailing lines after 'end'
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end rev-id\n'
+ 'more cruft\n'),
+ 'fname', False)
+ # Trailing lines after 'end'
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end rev-id\n'
+ 'end rev-id\n'),
+ 'fname', False)
+
+ def test_tail(self):
+ extract_fulltext = self.get_extract_knit_fulltext_from_gzip()
+ # no tail
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'),
+ 'fname', False)
+ # tail version_id doesn't match
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end rev-id2\n'),
+ 'fname', False)
+ # No 'end'
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'back rev-id\n'),
+ 'fname', False)
+ # No revid
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end \n'),
+ 'fname', False)
+ # No revid
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end\n'),
+ 'fname', False)
+ # No revid
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end'),
+ 'fname', False)
+ # No newline
+ self.assertRaises(errors.KnitCorrupt, extract_fulltext, 'rev-id',
+ gzip_compress('version rev-id 1 12345\n'
+ 'foo bar\n'
+ 'end rev-id'),
+ 'fname', False)
+
+
class TestExtractKnitFulltextFromGzipCompiled(TestExtractKnitFulltextFromGzip):
"""Test the complied implementation."""
@@ -208,6 +382,13 @@
self.fail('%s did not extract the expected delta'
% (extract_fulltext.__name__,))
+ def test_empty_hunk(self):
+ extract_linedelta = self.get_extract_knit_linedelta_from_gzip()
+ self.assertRaises(errors.KnitCorrupt,
+ extract_linedelta, 'rev-id', '', 'fname', False)
+ self.assertRaises(errors.KnitCorrupt,
+ extract_linedelta, 'rev-id', '', 'fname', True)
+
class TestExtractKnitLineDeltaFromGzipCompiled(TestExtractKnitLinedeltaFromGzip):
"""Test the complied implementation."""
More information about the bazaar-commits
mailing list