Rev 4936: (jam) Clean up pyrex cdef functions to be aware of exceptions. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Jan 6 20:48:32 GMT 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4936 [merge]
revision-id: pqm at pqm.ubuntu.com-20100106204826-rehs7s23w0jbz6nj
parent: pqm at pqm.ubuntu.com-20100106074025-ga21h28spm91o1h8
parent: john at arbash-meinel.com-20100106185008-ak602m6fkd3fq4q9
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2010-01-06 20:48:26 +0000
message:
(jam) Clean up pyrex cdef functions to be aware of exceptions.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/_annotator_pyx.pyx _annotator_pyx.pyx-20090623201937-ic33ic1aqvd9mjje-1
bzrlib/_btree_serializer_pyx.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
bzrlib/_chk_map_pyx.pyx _chk_map_pyx.pyx-20090309111231-peyz7p2azr0dzdrb-1
bzrlib/_dirstate_helpers_pyx.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
bzrlib/_groupcompress_pyx.pyx _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
bzrlib/_knit_load_data_pyx.pyx knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
bzrlib/_known_graph_pyx.pyx _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
bzrlib/_rio_pyx.pyx _rio_pyx.pyx-20090514104636-8203jcqvfny56yrd-1
bzrlib/_simple_set_pyx.pxd _static_tuple_intern-20091002191442-ix6rkqbw8ktoj7r5-1
bzrlib/_simple_set_pyx.pyx _static_tuple_intern-20091002053806-sid67p8spedru51w-1
bzrlib/_walkdirs_win32.pyx _walkdirs_win32.pyx-20080716220454-kweh3tgxez5dvw2l-2
bzrlib/tests/test_source.py test_source.py-20051207061333-a58dea6abecc030d
=== modified file 'NEWS'
--- a/NEWS 2010-01-05 16:55:12 +0000
+++ b/NEWS 2010-01-06 20:48:26 +0000
@@ -184,6 +184,11 @@
Testing
*******
+* We have a new ``test_source`` that ensures all pyrex ``cdef`` functions
+ handle exceptions somehow. (Possibly by setting ``# cannot_raise``
+ rather than an ``except ?:`` clause.) This should help prevent bugs like
+ bug #495023. (John Arbash Meinel)
+
bzr 2.1.0b4
###########
=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx 2009-07-08 17:09:03 +0000
+++ b/bzrlib/_annotator_pyx.pyx 2010-01-05 04:59:57 +0000
@@ -83,7 +83,14 @@
return 0
-cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos):
+cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise
+ """Return the next entry from this tuple.
+
+ :param tpl: The tuple we are investigating, *must* be a PyTuple
+ :param pos: The last item we found. Will be updated to the new position.
+
+ This cannot raise an exception, as it does no error checking.
+ """
pos[0] = pos[0] + 1
if pos[0] >= PyTuple_GET_SIZE(tpl):
return NULL
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-10-12 20:07:09 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2010-01-05 05:09:45 +0000
@@ -65,7 +65,7 @@
# TODO: Find some way to import this from _dirstate_helpers
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
# memrchr seems to be a GNU extension, so we have to implement it ourselves
# It is not present in any win32 standard library
cdef char *pos
=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx 2009-10-21 21:27:19 +0000
+++ b/bzrlib/_chk_map_pyx.pyx 2010-01-05 05:09:45 +0000
@@ -82,7 +82,7 @@
_unknown = None
# We shouldn't just copy this from _dirstate_helpers_pyx
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
# memrchr seems to be a GNU extension, so we have to implement it ourselves
cdef char *pos
cdef char *start
=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'
--- a/bzrlib/_dirstate_helpers_pyx.pyx 2010-01-05 04:08:35 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx 2010-01-05 07:22:59 +0000
@@ -119,7 +119,7 @@
# void *memrchr(void *s, int c, size_t len)
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
# memrchr seems to be a GNU extension, so we have to implement it ourselves
cdef char *pos
cdef char *start
@@ -165,7 +165,7 @@
return PyString_FromStringAndSize(s, size)
-cdef int _is_aligned(void *ptr):
+cdef int _is_aligned(void *ptr): # cannot_raise
"""Is this pointer aligned to an integer size offset?
:return: 1 if this pointer is aligned, 0 otherwise.
@@ -173,7 +173,7 @@
return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
-cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
+cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
cdef unsigned char *cur1
cdef unsigned char *cur2
cdef unsigned char *end1
@@ -295,7 +295,7 @@
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
- char *path2, int path2_len):
+ char *path2, int path2_len): # cannot_raise
"""Compare two paths by what directory they are in.
see ``_cmp_path_by_dirblock`` for details.
@@ -768,7 +768,7 @@
state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
-cdef int minikind_from_mode(int mode):
+cdef int minikind_from_mode(int mode): # cannot_raise
# in order of frequency:
if S_ISREG(mode):
return c"f"
@@ -915,7 +915,8 @@
return link_or_sha1
-cdef char _minikind_from_string(object string):
+# TODO: Do we want to worry about exceptions here?
+cdef char _minikind_from_string(object string) except? -1:
"""Convert a python string to a char."""
return PyString_AsString(string)[0]
@@ -953,7 +954,7 @@
raise KeyError(PyString_FromStringAndSize(_minikind, 1))
-cdef int _versioned_minikind(char minikind):
+cdef int _versioned_minikind(char minikind): # cannot_raise
"""Return non-zero if minikind is in fltd"""
return (minikind == c'f' or
minikind == c'd' or
@@ -1373,7 +1374,7 @@
def iter_changes(self):
return self
- cdef void _gather_result_for_consistency(self, result):
+ cdef int _gather_result_for_consistency(self, result) except -1:
"""Check a result we will yield to make sure we are consistent later.
This gathers result's parents into a set to output later.
@@ -1381,7 +1382,7 @@
:param result: A result tuple.
"""
if not self.partial or not result[0]:
- return
+ return 0
self.seen_ids.add(result[0])
new_path = result[1][1]
if new_path:
@@ -1391,6 +1392,7 @@
# Add the root directory which parent_directories does not
# provide.
self.search_specific_file_parents.add('')
+ return 0
cdef int _update_current_block(self) except -1:
if (self.block_index < len(self.state._dirblocks) and
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx 2009-11-06 18:05:03 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx 2010-01-05 05:09:45 +0000
@@ -279,7 +279,8 @@
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
- unsigned char cmd, unsigned int *offset, unsigned int *length) nogil:
+ unsigned char cmd, unsigned int *offset,
+ unsigned int *length) nogil: # cannot_raise
"""Decode a copy instruction from the next few bytes.
A copy instruction is a variable number of bytes, so we will parse the
=== modified file 'bzrlib/_knit_load_data_pyx.pyx'
--- a/bzrlib/_knit_load_data_pyx.pyx 2009-06-22 12:52:39 +0000
+++ b/bzrlib/_knit_load_data_pyx.pyx 2010-01-04 23:13:20 +0000
@@ -97,11 +97,12 @@
self.end_str = NULL
self.history_len = 0
- cdef void validate(self):
+ cdef int validate(self) except -1:
if not PyDict_CheckExact(self.cache):
raise TypeError('kndx._cache must be a python dict')
if not PyList_CheckExact(self.history):
raise TypeError('kndx._history must be a python list')
+ return 0
cdef object process_options(self, char *option_str, char *end):
"""Process the options string into a list."""
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx 2009-11-30 17:25:22 +0000
+++ b/bzrlib/_known_graph_pyx.pyx 2010-01-05 05:09:45 +0000
@@ -703,7 +703,7 @@
self._revno_first, self._revno_second, self._revno_last,
self.is_first_child, self.seen_by_child)
- cdef int has_pending_parents(self):
+ cdef int has_pending_parents(self): # cannot_raise
if self.left_pending_parent is not None or self.pending_parents:
return 1
return 0
=== modified file 'bzrlib/_rio_pyx.pyx'
--- a/bzrlib/_rio_pyx.pyx 2009-05-15 02:36:03 +0000
+++ b/bzrlib/_rio_pyx.pyx 2010-01-05 04:59:57 +0000
@@ -49,7 +49,7 @@
from bzrlib.rio import Stanza
-cdef int _valid_tag_char(char c):
+cdef int _valid_tag_char(char c): # cannot_raise
return (c == c'_' or c == c'-' or
(c >= c'a' and c <= c'z') or
(c >= c'A' and c <= c'Z') or
=== modified file 'bzrlib/_simple_set_pyx.pxd'
--- a/bzrlib/_simple_set_pyx.pxd 2009-10-09 16:49:45 +0000
+++ b/bzrlib/_simple_set_pyx.pxd 2010-01-05 05:28:42 +0000
@@ -88,4 +88,4 @@
cdef api int SimpleSet_Discard(object self, object key) except -1
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
cdef api Py_ssize_t SimpleSet_Size(object self) except -1
-cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key)
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key) except -1
=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx 2009-10-14 15:57:06 +0000
+++ b/bzrlib/_simple_set_pyx.pyx 2010-01-05 05:28:42 +0000
@@ -540,7 +540,8 @@
return _check_self(self)._used
-cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key):
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
+ PyObject **key) except -1:
"""Walk over items in a SimpleSet.
:param pos: should be initialized to 0 by the caller, and will be updated
@@ -567,7 +568,8 @@
return 1
-cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg):
+cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
+ void *arg) except -1:
"""This is an implementation of 'tp_traverse' that hits the whole table.
Cython/Pyrex don't seem to let you define a tp_traverse, and they only
=== modified file 'bzrlib/_walkdirs_win32.pyx'
--- a/bzrlib/_walkdirs_win32.pyx 2009-03-23 14:59:43 +0000
+++ b/bzrlib/_walkdirs_win32.pyx 2010-01-05 04:59:57 +0000
@@ -109,7 +109,7 @@
wcslen(data.cFileName))
-cdef int _get_mode_bits(WIN32_FIND_DATAW *data):
+cdef int _get_mode_bits(WIN32_FIND_DATAW *data): # cannot_raise
cdef int mode_bits
mode_bits = 0100666 # writeable file, the most common
@@ -121,13 +121,13 @@
return mode_bits
-cdef __int64 _get_size(WIN32_FIND_DATAW *data):
+cdef __int64 _get_size(WIN32_FIND_DATAW *data): # cannot_raise
# Pyrex casts a DWORD into a PyLong anyway, so it is safe to do << 32
# on a DWORD
return ((<__int64>data.nFileSizeHigh) << 32) + data.nFileSizeLow
-cdef double _ftime_to_timestamp(FILETIME *ft):
+cdef double _ftime_to_timestamp(FILETIME *ft): # cannot_raise
"""Convert from a FILETIME struct into a floating point timestamp.
The fields of a FILETIME structure are the hi and lo part
@@ -147,7 +147,7 @@
return (val * 1.0e-7) - 11644473600.0
-cdef int _should_skip(WIN32_FIND_DATAW *data):
+cdef int _should_skip(WIN32_FIND_DATAW *data): # cannot_raise
"""Is this '.' or '..' so we should skip it?"""
if (data.cFileName[0] != c'.'):
return 0
=== modified file 'bzrlib/tests/test_source.py'
--- a/bzrlib/tests/test_source.py 2009-09-17 19:16:58 +0000
+++ b/bzrlib/tests/test_source.py 2010-01-05 05:28:42 +0000
@@ -372,3 +372,53 @@
self.fail(
"these files contain an assert statement and should not:\n%s"
% '\n'.join(badfiles))
+
+ def test_extension_exceptions(self):
+ """Extension functions should propagate exceptions.
+
+ Either they should return an object, have an 'except' clause, or have a
+ "# cannot_raise" to indicate that we've audited them and defined them as not
+ raising exceptions.
+ """
+ both_exc_and_no_exc = []
+ missing_except = []
+ class_re = re.compile(r'^(cdef\s+)?(public\s+)?(api\s+)?class (\w+).*:',
+ re.MULTILINE)
+ except_re = re.compile(r'cdef\s+' # start with cdef
+ r'([\w *]*?)\s*' # this is the return signature
+ r'(\w+)\s*\(' # the function name
+ r'[^)]*\)\s*' # parameters
+ r'(.*)\s*:' # the except clause
+ r'\s*(#\s*cannot[- _]raise)?' # cannot raise comment
+ )
+ for fname, text in self.get_source_file_contents(
+ extensions=('.pyx',)):
+ known_classes = set([m[-1] for m in class_re.findall(text)])
+ cdefs = except_re.findall(text)
+ for sig, func, exc_clause, no_exc_comment in cdefs:
+ if sig.startswith('api '):
+ sig = sig[4:]
+ if not sig or sig in known_classes:
+ sig = 'object'
+ if 'nogil' in exc_clause:
+ exc_clause = exc_clause.replace('nogil', '').strip()
+ if exc_clause and no_exc_comment:
+ both_exc_and_no_exc.append((fname, func))
+ if sig != 'object' and not (exc_clause or no_exc_comment):
+ missing_except.append((fname, func))
+ error_msg = []
+ if both_exc_and_no_exc:
+ error_msg.append('The following functions had "cannot raise" comments'
+ ' but did have an except clause set:')
+ for fname, func in both_exc_and_no_exc:
+ error_msg.append('%s:%s' % (fname, func))
+ error_msg.extend(('', ''))
+ if missing_except:
+ error_msg.append('The following functions have fixed return types,'
+ ' but no except clause.')
+ error_msg.append('Either add an except or append "# cannot_raise".')
+ for fname, func in missing_except:
+ error_msg.append('%s:%s' % (fname, func))
+ error_msg.extend(('', ''))
+ if error_msg:
+ self.fail('\n'.join(error_msg))
More information about the bazaar-commits
mailing list