Rev 4936: (jam) Clean up pyrex cdef functions to be aware of exceptions. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Jan 6 20:48:32 GMT 2010


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4936 [merge]
revision-id: pqm at pqm.ubuntu.com-20100106204826-rehs7s23w0jbz6nj
parent: pqm at pqm.ubuntu.com-20100106074025-ga21h28spm91o1h8
parent: john at arbash-meinel.com-20100106185008-ak602m6fkd3fq4q9
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2010-01-06 20:48:26 +0000
message:
  (jam) Clean up pyrex cdef functions to be aware of exceptions.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/_annotator_pyx.pyx      _annotator_pyx.pyx-20090623201937-ic33ic1aqvd9mjje-1
  bzrlib/_btree_serializer_pyx.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
  bzrlib/_chk_map_pyx.pyx        _chk_map_pyx.pyx-20090309111231-peyz7p2azr0dzdrb-1
  bzrlib/_dirstate_helpers_pyx.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/_groupcompress_pyx.pyx  _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
  bzrlib/_knit_load_data_pyx.pyx knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
  bzrlib/_known_graph_pyx.pyx    _known_graph_pyx.pyx-20090610194911-yjk73td9hpjilas0-1
  bzrlib/_rio_pyx.pyx            _rio_pyx.pyx-20090514104636-8203jcqvfny56yrd-1
  bzrlib/_simple_set_pyx.pxd     _static_tuple_intern-20091002191442-ix6rkqbw8ktoj7r5-1
  bzrlib/_simple_set_pyx.pyx     _static_tuple_intern-20091002053806-sid67p8spedru51w-1
  bzrlib/_walkdirs_win32.pyx     _walkdirs_win32.pyx-20080716220454-kweh3tgxez5dvw2l-2
  bzrlib/tests/test_source.py    test_source.py-20051207061333-a58dea6abecc030d
=== modified file 'NEWS'
--- a/NEWS	2010-01-05 16:55:12 +0000
+++ b/NEWS	2010-01-06 20:48:26 +0000
@@ -184,6 +184,11 @@
 Testing
 *******
 
+* We have a new ``test_source`` that ensures all pyrex ``cdef`` functions
+  handle exceptions somehow. (Possibly by setting ``# cannot_raise``
+  rather than an ``except ?:`` clause.) This should help prevent bugs like
+  bug #495023. (John Arbash Meinel)
+
 
 bzr 2.1.0b4
 ###########

=== modified file 'bzrlib/_annotator_pyx.pyx'
--- a/bzrlib/_annotator_pyx.pyx	2009-07-08 17:09:03 +0000
+++ b/bzrlib/_annotator_pyx.pyx	2010-01-05 04:59:57 +0000
@@ -83,7 +83,14 @@
     return 0
 
 
-cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos):
+cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise
+    """Return the next entry from this tuple.
+
+    :param tpl: The tuple we are investigating, *must* be a PyTuple
+    :param pos: The last item we found. Will be updated to the new position.
+    
+    This cannot raise an exception, as it does no error checking.
+    """
     pos[0] = pos[0] + 1
     if pos[0] >= PyTuple_GET_SIZE(tpl):
         return NULL

=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-10-12 20:07:09 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2010-01-05 05:09:45 +0000
@@ -65,7 +65,7 @@
 
 
 # TODO: Find some way to import this from _dirstate_helpers
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
     # memrchr seems to be a GNU extension, so we have to implement it ourselves
     # It is not present in any win32 standard library
     cdef char *pos

=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx	2009-10-21 21:27:19 +0000
+++ b/bzrlib/_chk_map_pyx.pyx	2010-01-05 05:09:45 +0000
@@ -82,7 +82,7 @@
 _unknown = None
 
 # We shouldn't just copy this from _dirstate_helpers_pyx
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
     # memrchr seems to be a GNU extension, so we have to implement it ourselves
     cdef char *pos
     cdef char *start

=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'
--- a/bzrlib/_dirstate_helpers_pyx.pyx	2010-01-05 04:08:35 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx	2010-01-05 07:22:59 +0000
@@ -119,7 +119,7 @@
     # void *memrchr(void *s, int c, size_t len)
 
 
-cdef void* _my_memrchr(void *s, int c, size_t n):
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
     # memrchr seems to be a GNU extension, so we have to implement it ourselves
     cdef char *pos
     cdef char *start
@@ -165,7 +165,7 @@
     return PyString_FromStringAndSize(s, size)
 
 
-cdef int _is_aligned(void *ptr):
+cdef int _is_aligned(void *ptr): # cannot_raise
     """Is this pointer aligned to an integer size offset?
 
     :return: 1 if this pointer is aligned, 0 otherwise.
@@ -173,7 +173,7 @@
     return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
 
 
-cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
+cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
     cdef unsigned char *cur1
     cdef unsigned char *cur2
     cdef unsigned char *end1
@@ -295,7 +295,7 @@
 
 
 cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
-                                      char *path2, int path2_len):
+                                      char *path2, int path2_len): # cannot_raise
     """Compare two paths by what directory they are in.
 
     see ``_cmp_path_by_dirblock`` for details.
@@ -768,7 +768,7 @@
     state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
 
-cdef int minikind_from_mode(int mode):
+cdef int minikind_from_mode(int mode): # cannot_raise
     # in order of frequency:
     if S_ISREG(mode):
         return c"f"
@@ -915,7 +915,8 @@
     return link_or_sha1
 
 
-cdef char _minikind_from_string(object string):
+# TODO: Do we want to worry about exceptions here?
+cdef char _minikind_from_string(object string) except? -1:
     """Convert a python string to a char."""
     return PyString_AsString(string)[0]
 
@@ -953,7 +954,7 @@
     raise KeyError(PyString_FromStringAndSize(_minikind, 1))
 
 
-cdef int _versioned_minikind(char minikind):
+cdef int _versioned_minikind(char minikind): # cannot_raise
     """Return non-zero if minikind is in fltd"""
     return (minikind == c'f' or
             minikind == c'd' or
@@ -1373,7 +1374,7 @@
     def iter_changes(self):
         return self
 
-    cdef void _gather_result_for_consistency(self, result):
+    cdef int _gather_result_for_consistency(self, result) except -1:
         """Check a result we will yield to make sure we are consistent later.
         
         This gathers result's parents into a set to output later.
@@ -1381,7 +1382,7 @@
         :param result: A result tuple.
         """
         if not self.partial or not result[0]:
-            return
+            return 0
         self.seen_ids.add(result[0])
         new_path = result[1][1]
         if new_path:
@@ -1391,6 +1392,7 @@
             # Add the root directory which parent_directories does not
             # provide.
             self.search_specific_file_parents.add('')
+        return 0
 
     cdef int _update_current_block(self) except -1:
         if (self.block_index < len(self.state._dirblocks) and

=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx	2009-11-06 18:05:03 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx	2010-01-05 05:09:45 +0000
@@ -279,7 +279,8 @@
 
 
 cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
-    unsigned char cmd, unsigned int *offset, unsigned int *length) nogil:
+    unsigned char cmd, unsigned int *offset,
+    unsigned int *length) nogil: # cannot_raise
     """Decode a copy instruction from the next few bytes.
 
     A copy instruction is a variable number of bytes, so we will parse the

=== modified file 'bzrlib/_knit_load_data_pyx.pyx'
--- a/bzrlib/_knit_load_data_pyx.pyx	2009-06-22 12:52:39 +0000
+++ b/bzrlib/_knit_load_data_pyx.pyx	2010-01-04 23:13:20 +0000
@@ -97,11 +97,12 @@
         self.end_str = NULL
         self.history_len = 0
 
-    cdef void validate(self):
+    cdef int validate(self) except -1:
         if not PyDict_CheckExact(self.cache):
             raise TypeError('kndx._cache must be a python dict')
         if not PyList_CheckExact(self.history):
             raise TypeError('kndx._history must be a python list')
+        return 0
 
     cdef object process_options(self, char *option_str, char *end):
         """Process the options string into a list."""

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-11-30 17:25:22 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2010-01-05 05:09:45 +0000
@@ -703,7 +703,7 @@
             self._revno_first, self._revno_second, self._revno_last,
             self.is_first_child, self.seen_by_child)
 
-    cdef int has_pending_parents(self):
+    cdef int has_pending_parents(self): # cannot_raise
         if self.left_pending_parent is not None or self.pending_parents:
             return 1
         return 0

=== modified file 'bzrlib/_rio_pyx.pyx'
--- a/bzrlib/_rio_pyx.pyx	2009-05-15 02:36:03 +0000
+++ b/bzrlib/_rio_pyx.pyx	2010-01-05 04:59:57 +0000
@@ -49,7 +49,7 @@
 
 from bzrlib.rio import Stanza
 
-cdef int _valid_tag_char(char c):
+cdef int _valid_tag_char(char c): # cannot_raise
     return (c == c'_' or c == c'-' or 
             (c >= c'a' and c <= c'z') or
             (c >= c'A' and c <= c'Z') or

=== modified file 'bzrlib/_simple_set_pyx.pxd'
--- a/bzrlib/_simple_set_pyx.pxd	2009-10-09 16:49:45 +0000
+++ b/bzrlib/_simple_set_pyx.pxd	2010-01-05 05:28:42 +0000
@@ -88,4 +88,4 @@
 cdef api int SimpleSet_Discard(object self, object key) except -1
 cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
 cdef api Py_ssize_t SimpleSet_Size(object self) except -1
-cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key)
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key) except -1

=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx	2009-10-14 15:57:06 +0000
+++ b/bzrlib/_simple_set_pyx.pyx	2010-01-05 05:28:42 +0000
@@ -540,7 +540,8 @@
     return _check_self(self)._used
 
 
-cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key):
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
+                            PyObject **key) except -1:
     """Walk over items in a SimpleSet.
 
     :param pos: should be initialized to 0 by the caller, and will be updated
@@ -567,7 +568,8 @@
     return 1
 
 
-cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg):
+cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
+                            void *arg) except -1:
     """This is an implementation of 'tp_traverse' that hits the whole table.
 
     Cython/Pyrex don't seem to let you define a tp_traverse, and they only

=== modified file 'bzrlib/_walkdirs_win32.pyx'
--- a/bzrlib/_walkdirs_win32.pyx	2009-03-23 14:59:43 +0000
+++ b/bzrlib/_walkdirs_win32.pyx	2010-01-05 04:59:57 +0000
@@ -109,7 +109,7 @@
                                  wcslen(data.cFileName))
 
 
-cdef int _get_mode_bits(WIN32_FIND_DATAW *data):
+cdef int _get_mode_bits(WIN32_FIND_DATAW *data): # cannot_raise
     cdef int mode_bits
 
     mode_bits = 0100666 # writeable file, the most common
@@ -121,13 +121,13 @@
     return mode_bits
 
 
-cdef __int64 _get_size(WIN32_FIND_DATAW *data):
+cdef __int64 _get_size(WIN32_FIND_DATAW *data): # cannot_raise
     # Pyrex casts a DWORD into a PyLong anyway, so it is safe to do << 32
     # on a DWORD
     return ((<__int64>data.nFileSizeHigh) << 32) + data.nFileSizeLow
 
 
-cdef double _ftime_to_timestamp(FILETIME *ft):
+cdef double _ftime_to_timestamp(FILETIME *ft): # cannot_raise
     """Convert from a FILETIME struct into a floating point timestamp.
 
     The fields of a FILETIME structure are the hi and lo part
@@ -147,7 +147,7 @@
     return (val * 1.0e-7) - 11644473600.0
 
 
-cdef int _should_skip(WIN32_FIND_DATAW *data):
+cdef int _should_skip(WIN32_FIND_DATAW *data): # cannot_raise
     """Is this '.' or '..' so we should skip it?"""
     if (data.cFileName[0] != c'.'):
         return 0

=== modified file 'bzrlib/tests/test_source.py'
--- a/bzrlib/tests/test_source.py	2009-09-17 19:16:58 +0000
+++ b/bzrlib/tests/test_source.py	2010-01-05 05:28:42 +0000
@@ -372,3 +372,53 @@
             self.fail(
                 "these files contain an assert statement and should not:\n%s"
                 % '\n'.join(badfiles))
+
+    def test_extension_exceptions(self):
+        """Extension functions should propagate exceptions.
+
+        Either they should return an object, have an 'except' clause, or have a
+        "# cannot_raise" to indicate that we've audited them and defined them as not
+        raising exceptions.
+        """
+        both_exc_and_no_exc = []
+        missing_except = []
+        class_re = re.compile(r'^(cdef\s+)?(public\s+)?(api\s+)?class (\w+).*:',
+                              re.MULTILINE)
+        except_re = re.compile(r'cdef\s+' # start with cdef
+                               r'([\w *]*?)\s*' # this is the return signature
+                               r'(\w+)\s*\(' # the function name
+                               r'[^)]*\)\s*' # parameters
+                               r'(.*)\s*:' # the except clause
+                               r'\s*(#\s*cannot[- _]raise)?' # cannot raise comment
+                              )
+        for fname, text in self.get_source_file_contents(
+                extensions=('.pyx',)):
+            known_classes = set([m[-1] for m in class_re.findall(text)])
+            cdefs = except_re.findall(text)
+            for sig, func, exc_clause, no_exc_comment in cdefs:
+                if sig.startswith('api '):
+                    sig = sig[4:]
+                if not sig or sig in known_classes:
+                    sig = 'object'
+                if 'nogil' in exc_clause:
+                    exc_clause = exc_clause.replace('nogil', '').strip()
+                if exc_clause and no_exc_comment:
+                    both_exc_and_no_exc.append((fname, func))
+                if sig != 'object' and not (exc_clause or no_exc_comment):
+                    missing_except.append((fname, func))
+        error_msg = []
+        if both_exc_and_no_exc:
+            error_msg.append('The following functions had "cannot raise" comments'
+                             ' but did have an except clause set:')
+            for fname, func in both_exc_and_no_exc:
+                error_msg.append('%s:%s' % (fname, func))
+            error_msg.extend(('', ''))
+        if missing_except:
+            error_msg.append('The following functions have fixed return types,'
+                             ' but no except clause.')
+            error_msg.append('Either add an except or append "# cannot_raise".')
+            for fname, func in missing_except:
+                error_msg.append('%s:%s' % (fname, func))
+            error_msg.extend(('', ''))
+        if error_msg:
+            self.fail('\n'.join(error_msg))




More information about the bazaar-commits mailing list