Rev 3641: More safety checks around PyString_FromStringAndSize, in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/dirstate_segv_259350

John Arbash Meinel john at arbash-meinel.com
Tue Aug 19 16:41:52 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/dirstate_segv_259350

------------------------------------------------------------
revno: 3641
revision-id: john at arbash-meinel.com-20080819154150-78ta5hv2oorz0mpl
parent: pqm at pqm.ubuntu.com-20080819034437-8cr7y59abr4wemaz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate_segv_259350
timestamp: Tue 2008-08-19 10:41:50 -0500
message:
  More safety checks around PyString_FromStringAndSize,
  also make sure we have proper handling of trailing garbage in the
  C dirblock parser.
modified:
  bzrlib/_dirstate_helpers_c.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/tests/test__dirstate_helpers.py test_dirstate_helper-20070504035751-jsbn00xodv0y1eve-2
-------------- next part --------------
=== modified file 'bzrlib/_dirstate_helpers_c.pyx'
--- a/bzrlib/_dirstate_helpers_c.pyx	2007-10-10 21:18:06 +0000
+++ b/bzrlib/_dirstate_helpers_c.pyx	2008-08-19 15:41:50 +0000
@@ -59,7 +59,7 @@
     char *PyString_AsString(object p)
     char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
     object PyString_FromString(char *)
-    object PyString_FromStringAndSize(char *, int)
+    object PyString_FromStringAndSize(char *, Py_ssize_t)
     int PyString_Size(object p)
     int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
     int PyString_CheckExact(object p)
@@ -110,6 +110,13 @@
         return None
     return <char*>found - <char*>_s
 
+cdef object safe_string_from_size(char *s, Py_ssize_t size):
+    if size < 0:
+        raise AssertionError(
+            'tried to create a string with an invalid size: %d @0x%x'
+            % (size, <int>s))
+    return PyString_FromStringAndSize(s, size)
+
 
 cdef int _is_aligned(void *ptr):
     """Is this pointer aligned to an integer size offset?
@@ -485,11 +492,23 @@
         self.end_cstr = self.text_cstr + self.text_size
         self.cur_cstr = self.text_cstr
 
-    cdef char *get_next(self, int *size):
+    cdef char *get_next(self, int *size) except NULL:
         """Return a pointer to the start of the next field."""
         cdef char *next
+        cdef Py_ssize_t extra_len
+
+        if self.cur_cstr == NULL:
+            raise AssertionError('get_next() called when cur_str is NULL')
+        elif self.cur_cstr >= self.end_cstr:
+            raise AssertionError('get_next() called when there are no chars'
+                                 ' left')
         next = self.cur_cstr
-        self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr-next)
+        self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr - next)
+        if self.cur_cstr == NULL:
+            extra_len = self.end_cstr - next
+            raise AssertionError('failed to find trailing NULL (\\0).'
+                ' Trailing garbage: %r'
+                % safe_string_from_size(next, extra_len))
         size[0] = self.cur_cstr - next
         self.cur_cstr = self.cur_cstr + 1
         return next
@@ -499,7 +518,7 @@
         cdef int size
         cdef char *next
         next = self.get_next(&size)
-        return PyString_FromStringAndSize(next, size)
+        return safe_string_from_size(next, size)
 
     cdef int _init(self) except -1:
         """Get the pointer ready.
@@ -570,7 +589,7 @@
                        # <object>
                        PyString_AS_STRING_void(p_current_dirname[0]),
                        cur_size+1) != 0):
-            dirname = PyString_FromStringAndSize(dirname_cstr, cur_size)
+            dirname = safe_string_from_size(dirname_cstr, cur_size)
             p_current_dirname[0] = <void*>dirname
             new_block[0] = 1
         else:
@@ -623,7 +642,7 @@
         if cur_size != 1 or trailing[0] != c'\n':
             raise AssertionError(
                 'Bad parse, we expected to end on \\n, not: %d %s: %s'
-                % (cur_size, PyString_FromStringAndSize(trailing, cur_size),
+                % (cur_size, safe_string_from_size(trailing, cur_size),
                    ret))
         return ret
 

=== modified file 'bzrlib/tests/test__dirstate_helpers.py'
--- a/bzrlib/tests/test__dirstate_helpers.py	2007-07-18 20:30:14 +0000
+++ b/bzrlib/tests/test__dirstate_helpers.py	2008-08-19 15:41:50 +0000
@@ -691,6 +691,19 @@
         self.assertEqual(dirstate.DirState.IN_MEMORY_UNMODIFIED,
                          state._dirblock_state)
 
+    def test_trailing_garbage(self):
+        tree, state, expected = self.create_basic_dirstate()
+        # We can modify the file as long as it hasn't been read yet.
+        f = open('dirstate', 'ab')
+        try:
+            # Add bogus trailing garbage
+            f.write('bogus\n')
+        finally:
+            f.close()
+        e = self.assertRaises(AssertionError, state._read_dirblocks_if_needed)
+        # Make sure we mention the bogus characters in the error
+        self.assertContainsRe(str(e), 'bogus')
+
 
 class TestCompiledReadDirblocks(TestReadDirblocks):
     """Test the pyrex implementation of _read_dirblocks"""



More information about the bazaar-commits mailing list