[PATCH 2/2] drm/i915: Synchronize active and retire callbacks
Andrea Righi
andrea.righi at canonical.com
Thu Apr 9 16:40:43 UTC 2020
On Tue, Apr 07, 2020 at 03:27:40PM -0700, Sultan Alsawaf wrote:
> From: Sultan Alsawaf <sultan at kerneltoast.com>
>
> Active and retire callbacks can run simultaneously, causing panics and
> mayhem. The most notable case is with the intel_context_pin/unpin race
> that causes ring and page table corruption. In 5.4, this race is more
> noticeable because intel_ring_unpin() sets ring->vaddr to NULL and
> causes a clean NULL-pointer-dereference panic, but in newer kernels this
> race goes unnoticed.
>
> Here is an example of a crash caused by this race on 5.4:
> BUG: unable to handle page fault for address: 0000000000003448
> RIP: 0010:gen8_emit_flush_render+0x163/0x190
> Call Trace:
> execlists_request_alloc+0x25/0x40
> __i915_request_create+0x1f4/0x2c0
> i915_request_create+0x71/0xc0
> i915_gem_do_execbuffer+0xb98/0x1a80
> ? preempt_count_add+0x68/0xa0
> ? _raw_spin_lock+0x13/0x30
> ? _raw_spin_unlock+0x16/0x30
> i915_gem_execbuffer2_ioctl+0x1de/0x3c0
> ? i915_gem_busy_ioctl+0x7f/0x1d0
> ? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
> drm_ioctl_kernel+0xb2/0x100
> drm_ioctl+0x209/0x360
> ? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
> ksys_ioctl+0x87/0xc0
> __x64_sys_ioctl+0x16/0x20
> do_syscall_64+0x4e/0x150
> entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Protect the active and retire callbacks with their own lock to prevent
> them from running at the same time as one another.
>
> Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
> Cc: <stable at vger.kernel.org>
> Signed-off-by: Sultan Alsawaf <sultan at kerneltoast.com>
> Signed-off-by: Sultan Alsawaf <sultan.alsawaf at canonical.com>
> ---
> drivers/gpu/drm/i915/i915_active.c | 48 ++++++++++++++++++++----
> drivers/gpu/drm/i915/i915_active.h | 10 ++---
> drivers/gpu/drm/i915/i915_active_types.h | 3 ++
> 3 files changed, 48 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 51dc8753b527..d0df1e0881c2 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -149,8 +149,22 @@ __active_retire(struct i915_active *ref, bool lock)
> }
>
> /* After the final retire, the entire struct may be freed */
> - if (ref->retire)
> - ref->retire(ref);
> + if (ref->retire) {
> + if (ref->active) {
> + bool freed = false;
> +
> + /* Don't race with the active callback, and avoid UaF */
> + down_write(&ref->rwsem);
> + ref->freed = &freed;
So, we allocate "freed" on the stack and assign its address to ref->freed,
meaning another thread can potentially access this thread's stack frame
(and read a dangling pointer if this frame unwinds first). I'm not sure
this is a safe practice.
I'm still trying to understand the logic behind this — sorry if I'm
asking something obvious — but why not do something simpler, like this:
down_write(&ref->rwsem);
if (ref->retire)
ref->retire(ref);
up_write(&ref->rwsem);
> + ref->retire(ref);
> + if (!freed) {
> + ref->freed = NULL;
> + up_write(&ref->rwsem);
> + }
> + } else {
> + ref->retire(ref);
> + }
> + }
> }
>
> static void
> @@ -241,7 +255,8 @@ void __i915_active_init(struct drm_i915_private *i915,
> struct i915_active *ref,
> int (*active)(struct i915_active *ref),
> void (*retire)(struct i915_active *ref),
> - struct lock_class_key *key)
> + struct lock_class_key *key,
> + struct lock_class_key *rkey)
> {
> debug_active_init(ref);
>
> @@ -254,6 +269,9 @@ void __i915_active_init(struct drm_i915_private *i915,
> init_llist_head(&ref->preallocated_barriers);
> atomic_set(&ref->count, 0);
> __mutex_init(&ref->mutex, "i915_active", key);
> + ref->freed = NULL;
> + if (ref->active && ref->retire)
> + __init_rwsem(&ref->rwsem, "i915_active.rwsem", rkey);
> }
>
> static bool ____active_del_barrier(struct i915_active *ref,
> @@ -357,8 +375,20 @@ int i915_active_acquire(struct i915_active *ref)
> if (err)
> return err;
>
> - if (!atomic_read(&ref->count) && ref->active)
> - err = ref->active(ref);
Again, why not something simple like:
down_read(&ref->rwsem);
if (!atomic_read(&ref->count) && ref->active)
err = ref->active(ref);
up_read(&ref->rwsem);
> + if (!atomic_read(&ref->count) && ref->active) {
> + if (ref->retire) {
> + /*
> + * This can be a recursive call, and the mutex above
> + * already protects from concurrent active callbacks, so
> + * a read lock fits best.
> + */
> + down_read(&ref->rwsem);
> + err = ref->active(ref);
> + up_read(&ref->rwsem);
> + } else {
> + err = ref->active(ref);
> + }
> + }
> if (!err) {
> debug_active_activate(ref);
> atomic_inc(&ref->count);
> @@ -482,15 +512,19 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
> return err;
> }
>
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> void i915_active_fini(struct i915_active *ref)
> {
> + if (ref->freed) {
> + *ref->freed = true;
> + up_write(&ref->rwsem);
> + }
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> debug_active_fini(ref);
> GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
> GEM_BUG_ON(atomic_read(&ref->count));
> mutex_destroy(&ref->mutex);
> -}
> #endif
> +}
>
> static inline bool is_idle_barrier(struct active_node *node, u64 idx)
> {
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 0ad7ef60d15f..aaec0a773551 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -362,11 +362,13 @@ void __i915_active_init(struct drm_i915_private *i915,
> struct i915_active *ref,
> int (*active)(struct i915_active *ref),
> void (*retire)(struct i915_active *ref),
> - struct lock_class_key *key);
> + struct lock_class_key *key,
> + struct lock_class_key *rkey);
> #define i915_active_init(i915, ref, active, retire) do { \
> static struct lock_class_key __key; \
> + static struct lock_class_key __rkey; \
> \
> - __i915_active_init(i915, ref, active, retire, &__key); \
> + __i915_active_init(i915, ref, active, retire, &__key, &__rkey); \
> } while (0)
>
> int i915_active_ref(struct i915_active *ref,
> @@ -393,11 +395,7 @@ i915_active_is_idle(const struct i915_active *ref)
> return !atomic_read(&ref->count);
> }
>
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> void i915_active_fini(struct i915_active *ref);
> -#else
> -static inline void i915_active_fini(struct i915_active *ref) { }
> -#endif
>
> int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> struct intel_engine_cs *engine);
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index 1854e7d168c1..6619342eaa0d 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -12,6 +12,7 @@
> #include <linux/mutex.h>
> #include <linux/rbtree.h>
> #include <linux/rcupdate.h>
> +#include <linux/rwsem.h>
>
> struct drm_i915_private;
> struct i915_active_request;
> @@ -50,6 +51,8 @@ struct i915_active {
> struct rb_root tree;
> struct mutex mutex;
> atomic_t count;
> + struct rw_semaphore rwsem;
> + bool *freed;
Why does freed need to be a pointer? Why not just a bool?
>
> unsigned long flags;
> #define I915_ACTIVE_GRAB_BIT 0
Thanks,
-Andrea
More information about the kernel-team
mailing list