[Oneiric CVE 2/3] ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL

Colin Ian King colin.king at canonical.com
Tue Feb 19 16:16:07 UTC 2013


On 19/02/13 14:18, Luis Henriques wrote:
> From: Oleg Nesterov <oleg at redhat.com>
>
> CVE-2013-0871
>
> putreg() assumes that the tracee is not running and pt_regs_access() can
> safely play with its stack.  However a killed tracee can return from
> ptrace_stop() to the low-level asm code and do RESTORE_REST, this means
> that debugger can actually read/modify the kernel stack until the tracee
> does SAVE_REST again.
>
> set_task_blockstep() can race with SIGKILL too and in some sense this
> race is even worse, the very fact the tracee can be woken up breaks the
> logic.
>
> As Linus suggested we can clear TASK_WAKEKILL around the arch_ptrace()
> call, this ensures that nobody can ever wakeup the tracee while the
> debugger looks at it.  Not only this fixes the mentioned problems, we
> can do some cleanups/simplifications in arch_ptrace() paths.
>
> Probably ptrace_unfreeze_traced() needs more callers, for example it
> makes sense to make the tracee killable for oom-killer before
> access_process_vm().
>
> While at it, add the comment into may_ptrace_stop() to explain why
> ptrace_stop() still can't rely on SIGKILL and signal_pending_state().
>
> Reported-by: Salman Qazi <sqazi at google.com>
> Reported-by: Suleiman Souhlal <suleiman at google.com>
> Suggested-by: Linus Torvalds <torvalds at linux-foundation.org>
> Signed-off-by: Oleg Nesterov <oleg at redhat.com>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> (backported from commit 9899d11f654474d2d54ea52ceaa2a1f4db3abd68)
>
> Conflicts:
> 	arch/x86/kernel/step.c
> 	kernel/ptrace.c
>
> Signed-off-by: Luis Henriques <luis.henriques at canonical.com>
> ---
>   kernel/ptrace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++-----------
>   kernel/signal.c |  5 +++++
>   2 files changed, 55 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index d77fa9e..0f5157c 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -97,9 +97,36 @@ void __ptrace_unlink(struct task_struct *child)
>   	spin_unlock(&child->sighand->siglock);
>   }
>
> -/*
> - * Check that we have indeed attached to the thing..
> - */
> +/* Ensure that nothing can wake it up, even SIGKILL */
> +static bool ptrace_freeze_traced(struct task_struct *task)
> +{
> +	bool ret = false;
> +
> +	spin_lock_irq(&task->sighand->siglock);
> +	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
> +		task->state = __TASK_TRACED;
> +		ret = true;
> +	}
> +	spin_unlock_irq(&task->sighand->siglock);
> +
> +	return ret;
> +}
> +
> +static void ptrace_unfreeze_traced(struct task_struct *task)
> +{
> +	if (task->state != __TASK_TRACED)
> +		return;
> +
> +	WARN_ON(!task->ptrace || task->parent != current);
> +
> +	spin_lock_irq(&task->sighand->siglock);
> +	if (__fatal_signal_pending(task))
> +		wake_up_state(task, __TASK_TRACED);
> +	else
> +		task->state = TASK_TRACED;
> +	spin_unlock_irq(&task->sighand->siglock);
> +}
> +
>   int ptrace_check_attach(struct task_struct *child, int kill)
>   {
>   	int ret = -ESRCH;
> @@ -112,23 +139,29 @@ int ptrace_check_attach(struct task_struct *child, int kill)
>   	 * be changed by us so it's not changing right after this.
>   	 */
>   	read_lock(&tasklist_lock);
> -	if ((child->ptrace & PT_PTRACED) && child->parent == current) {
> +	if (child->ptrace && child->parent == current) {
> +		WARN_ON(child->state == __TASK_TRACED);
>   		/*
>   		 * child->sighand can't be NULL, release_task()
>   		 * does ptrace_unlink() before __exit_signal().
>   		 */
> -		spin_lock_irq(&child->sighand->siglock);
> -		WARN_ON_ONCE(task_is_stopped(child));
> -		if (task_is_traced(child) || kill)
> +		if (kill || ptrace_freeze_traced(child))
>   			ret = 0;
> -		spin_unlock_irq(&child->sighand->siglock);
>   	}
>   	read_unlock(&tasklist_lock);
>
> -	if (!ret && !kill)
> -		ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
> +	if (!ret && !kill) {
> +		if (!wait_task_inactive(child, __TASK_TRACED)) {
> +			/*
> +			 * This can only happen if may_ptrace_stop() fails and
> +			 * ptrace_stop() changes ->state back to TASK_RUNNING,
> +			 * so we should not worry about leaking __TASK_TRACED.
> +			 */
> +			WARN_ON(child->state == __TASK_TRACED);
> +			ret = -ESRCH;
> +		}
> +	}
>
> -	/* All systems go.. */
>   	return ret;
>   }
>
> @@ -777,6 +810,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
>   		goto out_put_task_struct;
>
>   	ret = arch_ptrace(child, request, addr, data);
> +	if (ret || request != PTRACE_DETACH)
> +		ptrace_unfreeze_traced(child);
>
>    out_put_task_struct:
>   	put_task_struct(child);
> @@ -915,8 +950,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
>   	}
>
>   	ret = ptrace_check_attach(child, request == PTRACE_KILL);
> -	if (!ret)
> +	if (!ret) {
>   		ret = compat_arch_ptrace(child, request, addr, data);
> +		if (ret || request != PTRACE_DETACH)
> +			ptrace_unfreeze_traced(child);
> +	}
>
>    out_put_task_struct:
>   	put_task_struct(child);
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 8b0dd5b..51f2e69 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -1669,6 +1669,10 @@ static inline int may_ptrace_stop(void)
>   	 * If SIGKILL was already sent before the caller unlocked
>   	 * ->siglock we must see ->core_state != NULL. Otherwise it
>   	 * is safe to enter schedule().
> +	 *
> +	 * This is almost outdated, a task with the pending SIGKILL can't
> +	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
> +	 * after SIGKILL was already dequeued.
>   	 */
>   	if (unlikely(current->mm->core_state) &&
>   	    unlikely(current->mm == current->parent->mm))
> @@ -1800,6 +1804,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
>   		if (gstop_done)
>   			do_notify_parent_cldstop(current, false, why);
>
> +		/* tasklist protects us from ptrace_freeze_traced() */
>   		__set_current_state(TASK_RUNNING);
>   		if (clear_code)
>   			current->exit_code = 0;
>

Acked-by: Colin Ian King <colin.king at canonical.com>




More information about the kernel-team mailing list