[PATCH 1/2] x86/retpoline: Fill RSB on context switch for affected CPUs

Tyler Hicks tyhicks at canonical.com
Sat Sep 1 14:11:23 UTC 2018


On 08/29/2018 02:20 PM, Tyler Hicks wrote:
> From: David Woodhouse <dwmw at amazon.co.uk>
> 
> On context switch from a shallow call stack to a deeper one, as the CPU
> does 'ret' up the deeper side it may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace.
> 
> This is problematic if neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel) are active, as malicious code in
> userspace may then be executed speculatively.
> 
> Overwrite the CPU's return prediction stack with calls which are predicted
> to return to an infinite loop, to "capture" speculation if this
> happens. This is required both for retpoline, and also in conjunction with
> IBRS for !SMEP && !KPTI.
> 
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so much
> overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.
> 
> [ tglx: Added missing vendor check and slightly massaged comments and
>   	changelog ]
> 
> [js] backport to 4.4 -- __switch_to_asm does not exist there, we
>      have to patch the switch_to macros for both x86_32 and x86_64.
> 
> Signed-off-by: David Woodhouse <dwmw at amazon.co.uk>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
> Acked-by: Arjan van de Ven <arjan at linux.intel.com>
> Cc: gnomes at lxorguk.ukuu.org.uk
> Cc: Rik van Riel <riel at redhat.com>
> Cc: Andi Kleen <ak at linux.intel.com>
> Cc: Josh Poimboeuf <jpoimboe at redhat.com>
> Cc: thomas.lendacky at amd.com
> Cc: Peter Zijlstra <peterz at infradead.org>
> Cc: Linus Torvalds <torvalds at linux-foundation.org>
> Cc: Jiri Kosina <jikos at kernel.org>
> Cc: Andy Lutomirski <luto at amacapital.net>
> Cc: Dave Hansen <dave.hansen at intel.com>
> Cc: Kees Cook <keescook at google.com>
> Cc: Tim Chen <tim.c.chen at linux.intel.com>
> Cc: Greg Kroah-Hartman <gregkh at linux-foundation.org>
> Cc: Paul Turner <pjt at google.com>
> Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk
> Signed-off-by: Jiri Slaby <jslaby at suse.cz>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> 
> CVE-2017-5715

I made a mistake here by not realizing that userspace-to-userspace
SpectreRSB attacks *did* receive a new CVE ID. This patch and the
corresponding Bionic and Xenial patches should be tagged with
CVE-2018-15572 instead of CVE-2017-5715.

Tyler

> 
> (backported from commit c995efd5a740d9cbafbf58bde4973e8b50b4d761)
> Signed-off-by: Tyler Hicks <tyhicks at canonical.com>
> ---
>  arch/x86/include/asm/cpufeature.h |  1 +
>  arch/x86/include/asm/switch_to.h  | 38 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/cpu/bugs.c        | 36 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 75 insertions(+)
> 
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index fdeda4cac75c..0c9f622c99f4 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -187,6 +187,7 @@
>  #define X86_FEATURE_HW_PSTATE	(7*32+ 8) /* AMD HW-PState */
>  #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
>  #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
> +#define X86_FEATURE_RSB_CTXSW	( 7*32+21) /* Fill RSB on context switches */
>  #define X86_FEATURE_SSBD	( 7*32+22) /* Speculative Store Bypass Disable */
>  #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23 ) /* Disable Speculative Store Bypass. */
>  #define X86_FEATURE_LS_CFG_SSBD	( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
> diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
> index d7f3b3b78ac3..53ff351ded61 100644
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -1,6 +1,8 @@
>  #ifndef _ASM_X86_SWITCH_TO_H
>  #define _ASM_X86_SWITCH_TO_H
>  
> +#include <asm/nospec-branch.h>
> +
>  struct task_struct; /* one of the stranger aspects of C forward declarations */
>  __visible struct task_struct *__switch_to(struct task_struct *prev,
>  					   struct task_struct *next);
> @@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
>  #define __switch_canary_iparam
>  #endif	/* CC_STACKPROTECTOR */
>  
> +#ifdef CONFIG_RETPOLINE
> +	/*
> +	 * When switching from a shallower to a deeper call stack
> +	 * the RSB may either underflow or use entries populated
> +	 * with userspace addresses. On CPUs where those concerns
> +	 * exist, overwrite the RSB with entries which capture
> +	 * speculative execution to prevent attack.
> +	 */
> +#define __retpoline_fill_return_buffer					\
> +	ALTERNATIVE("jmp 910f",						\
> +		__stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
> +		X86_FEATURE_RSB_CTXSW)					\
> +	"910:\n\t"
> +#else
> +#define __retpoline_fill_return_buffer
> +#endif
> +
>  /*
>   * Saving eflags is important. It switches not only IOPL between tasks,
>   * it also protects other tasks from NT leaking through sysenter etc.
> @@ -46,6 +65,7 @@ do {									\
>  		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
>  		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
>  		     __switch_canary					\
> +		     __retpoline_fill_return_buffer			\
>  		     "jmp __switch_to\n"	/* regparm call  */	\
>  		     "1:\t"						\
>  		     "popl %%ebp\n\t"		/* restore EBP   */	\
> @@ -100,6 +120,23 @@ do {									\
>  #define __switch_canary_iparam
>  #endif	/* CC_STACKPROTECTOR */
>  
> +#ifdef CONFIG_RETPOLINE
> +	/*
> +	 * When switching from a shallower to a deeper call stack
> +	 * the RSB may either underflow or use entries populated
> +	 * with userspace addresses. On CPUs where those concerns
> +	 * exist, overwrite the RSB with entries which capture
> +	 * speculative execution to prevent attack.
> +	 */
> +#define __retpoline_fill_return_buffer					\
> +	ALTERNATIVE("jmp 910f",						\
> +		__stringify(__FILL_RETURN_BUFFER(%%r12, RSB_CLEAR_LOOPS, %%rsp)),\
> +		X86_FEATURE_RSB_CTXSW)					\
> +	"910:\n\t"
> +#else
> +#define __retpoline_fill_return_buffer
> +#endif
> +
>  /* Save restore flags to clear handle leaking NT */
>  #define switch_to(prev, next, last) \
>  	asm volatile(SAVE_CONTEXT					  \
> @@ -108,6 +145,7 @@ do {									\
>  	     "call __switch_to\n\t"					  \
>  	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
>  	     __switch_canary						  \
> +	     __retpoline_fill_return_buffer				  \
>  	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
>  	     "movq %%rax,%%rdi\n\t" 					  \
>  	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 74207bda3067..ad435e671bd0 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -26,6 +26,7 @@
>  #include <asm/alternative.h>
>  #include <asm/pgtable.h>
>  #include <asm/cacheflush.h>
> +#include <asm/intel-family.h>
>  #include <asm/e820.h>
>  
>  static double __initdata x = 4195835.0;
> @@ -323,6 +324,23 @@ disable:
>  	return SPECTRE_V2_CMD_NONE;
>  }
>  
> +/* Check for Skylake-like CPUs (for RSB handling) */
> +static bool __init is_skylake_era(void)
> +{
> +	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
> +	    boot_cpu_data.x86 == 6) {
> +		switch (boot_cpu_data.x86_model) {
> +		case INTEL_FAM6_SKYLAKE_MOBILE:
> +		case INTEL_FAM6_SKYLAKE_DESKTOP:
> +		case INTEL_FAM6_SKYLAKE_X:
> +		case INTEL_FAM6_KABYLAKE_MOBILE:
> +		case INTEL_FAM6_KABYLAKE_DESKTOP:
> +			return true;
> +		}
> +	}
> +	return false;
> +}
> +
>  static void __init spectre_v2_select_mitigation(void)
>  {
>  	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
> @@ -387,6 +405,24 @@ retpoline_auto:
>  	        ibrs_supported ? "supported" : "not-supported");
>  
>  	/*
> +	 * If neither SMEP nor KPTI are available, there is a risk of
> +	 * hitting userspace addresses in the RSB after a context switch
> +	 * from a shallow call stack to a deeper one. To prevent this fill
> +	 * the entire RSB, even when using IBRS.
> +	 *
> +	 * Skylake era CPUs have a separate issue with *underflow* of the
> +	 * RSB, when they will predict 'ret' targets from the generic BTB.
> +	 * The proper mitigation for this is IBRS. If IBRS is not supported
> +	 * or deactivated in favour of retpolines the RSB fill on context
> +	 * switch is required.
> +	 */
> +	if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
> +	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> +		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> +		pr_info("Filling RSB on context switch\n");
> +	}
> +
> +	/*
>  	 * If we have a full retpoline mode and then disable IBPB in kernel mode
>  	 * we do not require both.
>  	 */
> 
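For reference (not part of the patch itself): the __FILL_RETURN_BUFFER() sequence
that the new __retpoline_fill_return_buffer macros expand to boils down to roughly
the following on x86_64. This is only a sketch of the idea, assuming the usual
nospec-branch.h implementation; the authoritative definition lives in
arch/x86/include/asm/nospec-branch.h and may differ in label numbering, fencing
and exact operands:

	/* sketch of __FILL_RETURN_BUFFER(%r12, RSB_CLEAR_LOOPS, %rsp) */
	mov	$16, %r12	/* RSB_CLEAR_LOOPS (32) entries, two per loop     */
1:	call	2f		/* each call pushes one prediction onto the RSB   */
11:	pause			/* speculation trap: any 'ret' that consumes this */
	lfence			/* bogus entry just spins here                    */
	jmp	11b
2:	call	3f		/* second call of the pair */
22:	pause
	lfence
	jmp	22b
3:	dec	%r12
	jnz	1b
	add	$(8 * 32), %rsp	/* pop the 32 return addresses pushed above */

Because the sequence is wrapped in ALTERNATIVE("jmp 910f", ..., X86_FEATURE_RSB_CTXSW),
it is only patched in at boot on CPUs where spectre_v2_select_mitigation() forces the
feature bit; on unaffected CPUs the "jmp 910f" simply skips over the slot.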




