[PATCH 1/2] x86/retpoline: Fill RSB on context switch for affected CPUs
Tyler Hicks
tyhicks at canonical.com
Sat Sep 1 14:11:23 UTC 2018
On 08/29/2018 02:20 PM, Tyler Hicks wrote:
> From: David Woodhouse <dwmw at amazon.co.uk>
>
> On context switch from a shallow call stack to a deeper one, as the CPU
> does 'ret' up the deeper side it may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace.
>
> This is problematic if neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel) are active, as malicious code in
> userspace may then be executed speculatively.
>
> Overwrite the CPU's return prediction stack with calls which are predicted
> to return to an infinite loop, to "capture" speculation if this
> happens. This is required both for retpoline, and also in conjunction with
> IBRS for !SMEP && !KPTI.
>
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so much
> overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.
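
For readers who want to see the mechanism concretely, a minimal sketch of the
stuffing sequence follows. It is not taken from the patch: the function name,
loop count, and label numbers are made up, and it only mirrors the shape of the
__FILL_RETURN_BUFFER asm macro the patch actually uses (x86_64, GCC inline asm,
ignoring the red-zone and alternative-patching details the real code handles):

    #define RSB_FILL_LOOPS_SKETCH 32        /* illustrative; the patch uses RSB_CLEAR_LOOPS */

    static inline void rsb_fill_sketch(void)
    {
            unsigned long i = RSB_FILL_LOOPS_SKETCH;

            asm volatile(
                    "1:\n\t"
                    "call 2f\n\t"           /* push an RSB entry predicting a return to 3: */
                    "3:\n\t"
                    "pause\n\t"             /* speculation trap: a stale 'ret' that consumes */
                    "lfence\n\t"            /* this entry spins here instead of running      */
                    "jmp 3b\n\t"            /* attacker-controlled code                      */
                    "2:\n\t"
                    "dec %0\n\t"
                    "jnz 1b\n\t"
                    "add %1, %%rsp\n\t"     /* drop the real return addresses the calls pushed */
                    : "+r" (i)
                    : "i" (RSB_FILL_LOOPS_SKETCH * sizeof(unsigned long))
                    : "memory");
    }

Each iteration's call places one benign prediction in the RSB; by the time the
loop finishes, any stale userspace entries have been overwritten (or, on
Skylake-era parts, the RSB is no longer empty).
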
>
> [ tglx: Added missing vendor check and slightly massaged comments and
> changelog ]
>
> [js] backport to 4.4 -- __switch_to_asm does not exist there, so we
> have to patch the switch_to macros for both x86_32 and x86_64.
>
> Signed-off-by: David Woodhouse <dwmw at amazon.co.uk>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
> Acked-by: Arjan van de Ven <arjan at linux.intel.com>
> Cc: gnomes at lxorguk.ukuu.org.uk
> Cc: Rik van Riel <riel at redhat.com>
> Cc: Andi Kleen <ak at linux.intel.com>
> Cc: Josh Poimboeuf <jpoimboe at redhat.com>
> Cc: thomas.lendacky at amd.com
> Cc: Peter Zijlstra <peterz at infradead.org>
> Cc: Linus Torvalds <torvalds at linux-foundation.org>
> Cc: Jiri Kosina <jikos at kernel.org>
> Cc: Andy Lutomirski <luto at amacapital.net>
> Cc: Dave Hansen <dave.hansen at intel.com>
> Cc: Kees Cook <keescook at google.com>
> Cc: Tim Chen <tim.c.chen at linux.intel.com>
> Cc: Greg Kroah-Hartman <gregkh at linux-foundation.org>
> Cc: Paul Turner <pjt at google.com>
> Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk
> Signed-off-by: Jiri Slaby <jslaby at suse.cz>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
>
> CVE-2017-5715
I made a mistake here by not realizing that userspace-to-userspace
SpectreRSB attacks *did* receive a new CVE ID. This patch and the
corresponding Bionic and Xenial patches should be tagged with
CVE-2018-15572 instead of CVE-2017-5715.
Tyler
>
> (backported from commit c995efd5a740d9cbafbf58bde4973e8b50b4d761)
> Signed-off-by: Tyler Hicks <tyhicks at canonical.com>
> ---
> arch/x86/include/asm/cpufeature.h | 1 +
> arch/x86/include/asm/switch_to.h | 38 ++++++++++++++++++++++++++++++++++++++
> arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 75 insertions(+)
>
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index fdeda4cac75c..0c9f622c99f4 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -187,6 +187,7 @@
> #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
> #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
> #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
> +#define X86_FEATURE_RSB_CTXSW ( 7*32+21) /* Fill RSB on context switches */
> #define X86_FEATURE_SSBD ( 7*32+22) /* Speculative Store Bypass Disable */
> #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23 ) /* Disable Speculative Store Bypass. */
> #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
> diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
> index d7f3b3b78ac3..53ff351ded61 100644
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -1,6 +1,8 @@
> #ifndef _ASM_X86_SWITCH_TO_H
> #define _ASM_X86_SWITCH_TO_H
>
> +#include <asm/nospec-branch.h>
> +
> struct task_struct; /* one of the stranger aspects of C forward declarations */
> __visible struct task_struct *__switch_to(struct task_struct *prev,
> struct task_struct *next);
> @@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
> #define __switch_canary_iparam
> #endif /* CC_STACKPROTECTOR */
>
> +#ifdef CONFIG_RETPOLINE
> + /*
> + * When switching from a shallower to a deeper call stack
> + * the RSB may either underflow or use entries populated
> + * with userspace addresses. On CPUs where those concerns
> + * exist, overwrite the RSB with entries which capture
> + * speculative execution to prevent attack.
> + */
> +#define __retpoline_fill_return_buffer \
> + ALTERNATIVE("jmp 910f", \
> + __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
> + X86_FEATURE_RSB_CTXSW) \
> + "910:\n\t"
> +#else
> +#define __retpoline_fill_return_buffer
> +#endif
> +
> /*
> * Saving eflags is important. It switches not only IOPL between tasks,
> * it also protects other tasks from NT leaking through sysenter etc.
> @@ -46,6 +65,7 @@ do { \
> "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
> "pushl %[next_ip]\n\t" /* restore EIP */ \
> __switch_canary \
> + __retpoline_fill_return_buffer \
> "jmp __switch_to\n" /* regparm call */ \
> "1:\t" \
> "popl %%ebp\n\t" /* restore EBP */ \
> @@ -100,6 +120,23 @@ do { \
> #define __switch_canary_iparam
> #endif /* CC_STACKPROTECTOR */
>
> +#ifdef CONFIG_RETPOLINE
> + /*
> + * When switching from a shallower to a deeper call stack
> + * the RSB may either underflow or use entries populated
> + * with userspace addresses. On CPUs where those concerns
> + * exist, overwrite the RSB with entries which capture
> + * speculative execution to prevent attack.
> + */
> +#define __retpoline_fill_return_buffer \
> + ALTERNATIVE("jmp 910f", \
> + __stringify(__FILL_RETURN_BUFFER(%%r12, RSB_CLEAR_LOOPS, %%rsp)),\
> + X86_FEATURE_RSB_CTXSW) \
> + "910:\n\t"
> +#else
> +#define __retpoline_fill_return_buffer
> +#endif
> +
> /* Save restore flags to clear handle leaking NT */
> #define switch_to(prev, next, last) \
> asm volatile(SAVE_CONTEXT \
> @@ -108,6 +145,7 @@ do { \
> "call __switch_to\n\t" \
> "movq "__percpu_arg([current_task])",%%rsi\n\t" \
> __switch_canary \
> + __retpoline_fill_return_buffer \
> "movq %P[thread_info](%%rsi),%%r8\n\t" \
> "movq %%rax,%%rdi\n\t" \
> "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 74207bda3067..ad435e671bd0 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -26,6 +26,7 @@
> #include <asm/alternative.h>
> #include <asm/pgtable.h>
> #include <asm/cacheflush.h>
> +#include <asm/intel-family.h>
> #include <asm/e820.h>
>
> static double __initdata x = 4195835.0;
> @@ -323,6 +324,23 @@ disable:
> return SPECTRE_V2_CMD_NONE;
> }
>
> +/* Check for Skylake-like CPUs (for RSB handling) */
> +static bool __init is_skylake_era(void)
> +{
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
> + boot_cpu_data.x86 == 6) {
> + switch (boot_cpu_data.x86_model) {
> + case INTEL_FAM6_SKYLAKE_MOBILE:
> + case INTEL_FAM6_SKYLAKE_DESKTOP:
> + case INTEL_FAM6_SKYLAKE_X:
> + case INTEL_FAM6_KABYLAKE_MOBILE:
> + case INTEL_FAM6_KABYLAKE_DESKTOP:
> + return true;
> + }
> + }
> + return false;
> +}
> +
> static void __init spectre_v2_select_mitigation(void)
> {
> enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
> @@ -387,6 +405,24 @@ retpoline_auto:
> ibrs_supported ? "supported" : "not-supported");
>
> /*
> + * If neither SMEP nor KPTI are available, there is a risk of
> + * hitting userspace addresses in the RSB after a context switch
> + * from a shallow call stack to a deeper one. To prevent this fill
> + * the entire RSB, even when using IBRS.
> + *
> + * Skylake era CPUs have a separate issue with *underflow* of the
> + * RSB, when they will predict 'ret' targets from the generic BTB.
> + * The proper mitigation for this is IBRS. If IBRS is not supported
> + * or deactivated in favour of retpolines the RSB fill on context
> + * switch is required.
> + */
> + if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
> + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");
> + }
> +
> + /*
> * If we have a full retpoline mode and then disable IBPB in kernel mode
> * we do not require both.
> */
>
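
One note on how the new code takes effect: the ALTERNATIVE("jmp 910f",
__FILL_RETURN_BUFFER(...), X86_FEATURE_RSB_CTXSW) construct in the switch_to
macros above is resolved by boot-time code patching, so the fill only costs
anything on CPUs where bugs.c forces the feature bit. A rough C equivalent
(illustrative only; static_cpu_has() stands in for the binary patching, and
rsb_fill_sketch() is the hypothetical helper sketched earlier):

    static inline void ctxsw_fill_rsb_sketch(void)
    {
            /*
             * Equivalent effect, not the real mechanism: the patch emits either
             * "jmp 910f" (skip) or the fill sequence directly into the context
             * switch path, chosen once at boot based on X86_FEATURE_RSB_CTXSW.
             */
            if (static_cpu_has(X86_FEATURE_RSB_CTXSW))
                    rsb_fill_sketch();
    }
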