Cmnt: [PATCH 05/11] x86/syscall: Don't force use of indirect calls for system calls

Yuxuan Luo yuxuan.luo at canonical.com
Fri Apr 19 20:28:50 UTC 2024


On 4/17/24 19:53, Yuxuan Luo wrote:
> From: Linus Torvalds <torvalds at linux-foundation.org>
>
> Make <asm/syscall.h> build a switch statement instead, and the compiler can
> either decide to generate an indirect jump, or - more likely these days due
> to mitigations - just a series of conditional branches.
>
> Yes, the conditional branches also have branch prediction, but the branch
> prediction is much more controlled, in that it just causes speculatively
> running the wrong system call (harmless), rather than speculatively running
> possibly wrong random less controlled code gadgets.
>
> This doesn't mitigate other indirect calls, but the system call indirection
> is the first and most easily triggered case.
>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
> Reviewed-by: Josh Poimboeuf <jpoimboe at kernel.org>
>
> (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2)
> [yuxuan.luo:
> common.c:
>   - substitute *sys_call_table[nr]() with corresponding *sys_call(regs,
>     nr).
>   - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll
>     define different ia32_sys_call() in syscall_32.c using macros.
>
> syscall_x32.c:
>   - x32_sys_call() should move to syscall_64.c since they both rely on
>     __x64_ system calls.
>
> syscall_32.c:
>   - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute
>     __SYSCALL with __SYSCALL_I386 in #define lines.
>   - For the case where CONFIG_IA32_EMULATION=n, expand regs to six
>     variables.
>   - Since syscall table in Focal includes arch name in syscalls prefixes
>     already, change 'case nr: return __x64_##sym()' to '... return
>     sym()'. This applies to syscall_64.c as well.
>
> syscall_64.c:
>   - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64.
>   - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply
>     with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64
>     lines and substitute __SYSCALL_X32 lines.
> ]
> CVE-2024-2201
> Signed-off-by: Yuxuan Luo <yuxuan.luo at canonical.com>
> ---
>   arch/x86/entry/common.c        | 11 ++++-------
>   arch/x86/entry/syscall_32.c    | 33 +++++++++++++++++++++++++++++++++
>   arch/x86/entry/syscall_64.c    | 27 +++++++++++++++++++++++++++
>   arch/x86/include/asm/syscall.h |  4 ++++
>   4 files changed, 68 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 721109c0cf994..dec36a7133821 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>   
>   	if (likely(nr < NR_syscalls)) {
>   		nr = array_index_nospec(nr, NR_syscalls);
> -		regs->ax = sys_call_table[nr](regs);
> +		regs->ax = x64_sys_call(regs, nr);
>   #ifdef CONFIG_X86_X32_ABI
>   	} else if (likely((nr & __X32_SYSCALL_BIT) &&
>   			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
>   		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
>   					X32_NR_syscalls);
> -		regs->ax = x32_sys_call_table[nr](regs);
> +		regs->ax = x32_sys_call(regs, nr);
>   #endif
>   	}
>   
> @@ -331,7 +331,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
>   	if (likely(nr < IA32_NR_syscalls)) {
>   		nr = array_index_nospec(nr, IA32_NR_syscalls);
>   #ifdef CONFIG_IA32_EMULATION
> -		regs->ax = ia32_sys_call_table[nr](regs);
> +		regs->ax = ia32_sys_call(regs, nr);
>   #else
>   		/*
>   		 * It's possible that a 32-bit syscall implementation
> @@ -339,10 +339,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
>   		 * the high bits are zero.  Make sure we zero-extend all
>   		 * of the args.
>   		 */
> -		regs->ax = ia32_sys_call_table[nr](
> -			(unsigned int)regs->bx, (unsigned int)regs->cx,
> -			(unsigned int)regs->dx, (unsigned int)regs->si,
> -			(unsigned int)regs->di, (unsigned int)regs->bp);
> +		regs->ax = ia32_sys_call(regs, nr);
>   #endif /* CONFIG_IA32_EMULATION */
>   	}
>   
> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
> index 7d17b3addbbb3..e4e186ab20567 100644
> --- a/arch/x86/entry/syscall_32.c
> +++ b/arch/x86/entry/syscall_32.c
> @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] =
>   	[0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
>   #include <asm/syscalls_32.h>
>   };
> +#undef __SYSCALL_I386
> +
> +#ifdef CONFIG_IA32_EMULATION
> +
> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs);
> +
> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> +	switch (nr) {
> +	#include <asm/syscalls_32.h>
> +	default: return __ia32_sys_ni_syscall(regs);
> +	}
> +};
> +
> +#else /* CONFIG_IA32_EMULATION */
> +
> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(	\
> +		(unsigned int)regs->bx, (unsigned int)regs->cx,	\
> +		(unsigned int)regs->dx, (unsigned int)regs->si,	\
> +		(unsigned int)regs->di, (unsigned int)regs->bp);
> +
> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> +	switch (nr) {
> +	#include <asm/syscalls_32.h>
> +	default: return __ia32_sys_ni_syscall(
> +			(unsigned int)regs->bx, (unsigned int)regs->cx,
> +			(unsigned int)regs->dx, (unsigned int)regs->si,
> +			(unsigned int)regs->di, (unsigned int)regs->bp);
> +	}
> +};
> +
> +#endif /* CONFIG_IA32_EMULATION */
> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
> index adf619a856e8d..197835442c265 100644
> --- a/arch/x86/entry/syscall_64.c
> +++ b/arch/x86/entry/syscall_64.c
> @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
>   #undef __SYSCALL_X32
>   
>   #endif
> +
> +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs);
> +#define __SYSCALL_X32(nr, sym, qual)
> +
> +long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> +	switch (nr) {
> +	#include <asm/syscalls_64.h>
> +	default: return __x64_sys_ni_syscall(regs);
> +	}
> +};
> +
> +#ifdef CONFIG_X86_X32_ABI
> +
> +#define __SYSCALL_64(nr, sym, qual)
> +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs);
> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> +	switch (nr) {
> +	#include <asm/syscalls_64.h>
> +	default: return __x64_sys_ni_syscall(regs);
> +	}
> +}
> +#undef __SYSCALL_64
> +#undef __SYSCALL_X32
> +
> +#endif /* CONFIG_X86_X32_ABI */
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index 8db3fdb6102ec..5d131e7206a26 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[];
>   extern const sys_call_ptr_t x32_sys_call_table[];
>   #endif
>   
> +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION) 
block above.
> +extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
> +extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
> +
>   /*
>    * Only the low 32 bits of orig_ax are meaningful, so we return int.
>    * This importantly ignores the high bits on 64-bit, so comparisons



More information about the kernel-team mailing list