Cmnt: [PATCH 05/11] x86/syscall: Don't force use of indirect calls for system calls
Yuxuan Luo
yuxuan.luo at canonical.com
Fri Apr 19 20:28:50 UTC 2024
On 4/17/24 19:53, Yuxuan Luo wrote:
> From: Linus Torvalds <torvalds at linux-foundation.org>
>
> Make <asm/syscall.h> build a switch statement instead, and the compiler can
> either decide to generate an indirect jump, or - more likely these days due
> to mitigations - just a series of conditional branches.
>
> Yes, the conditional branches also have branch prediction, but the branch
> prediction is much more controlled, in that it just causes speculatively
> running the wrong system call (harmless), rather than speculatively running
> possibly wrong random less controlled code gadgets.
>
> This doesn't mitigate other indirect calls, but the system call indirection
> is the first and most easily triggered case.
>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
> Reviewed-by: Josh Poimboeuf <jpoimboe at kernel.org>
>
> (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2)
> [yuxuan.luo:
> common.c:
> - substitute *sys_call_table[nr]() with corresponding *sys_call(regs,
> nr).
> - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll
> define different ia32_sys_call() in syscall_32.c using macros.
>
> syscall_x32.c:
> - x32_sys_call() should move to syscall_64.c since they both rely on
> __x64_ system calls.
>
> syscall_32.c:
> - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute
> __SYSCALL with __SYSCALL_I386 in #define lines.
> - For the case where CONFIG_IA32_EMULATION=n, expand regs to six
> variables.
> - Since syscall table in Focal includes arch name in syscalls prefixes
> already, change 'case nr: return __x64_##sym()' to '... return
> sym()'. This applies to syscall_64.c as well.
>
> syscall_64.c:
> - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64.
> - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply
> with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64
> lines and substitute __SYSCALL_X32 lines.
> ]
> CVE-2024-2201
> Signed-off-by: Yuxuan Luo <yuxuan.luo at canonical.com>
> ---
> arch/x86/entry/common.c | 11 ++++-------
> arch/x86/entry/syscall_32.c | 33 +++++++++++++++++++++++++++++++++
> arch/x86/entry/syscall_64.c | 27 +++++++++++++++++++++++++++
> arch/x86/include/asm/syscall.h | 4 ++++
> 4 files changed, 68 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 721109c0cf994..dec36a7133821 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>
> if (likely(nr < NR_syscalls)) {
> nr = array_index_nospec(nr, NR_syscalls);
> - regs->ax = sys_call_table[nr](regs);
> + regs->ax = x64_sys_call(regs, nr);
> #ifdef CONFIG_X86_X32_ABI
> } else if (likely((nr & __X32_SYSCALL_BIT) &&
> (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
> nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
> X32_NR_syscalls);
> - regs->ax = x32_sys_call_table[nr](regs);
> + regs->ax = x32_sys_call(regs, nr);
> #endif
> }
>
> @@ -331,7 +331,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
> if (likely(nr < IA32_NR_syscalls)) {
> nr = array_index_nospec(nr, IA32_NR_syscalls);
> #ifdef CONFIG_IA32_EMULATION
> - regs->ax = ia32_sys_call_table[nr](regs);
> + regs->ax = ia32_sys_call(regs, nr);
> #else
> /*
> * It's possible that a 32-bit syscall implementation
> @@ -339,10 +339,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
> * the high bits are zero. Make sure we zero-extend all
> * of the args.
> */
> - regs->ax = ia32_sys_call_table[nr](
> - (unsigned int)regs->bx, (unsigned int)regs->cx,
> - (unsigned int)regs->dx, (unsigned int)regs->si,
> - (unsigned int)regs->di, (unsigned int)regs->bp);
> + regs->ax = ia32_sys_call(regs, nr);
> #endif /* CONFIG_IA32_EMULATION */
> }
>
> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
> index 7d17b3addbbb3..e4e186ab20567 100644
> --- a/arch/x86/entry/syscall_32.c
> +++ b/arch/x86/entry/syscall_32.c
> @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] =
> [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
> #include <asm/syscalls_32.h>
> };
> +#undef __SYSCALL_I386
> +
> +#ifdef CONFIG_IA32_EMULATION
> +
> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs);
> +
> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> + switch (nr) {
> + #include <asm/syscalls_32.h>
> + default: return __ia32_sys_ni_syscall(regs);
> + }
> +};
> +
> +#else /* CONFIG_IA32_EMULATION */
> +
> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym( \
> + (unsigned int)regs->bx, (unsigned int)regs->cx, \
> + (unsigned int)regs->dx, (unsigned int)regs->si, \
> + (unsigned int)regs->di, (unsigned int)regs->bp);
> +
> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> + switch (nr) {
> + #include <asm/syscalls_32.h>
> + default: return __ia32_sys_ni_syscall(
> + (unsigned int)regs->bx, (unsigned int)regs->cx,
> + (unsigned int)regs->dx, (unsigned int)regs->si,
> + (unsigned int)regs->di, (unsigned int)regs->bp);
> + }
> +};
> +
> +#endif /* CONFIG_IA32_EMULATION */
> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
> index adf619a856e8d..197835442c265 100644
> --- a/arch/x86/entry/syscall_64.c
> +++ b/arch/x86/entry/syscall_64.c
> @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
> #undef __SYSCALL_X32
>
> #endif
> +
> +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs);
> +#define __SYSCALL_X32(nr, sym, qual)
> +
> +long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> + switch (nr) {
> + #include <asm/syscalls_64.h>
> + default: return __x64_sys_ni_syscall(regs);
> + }
> +};
> +
> +#ifdef CONFIG_X86_X32_ABI
> +
> +#define __SYSCALL_64(nr, sym, qual)
> +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs);
> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> +{
> + switch (nr) {
> + #include <asm/syscalls_64.h>
> + default: return __x64_sys_ni_syscall(regs);
> + }
> +}
> +#undef __SYSCALL_64
> +#undef __SYSCALL_X32
> +
> +#endif /* CONFIG_X86_X32_ABI */
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index 8db3fdb6102ec..5d131e7206a26 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[];
> extern const sys_call_ptr_t x32_sys_call_table[];
> #endif
>
> +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION)
block above.
> +extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
> +extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
> +
> /*
> * Only the low 32 bits of orig_ax are meaningful, so we return int.
> * This importantly ignores the high bits on 64-bit, so comparisons
More information about the kernel-team
mailing list