NACK: [PATCH 05/11] x86/syscall: Don't force use of indirect calls for system calls
Cengiz Can
cengiz.can at canonical.com
Wed Apr 24 13:03:28 UTC 2024
On 4/19/24 23:28, Yuxuan Luo wrote:
>
> On 4/17/24 19:53, Yuxuan Luo wrote:
>> From: Linus Torvalds <torvalds at linux-foundation.org>
>>
>> Make <asm/syscall.h> build a switch statement instead, and the
>> compiler can
>> either decide to generate an indirect jump, or - more likely these
>> days due
>> to mitigations - just a series of conditional branches.
>>
>> Yes, the conditional branches also have branch prediction, but the branch
>> prediction is much more controlled, in that it just causes speculatively
>> running the wrong system call (harmless), rather than speculatively
>> running
>> possibly wrong random less controlled code gadgets.
>>
>> This doesn't mitigate other indirect calls, but the system call
>> indirection
>> is the first and most easily triggered case.
>>
>> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
>> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
>> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
>> Reviewed-by: Josh Poimboeuf <jpoimboe at kernel.org>
>>
>> (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2)
>> [yuxuan.luo:
>> common.c:
>> - substitute *sys_call_table[nr]() with corresponding *sys_call(regs,
>> nr).
>> - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll
>> define different ia32_sys_call() in syscall_32.c using macros.
>>
>> syscall_x32.c:
>> - x32_sys_call() should move to syscall_64.c since they both rely on
>> __x64_ system calls.
>>
>> syscall_32.c:
>> - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute
>> __SYSCALL with __SYSCALL_I386 in #define lines.
>> - For the case where CONFIG_IA32_EMULATION=n, expand regs to six
>> variables.
>> - Since syscall table in Focal includes arch name in syscalls prefixes
>> already, change 'case nr: return __x64_##sym()' to '... return
>> sym()'. This applies to syscall_64.c as well.
>>
>> syscall_64.c:
>> - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64.
>> - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply
>> with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64
>> lines and substitute __SYSCALL_X32 lines.
>> ]
>> CVE-2024-2201
>> Signed-off-by: Yuxuan Luo <yuxuan.luo at canonical.com>
>> ---
>> arch/x86/entry/common.c | 11 ++++-------
>> arch/x86/entry/syscall_32.c | 33 +++++++++++++++++++++++++++++++++
>> arch/x86/entry/syscall_64.c | 27 +++++++++++++++++++++++++++
>> arch/x86/include/asm/syscall.h | 4 ++++
>> 4 files changed, 68 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
>> index 721109c0cf994..dec36a7133821 100644
>> --- a/arch/x86/entry/common.c
>> +++ b/arch/x86/entry/common.c
>> @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr,
>> struct pt_regs *regs)
>> if (likely(nr < NR_syscalls)) {
>> nr = array_index_nospec(nr, NR_syscalls);
>> - regs->ax = sys_call_table[nr](regs);
>> + regs->ax = x64_sys_call(regs, nr);
>> #ifdef CONFIG_X86_X32_ABI
>> } else if (likely((nr & __X32_SYSCALL_BIT) &&
>> (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
>> nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
>> X32_NR_syscalls);
>> - regs->ax = x32_sys_call_table[nr](regs);
>> + regs->ax = x32_sys_call(regs, nr);
>> #endif
>> }
>> @@ -331,7 +331,7 @@ static __always_inline void
>> do_syscall_32_irqs_on(struct pt_regs *regs)
>> if (likely(nr < IA32_NR_syscalls)) {
>> nr = array_index_nospec(nr, IA32_NR_syscalls);
>> #ifdef CONFIG_IA32_EMULATION
>> - regs->ax = ia32_sys_call_table[nr](regs);
>> + regs->ax = ia32_sys_call(regs, nr);
>> #else
>> /*
>> * It's possible that a 32-bit syscall implementation
>> @@ -339,10 +339,7 @@ static __always_inline void
>> do_syscall_32_irqs_on(struct pt_regs *regs)
>> * the high bits are zero. Make sure we zero-extend all
>> * of the args.
>> */
>> - regs->ax = ia32_sys_call_table[nr](
>> - (unsigned int)regs->bx, (unsigned int)regs->cx,
>> - (unsigned int)regs->dx, (unsigned int)regs->si,
>> - (unsigned int)regs->di, (unsigned int)regs->bp);
>> + regs->ax = ia32_sys_call(regs, nr);
>> #endif /* CONFIG_IA32_EMULATION */
>> }
>> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
>> index 7d17b3addbbb3..e4e186ab20567 100644
>> --- a/arch/x86/entry/syscall_32.c
>> +++ b/arch/x86/entry/syscall_32.c
>> @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t
>> ia32_sys_call_table[__NR_syscall_compat_max+1] =
>> [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
>> #include <asm/syscalls_32.h>
>> };
>> +#undef __SYSCALL_I386
>> +
>> +#ifdef CONFIG_IA32_EMULATION
>> +
>> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs);
>> +
>> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> + switch (nr) {
>> + #include <asm/syscalls_32.h>
>> + default: return __ia32_sys_ni_syscall(regs);
>> + }
>> +};
>> +
>> +#else /* CONFIG_IA32_EMULATION */
>> +
>> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym( \
>> + (unsigned int)regs->bx, (unsigned int)regs->cx, \
>> + (unsigned int)regs->dx, (unsigned int)regs->si, \
>> + (unsigned int)regs->di, (unsigned int)regs->bp);
>> +
>> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> + switch (nr) {
>> + #include <asm/syscalls_32.h>
>> + default: return __ia32_sys_ni_syscall(
>> + (unsigned int)regs->bx, (unsigned int)regs->cx,
>> + (unsigned int)regs->dx, (unsigned int)regs->si,
>> + (unsigned int)regs->di, (unsigned int)regs->bp);
>> + }
>> +};
>> +
>> +#endif /* CONFIG_IA32_EMULATION */
>> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
>> index adf619a856e8d..197835442c265 100644
>> --- a/arch/x86/entry/syscall_64.c
>> +++ b/arch/x86/entry/syscall_64.c
>> @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t
>> x32_sys_call_table[__NR_syscall_x32_max+1] = {
>> #undef __SYSCALL_X32
>> #endif
>> +
>> +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs);
>> +#define __SYSCALL_X32(nr, sym, qual)
>> +
>> +long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> + switch (nr) {
>> + #include <asm/syscalls_64.h>
>> + default: return __x64_sys_ni_syscall(regs);
>> + }
>> +};
>> +
>> +#ifdef CONFIG_X86_X32_ABI
>> +
>> +#define __SYSCALL_64(nr, sym, qual)
>> +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs);
>> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> + switch (nr) {
>> + #include <asm/syscalls_64.h>
>> + default: return __x64_sys_ni_syscall(regs);
>> + }
>> +}
>> +#undef __SYSCALL_64
>> +#undef __SYSCALL_X32
>> +
>> +#endif /* CONFIG_X86_X32_ABI */
>> diff --git a/arch/x86/include/asm/syscall.h
>> b/arch/x86/include/asm/syscall.h
>> index 8db3fdb6102ec..5d131e7206a26 100644
>> --- a/arch/x86/include/asm/syscall.h
>> +++ b/arch/x86/include/asm/syscall.h
>> @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[];
>> extern const sys_call_ptr_t x32_sys_call_table[];
>> #endif
>> +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
> Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION)
> block above.
Judging by the self comment here, I guess we're not very confident on
the patchset, yet.
Did we test this patchset on focal very throughly? Just a compile test
for a dramatic change like this is absolutely not enough.
>> +extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
>> +extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
>> +
>> /*
>> * Only the low 32 bits of orig_ax are meaningful, so we return int.
>> * This importantly ignores the high bits on 64-bit, so comparisons
>
More information about the kernel-team
mailing list