NACK: [PATCH 05/11] x86/syscall: Don't force use of indirect calls for system calls

Cengiz Can cengiz.can at canonical.com
Wed Apr 24 13:03:28 UTC 2024


On 4/19/24 23:28, Yuxuan Luo wrote:
> 
> On 4/17/24 19:53, Yuxuan Luo wrote:
>> From: Linus Torvalds <torvalds at linux-foundation.org>
>>
>> Make <asm/syscall.h> build a switch statement instead, and the 
>> compiler can
>> either decide to generate an indirect jump, or - more likely these 
>> days due
>> to mitigations - just a series of conditional branches.
>>
>> Yes, the conditional branches also have branch prediction, but the branch
>> prediction is much more controlled, in that it just causes speculatively
>> running the wrong system call (harmless), rather than speculatively 
>> running
>> possibly wrong random less controlled code gadgets.
>>
>> This doesn't mitigate other indirect calls, but the system call 
>> indirection
>> is the first and most easily triggered case.
>>
>> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
>> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
>> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
>> Reviewed-by: Josh Poimboeuf <jpoimboe at kernel.org>
>>
>> (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2)
>> [yuxuan.luo:
>> common.c:
>>   - substitute *sys_call_table[nr]() with corresponding *sys_call(regs,
>>     nr).
>>   - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll
>>     define different ia32_sys_call() in syscall_32.c using macros.
>>
>> syscall_x32.c:
>>   - x32_sys_call() should move to syscall_64.c since they both rely on
>>     __x64_ system calls.
>>
>> syscall_32.c:
>>   - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute
>>     __SYSCALL with __SYSCALL_I386 in #define lines.
>>   - For the case where CONFIG_IA32_EMULATION=n, expand regs to six
>>     variables.
>>   - Since syscall table in Focal includes arch name in syscalls prefixes
>>     already, change 'case nr: return __x64_##sym()' to '... return
>>     sym()'. This applies to syscall_64.c as well.
>>
>> syscall_64.c:
>>   - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64.
>>   - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply
>>     with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64
>>     lines and substitute __SYSCALL_X32 lines.
>> ]
>> CVE-2024-2201
>> Signed-off-by: Yuxuan Luo <yuxuan.luo at canonical.com>
>> ---
>>   arch/x86/entry/common.c        | 11 ++++-------
>>   arch/x86/entry/syscall_32.c    | 33 +++++++++++++++++++++++++++++++++
>>   arch/x86/entry/syscall_64.c    | 27 +++++++++++++++++++++++++++
>>   arch/x86/include/asm/syscall.h |  4 ++++
>>   4 files changed, 68 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
>> index 721109c0cf994..dec36a7133821 100644
>> --- a/arch/x86/entry/common.c
>> +++ b/arch/x86/entry/common.c
>> @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, 
>> struct pt_regs *regs)
>>       if (likely(nr < NR_syscalls)) {
>>           nr = array_index_nospec(nr, NR_syscalls);
>> -        regs->ax = sys_call_table[nr](regs);
>> +        regs->ax = x64_sys_call(regs, nr);
>>   #ifdef CONFIG_X86_X32_ABI
>>       } else if (likely((nr & __X32_SYSCALL_BIT) &&
>>                 (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
>>           nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
>>                       X32_NR_syscalls);
>> -        regs->ax = x32_sys_call_table[nr](regs);
>> +        regs->ax = x32_sys_call(regs, nr);
>>   #endif
>>       }
>> @@ -331,7 +331,7 @@ static __always_inline void 
>> do_syscall_32_irqs_on(struct pt_regs *regs)
>>       if (likely(nr < IA32_NR_syscalls)) {
>>           nr = array_index_nospec(nr, IA32_NR_syscalls);
>>   #ifdef CONFIG_IA32_EMULATION
>> -        regs->ax = ia32_sys_call_table[nr](regs);
>> +        regs->ax = ia32_sys_call(regs, nr);
>>   #else
>>           /*
>>            * It's possible that a 32-bit syscall implementation
>> @@ -339,10 +339,7 @@ static __always_inline void 
>> do_syscall_32_irqs_on(struct pt_regs *regs)
>>            * the high bits are zero.  Make sure we zero-extend all
>>            * of the args.
>>            */
>> -        regs->ax = ia32_sys_call_table[nr](
>> -            (unsigned int)regs->bx, (unsigned int)regs->cx,
>> -            (unsigned int)regs->dx, (unsigned int)regs->si,
>> -            (unsigned int)regs->di, (unsigned int)regs->bp);
>> +        regs->ax = ia32_sys_call(regs, nr);
>>   #endif /* CONFIG_IA32_EMULATION */
>>       }
>> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
>> index 7d17b3addbbb3..e4e186ab20567 100644
>> --- a/arch/x86/entry/syscall_32.c
>> +++ b/arch/x86/entry/syscall_32.c
>> @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t 
>> ia32_sys_call_table[__NR_syscall_compat_max+1] =
>>       [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
>>   #include <asm/syscalls_32.h>
>>   };
>> +#undef __SYSCALL_I386
>> +
>> +#ifdef CONFIG_IA32_EMULATION
>> +
>> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs);
>> +
>> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> +    switch (nr) {
>> +    #include <asm/syscalls_32.h>
>> +    default: return __ia32_sys_ni_syscall(regs);
>> +    }
>> +};
>> +
>> +#else /* CONFIG_IA32_EMULATION */
>> +
>> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(    \
>> +        (unsigned int)regs->bx, (unsigned int)regs->cx,    \
>> +        (unsigned int)regs->dx, (unsigned int)regs->si,    \
>> +        (unsigned int)regs->di, (unsigned int)regs->bp);
>> +
>> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> +    switch (nr) {
>> +    #include <asm/syscalls_32.h>
>> +    default: return __ia32_sys_ni_syscall(
>> +            (unsigned int)regs->bx, (unsigned int)regs->cx,
>> +            (unsigned int)regs->dx, (unsigned int)regs->si,
>> +            (unsigned int)regs->di, (unsigned int)regs->bp);
>> +    }
>> +};
>> +
>> +#endif /* CONFIG_IA32_EMULATION */
>> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
>> index adf619a856e8d..197835442c265 100644
>> --- a/arch/x86/entry/syscall_64.c
>> +++ b/arch/x86/entry/syscall_64.c
>> @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t 
>> x32_sys_call_table[__NR_syscall_x32_max+1] = {
>>   #undef __SYSCALL_X32
>>   #endif
>> +
>> +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs);
>> +#define __SYSCALL_X32(nr, sym, qual)
>> +
>> +long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> +    switch (nr) {
>> +    #include <asm/syscalls_64.h>
>> +    default: return __x64_sys_ni_syscall(regs);
>> +    }
>> +};
>> +
>> +#ifdef CONFIG_X86_X32_ABI
>> +
>> +#define __SYSCALL_64(nr, sym, qual)
>> +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs);
>> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
>> +{
>> +    switch (nr) {
>> +    #include <asm/syscalls_64.h>
>> +    default: return __x64_sys_ni_syscall(regs);
>> +    }
>> +}
>> +#undef __SYSCALL_64
>> +#undef __SYSCALL_X32
>> +
>> +#endif /* CONFIG_X86_X32_ABI */
>> diff --git a/arch/x86/include/asm/syscall.h 
>> b/arch/x86/include/asm/syscall.h
>> index 8db3fdb6102ec..5d131e7206a26 100644
>> --- a/arch/x86/include/asm/syscall.h
>> +++ b/arch/x86/include/asm/syscall.h
>> @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[];
>>   extern const sys_call_ptr_t x32_sys_call_table[];
>>   #endif
>> +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
> Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION) 
> block above.

Judging by the self comment here, I guess we're not very confident on 
the patchset, yet.

Did we test this patchset on focal very throughly? Just a compile test 
for a dramatic change like this is absolutely not enough.

>> +extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
>> +extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
>> +
>>   /*
>>    * Only the low 32 bits of orig_ax are meaningful, so we return int.
>>    * This importantly ignores the high bits on 64-bit, so comparisons
> 




More information about the kernel-team mailing list