[SRU Mantic][PATCH 3/9] x86/bhi: Add support for clearing branch history at syscall entry

Stefan Bader stefan.bader at canonical.com
Mon Apr 15 10:39:16 UTC 2024


On 15.04.24 11:48, Stefan Bader wrote:
> From: Pawan Gupta <pawan.kumar.gupta at linux.intel.com>
> 
> commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 upstream.
> 
> Branch History Injection (BHI) attacks may allow a malicious application to
> influence indirect branch prediction in the kernel by poisoning the branch
> history. eIBRS isolates indirect branch targets in ring0.  The BHB can
> still influence the choice of indirect branch predictor entry, and although
> branch predictor entries are isolated between modes when eIBRS is enabled,
> the BHB itself is not isolated between modes.
> 
> Alder Lake and newer processors support a hardware control, BHI_DIS_S, to
> mitigate BHI.  For older processors Intel has released a software sequence
> to clear the branch history on parts that don't support BHI_DIS_S. Add
> support to execute the software sequence at syscall entry and VMexit to
> overwrite the branch history.
> 
> For now, branch history is not cleared at interrupt entry, as malicious
> applications are not believed to have sufficient control over the
> registers, since previous register state is cleared at interrupt
> entry. Researchers continue to poke at this area and it may become
> necessary to clear at interrupt entry as well in the future.
> 
> This mitigation is only defined here. It is enabled later.
> 
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta at linux.intel.com>
> Co-developed-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
> Reviewed-by: Alexandre Chartre <alexandre.chartre at oracle.com>
> Reviewed-by: Josh Poimboeuf <jpoimboe at kernel.org>
> Signed-off-by: Daniel Sneddon <daniel.sneddon at linux.intel.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> 
> CVE-2024-2201
> (cherry picked from commit eb36b0dce2138581bc6b5e39d0273cb4c96ded81 linux-6.6.y)
> Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
> ---
>   arch/x86/entry/common.c              |  4 +-
>   arch/x86/entry/entry_64.S            | 61 ++++++++++++++++++++++++++++
>   arch/x86/entry/entry_64_compat.S     | 16 ++++++++

Here I accidentally dropped

arch/x86/include/asm/cpufeatures.h   |  3 +-

which adds X86_FEATURE_CLEAR_BHB_LOOP. I have amended this locally and
am doing a test compile again.
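For reference, the missing hunk corresponds to the upstream/6.6.y change that
defines the synthetic feature bit, roughly as below; treat this as a sketch of
the upstream definition rather than the amended hunk itself, since the exact
bit position may differ in the Mantic tree:

  /* sketch of the upstream hunk; verify the bit position against the Mantic tree */
  #define X86_FEATURE_CLEAR_BHB_LOOP	(21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */

Without that bit, the CLEAR_BRANCH_HISTORY ALTERNATIVE added below would have
no feature flag to key off, hence the build failure being re-tested.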

>   arch/x86/include/asm/nospec-branch.h | 12 ++++++
>   arch/x86/include/asm/syscall.h       |  1 +
>   arch/x86/kvm/vmx/vmenter.S           |  2 +
>   6 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index a60d19228890..e72dac092245 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void)
>   }
>   
>   /**
> - * int80_emulation - 32-bit legacy syscall entry
> + * do_int80_emulation - 32-bit legacy syscall C entry from asm
>    *
>    * This entry point can be used by 32-bit and 64-bit programs to perform
>    * 32-bit system calls.  Instances of INT $0x80 can be found inline in
> @@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void)
>    *   eax:				system call number
>    *   ebx, ecx, edx, esi, edi, ebp:	arg1 - arg 6
>    */
> -DEFINE_IDTENTRY_RAW(int80_emulation)
> +__visible noinstr void do_int80_emulation(struct pt_regs *regs)
>   {
>   	int nr;
>   
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 43606de22511..f61898385961 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
>   	/* clobbers %rax, make sure it is after saving the syscall nr */
>   	IBRS_ENTER
>   	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>   
>   	call	do_syscall_64		/* returns with IRQs disabled */
>   
> @@ -1538,3 +1539,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
>   	call	make_task_dead
>   SYM_CODE_END(rewind_stack_and_make_dead)
>   .popsection
> +
> +/*
> + * This sequence executes branches in order to remove user branch information
> + * from the branch history tracker in the Branch Predictor, therefore removing
> + * user influence on subsequent BTB lookups.
> + *
> + * It should be used on parts prior to Alder Lake. Newer parts should use the
> + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
> + * virtualized on newer hardware the VMM should protect against BHI attacks by
> + * setting BHI_DIS_S for the guests.
> + *
> + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
> + * and not clearing the branch history. The call tree looks like:
> + *
> + * call 1
> + *    call 2
> + *      call 2
> + *        call 2
> + *          call 2
> + * 	      call 2
> + * 	      ret
> + * 	    ret
> + *        ret
> + *      ret
> + *    ret
> + * ret
> + *
> + * This means that the stack is non-constant and ORC can't unwind it with %rsp
> + * alone.  Therefore we unconditionally set up the frame pointer, which allows
> + * ORC to unwind properly.
> + *
> + * The alignment is for performance and not for safety, and may be safely
> + * refactored in the future if needed.
> + */
> +SYM_FUNC_START(clear_bhb_loop)
> +	push	%rbp
> +	mov	%rsp, %rbp
> +	movl	$5, %ecx
> +	ANNOTATE_INTRA_FUNCTION_CALL
> +	call	1f
> +	jmp	5f
> +	.align 64, 0xcc
> +	ANNOTATE_INTRA_FUNCTION_CALL
> +1:	call	2f
> +	RET
> +	.align 64, 0xcc
> +2:	movl	$5, %eax
> +3:	jmp	4f
> +	nop
> +4:	sub	$1, %eax
> +	jnz	3b
> +	sub	$1, %ecx
> +	jnz	1b
> +	RET
> +5:	lfence
> +	pop	%rbp
> +	RET
> +SYM_FUNC_END(clear_bhb_loop)
> +EXPORT_SYMBOL_GPL(clear_bhb_loop)
> +STACK_FRAME_NON_STANDARD(clear_bhb_loop)
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index 4e88f8438706..2819b8d5737f 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
>   
>   	IBRS_ENTER
>   	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>   
>   	/*
>   	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
> @@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
>   
>   	IBRS_ENTER
>   	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>   
>   	movq	%rsp, %rdi
>   	call	do_fast_syscall_32
> @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
>   	ANNOTATE_NOENDBR
>   	int3
>   SYM_CODE_END(entry_SYSCALL_compat)
> +
> +/*
> + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
> + * point to C routines, however since this is a system call interface the branch
> + * history needs to be scrubbed to protect against BHI attacks, and that
> + * scrubbing needs to take place in assembly code prior to entering any C
> + * routines.
> + */
> +SYM_CODE_START(int80_emulation)
> +	ANNOTATE_NOENDBR
> +	UNWIND_HINT_FUNC
> +	CLEAR_BRANCH_HISTORY
> +	jmp do_int80_emulation
> +SYM_CODE_END(int80_emulation)
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 0396458c201f..cbb337b792a5 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -329,6 +329,14 @@
>   #endif
>   .endm
>   
> +#ifdef CONFIG_X86_64
> +.macro CLEAR_BRANCH_HISTORY
> +	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
> +.endm
> +#else
> +#define CLEAR_BRANCH_HISTORY
> +#endif
> +
>   #else /* __ASSEMBLY__ */
>   
>   #define ANNOTATE_RETPOLINE_SAFE					\
> @@ -359,6 +367,10 @@ extern void srso_alias_untrain_ret(void);
>   extern void entry_untrain_ret(void);
>   extern void entry_ibpb(void);
>   
> +#ifdef CONFIG_X86_64
> +extern void clear_bhb_loop(void);
> +#endif
> +
>   extern void (*x86_return_thunk)(void);
>   
>   #ifdef CONFIG_CALL_DEPTH_TRACKING
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index b1d2a731cf60..03bb950eba69 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -125,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
>   }
>   
>   void do_syscall_64(struct pt_regs *regs, int nr);
> +void do_int80_emulation(struct pt_regs *regs);
>   
>   #endif	/* CONFIG_X86_32 */
>   
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index be275a0410a8..e75dff1d6f97 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -272,6 +272,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
>   
>   	call vmx_spec_ctrl_restore_host
>   
> +	CLEAR_BRANCH_HISTORY
> +
>   	/* Put return value in AX */
>   	mov %_ASM_BX, %_ASM_AX
>   

-- 
- Stefan
