[SRU][N:linux-gcp][PATCH 2/2] x86/tdx: Fix arch_safe_halt() execution for TDX VMs
Ian Whitfield
ian.whitfield at canonical.com
Mon Jun 9 22:00:06 UTC 2025
From: Vishal Annapurve <vannapurve at google.com>
BugLink: https://bugs.launchpad.net/bugs/2112602
commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 upstream.
Direct HLT instruction execution causes #VEs for TDX VMs which is routed
to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE
handler will enable interrupts before TDCALL is routed to hypervisor
leading to missed wakeup events, as current TDX spec doesn't expose
interruptibility state information to allow #VE handler to selectively
enable interrupts.
Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
prevented the idle routines from executing HLT instruction in STI-shadow.
But it missed the paravirt routine which can be reached via this path
as an example:
kvm_wait() =>
safe_halt() =>
raw_safe_halt() =>
arch_safe_halt() =>
irq.safe_halt() =>
pv_native_safe_halt()
To reliably handle arch_safe_halt() for TDX VMs, introduce explicit
dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt()
routines with TDX-safe versions that execute direct TDCALL and needed
interrupt flag updates. Executing direct TDCALL brings in additional
benefit of avoiding HLT related #VEs altogether.
As tested by Ryan Afranji:
"Tested with the specjbb2015 benchmark. It has heavy lock contention which leads
to many halt calls. TDX VMs suffered a poor score before this patchset.
Verified the major performance improvement with this patchset applied."
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
Signed-off-by: Vishal Annapurve <vannapurve at google.com>
Signed-off-by: Ingo Molnar <mingo at kernel.org>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
Tested-by: Ryan Afranji <afranji at google.com>
Cc: Andy Lutomirski <luto at kernel.org>
Cc: Brian Gerst <brgerst at gmail.com>
Cc: Juergen Gross <jgross at suse.com>
Cc: H. Peter Anvin <hpa at zytor.com>
Cc: Linus Torvalds <torvalds at linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe at redhat.com>
Cc: stable at vger.kernel.org
Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
(cherry picked from commit 29f040d4efb08497165a695f40d7049bd35f846b linux-6.6.y)
Signed-off-by: Ian Whitfield <ian.whitfield at canonical.com>
---
arch/x86/Kconfig | 1 +
arch/x86/coco/tdx/tdx.c | 26 +++++++++++++++++++++++++-
arch/x86/include/asm/tdx.h | 4 ++--
arch/x86/kernel/process.c | 2 +-
4 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2c681628d2bd..0f4ea362b441 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -881,6 +881,7 @@ config INTEL_TDX_GUEST
depends on X86_64 && CPU_SUP_INTEL
depends on X86_X2APIC
depends on EFI_STUB
+ depends on PARAVIRT
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 2f85ed005c42..b8aeb3ac7d28 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -359,7 +360,7 @@ static int handle_halt(struct ve_info *ve)
return ve_instr_len(ve);
}
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
{
const bool irq_disabled = false;
@@ -370,6 +371,16 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
+static void __cpuidle tdx_safe_halt(void)
+{
+ tdx_halt();
+ /*
+ * "__cpuidle" section doesn't support instrumentation, so stick
+ * with raw_* variant that avoids tracing hooks.
+ */
+ raw_local_irq_enable();
+}
+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_module_args args = {
@@ -1056,6 +1067,19 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+ /*
+ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+ * will enable interrupts before HLT TDCALL invocation if executed
+ * in STI-shadow, possibly resulting in missed wakeup events.
+ *
+ * Modify all possible HLT execution paths to use TDX specific routines
+ * that directly execute TDCALL and toggle the interrupt state as
+ * needed after TDCALL completion. This also reduces HLT related #VEs
+ * in addition to having a reliable halt logic execution.
+ */
+ pv_ops.irq.safe_halt = tdx_safe_halt;
+ pv_ops.irq.halt = tdx_halt;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index eba178996d84..b5b633294061 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void);
+void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -69,7 +69,7 @@ u64 tdx_hcall_get_quote(u8 *buf, size_t size);
#else
static inline void tdx_early_init(void) { };
-static inline void tdx_safe_halt(void) { };
+static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 45a9d496fe2a..fa9092d64055 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -950,7 +950,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
- static_call_update(x86_idle, tdx_safe_halt);
+ static_call_update(x86_idle, tdx_halt);
} else
static_call_update(x86_idle, default_idle);
}
--
2.43.0
More information about the kernel-team
mailing list