[PATCH 2/5] UBUNTU: SAUCE: Support TDX+HCL (July 9, 2023)
Tim Gardner
tim.gardner at canonical.com
Mon Jul 24 17:00:14 UTC 2023
From: Dexuan Cui <decui at microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2028286
The changes will be further cleaned up and posted to LKML.
Tested the below scenarios and the VMs were able to boot up with 128 VPs:
1) TDX with the paravisor.
2) TDX without the paravisor.
3) SNP with the paravisor.
4) VBS.
5) Regular VMs.
(cherry picked from commit 9893873bdef6f1e5574f784ed6e1d9d5bc54f1d8 https://github.com/dcui/linux/commit/9893873bdef6f1e5574f784ed6e1d9d5bc54f1d8)
Signed-off-by: Dexuan Cui <decui at microsoft.com>
(cherry picked from commit 52283f363634df9b096b94634100b1c945ea60eb https://github.com/dcui/linux)
Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
---
arch/x86/coco/core.c | 5 +-
arch/x86/hyperv/hv_apic.c | 13 +++-
arch/x86/hyperv/hv_init.c | 97 ++++++++++++++++++++-----
arch/x86/hyperv/ivm.c | 124 +++++++++++++++++++++++++++-----
arch/x86/include/asm/coco.h | 1 +
arch/x86/include/asm/mshyperv.h | 15 ++--
arch/x86/kernel/cpu/mshyperv.c | 41 +++++++----
arch/x86/kernel/eisa.c | 10 +++
drivers/hv/connection.c | 11 ++-
drivers/hv/hv.c | 32 +++++----
drivers/hv/hv_common.c | 4 +-
include/asm-generic/mshyperv.h | 3 +-
include/linux/cpuhotplug.h | 1 +
13 files changed, 280 insertions(+), 77 deletions(-)
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index f4f0625691fd..a39a92efb6de 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -14,16 +14,19 @@
#include <asm/processor.h>
static enum cc_vendor vendor __ro_after_init;
+bool cc_attr_cpu_hotplug_disabled __ro_after_init = true;
static u64 cc_mask __ro_after_init;
static bool intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
case CC_ATTR_GUEST_UNROLL_STRING_IO:
- case CC_ATTR_HOTPLUG_DISABLED:
case CC_ATTR_GUEST_MEM_ENCRYPT:
case CC_ATTR_MEM_ENCRYPT:
return true;
+
+ case CC_ATTR_HOTPLUG_DISABLED:
+ return cc_attr_cpu_hotplug_disabled;
default:
return false;
}
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index fb8b2c088681..4c9be526cb02 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -173,8 +173,10 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
(exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
return true;
- if (!hv_hypercall_pg)
- return false;
+ if (!hv_hypercall_pg) {
+ if (!hv_isolation_type_tdx() || hyperv_paravisor_present)
+ return false;
+ }
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;
@@ -227,9 +229,14 @@ static bool __send_ipi_one(int cpu, int vector)
trace_hyperv_send_ipi_one(cpu, vector);
- if (!hv_hypercall_pg || (vp == VP_INVAL))
+ if (vp == VP_INVAL)
return false;
+ if (!hv_hypercall_pg) {
+ if (!hv_isolation_type_tdx() || hyperv_paravisor_present)
+ return false;
+ }
+
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 28ee240a2c90..400ec1573287 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -381,6 +381,36 @@ static void __init hv_get_partition_id(void)
local_irq_restore(flags);
}
+static int hv_write_efer(unsigned int cpu)
+{
+ unsigned long long efer;
+
+ if (!hv_isolation_type_tdx() || !hyperv_paravisor_present)
+ return 0;
+
+ /*
+ * Write EFER by force, otherwise the paravisor's hypercall
+ * handler thinks that the VP is in 32-bit mode, and the
+ * returning RIP is truncated to 32-bits, causing a fatal
+ * page fault. This is a TDX-specific issue because it looks
+ * like the initial default value of EFER on non-boot VPs
+ * already has the EFER.LMA bit, and when the reading of
+ * EFER on a non-boot VP is the same as the value of EFER
+ * on VP0, Linux doesn't write the EFER register on a
+ * non-boot VP: see the code in arch/x86/kernel/head_64.S
+ * ("Avoid writing EFER if no change was made (for TDX guest)").
+ * Also see commit 77a512e35db7 ("x86/boot: Avoid #VE during boot for TDX platforms")
+ * Work around the issue for now by forcing an EFER write.
+ *
+ * XXX: This is a temporary hack. Need to figure out why the
+ * initial default value of EFER on non-boot VPs is not zero.
+ */
+ rdmsrl(MSR_EFER, efer);
+ wrmsrl(MSR_EFER, efer);
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -394,10 +424,19 @@ void __init hyperv_init(void)
u64 guest_id;
union hv_x64_msr_hypercall_contents hypercall_msr;
int cpuhp;
+ int ret;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
+ if (hv_isolation_type_tdx() && hyperv_paravisor_present) {
+ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_FORCE_EFER_WRITE,
+ "x86/hyperv_write_efer",
+ hv_write_efer, NULL);
+ if (WARN_ON(ret < 0))
+ return;
+ }
+
if (hv_common_init())
return;
@@ -429,24 +468,37 @@ void __init hyperv_init(void)
goto free_vp_assist_page;
}
- cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
- hv_cpu_init, hv_cpu_die);
- if (cpuhp < 0)
- goto free_ghcb_page;
-
/*
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
* 2. Enable the hypercall and register the hypercall page
+ *
+ * A TDX VM with no paravisor uses GHCI rather than hv_hypercall_pg.
+ * When the VM needs to pass an input page to Hyper-V, the page must
+ * be a shared page, e.g. hv_post_message() uses the per-CPU shared
+ * page hyperv_pcpu_input_arg.
+ *
+ * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls,
+ * which are handled by the paravisor and a private input page must be
+ * used, e.g. see hv_mark_gpa_visibility(). The VM uses GHCI for
+ * two hypercalls: HVCALL_SIGNAL_EVENT (see vmbus_set_event()) and
+ * HVCALL_POST_MESSAGE (the input page must be a shared page, i.e.
+ * hv_post_message() uses the per-CPU shared hyperv_pcpu_input_arg.)
+ * NOTE: we must initialize hv_hypercall_pg before hv_cpu_init(),
+ * because hv_cpu_init() -> hv_common_cpu_init() -> set_memory_decrypted()
+ * -> ... -> hv_vtom_set_host_visibility() -> ... -> hv_do_hypercall()
+ * needs to call the hv_hypercall_pg.
+ */
+
+ /*
+ * In the case of TDX with the paravisor, we should write the MSR
+ * before hv_cpu_init(), which needs to call the paravisor-handled
+ * HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY.
*/
guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
- hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
-
- /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
- if (hv_isolation_type_tdx())
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
goto skip_hypercall_pg_init;
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
@@ -454,7 +506,7 @@ void __init hyperv_init(void)
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
__builtin_return_address(0));
if (hv_hypercall_pg == NULL)
- goto clean_guest_os_id;
+ goto free_ghcb_page;
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
@@ -489,6 +541,18 @@ void __init hyperv_init(void)
}
skip_hypercall_pg_init:
+ cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, hv_cpu_die);
+ if (cpuhp < 0)
+ goto clean_guest_os_id;
+
+ /*
+ * In the case of SNP with the paravisor, we must write the MSR to
+ * the hypervisor after hv_cpu_init(), which maps the hv_ghcb_pg first.
+ */
+ if (hyperv_paravisor_present)
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
+
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -528,8 +592,8 @@ void __init hyperv_init(void)
clean_guest_os_id:
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
- cpuhp_remove_state(cpuhp);
+ if (hyperv_paravisor_present)
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
free_ghcb_page:
free_percpu(hv_ghcb_pg);
free_vp_assist_page:
@@ -549,7 +613,8 @@ void hyperv_cleanup(void)
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+ if (hyperv_paravisor_present)
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
* Reset hypercall page reference before reset the page,
@@ -612,8 +677,8 @@ bool hv_is_hyperv_initialized(void)
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return false;
- /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
- if (hv_isolation_type_tdx())
+ /* A TDX guest without paravisor uses the GHCI call rather than hv_hypercall_pg */
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return true;
/*
* Verify that earlier initialization succeeded by checking
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 5b6de5449704..4e31677d1c02 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -57,7 +57,7 @@ union hv_ghcb {
static u16 hv_ghcb_version __ro_after_init;
-u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
+static u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
@@ -100,6 +100,31 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
return status;
}
+
+u64 hv_ivm_hypercall(u64 control, void *input, void *output, u32 input_size)
+{
+ if (hv_isolation_type_tdx()) {
+ u64 input_address = input ? (virt_to_phys(input) | ms_hyperv.shared_gpa_boundary) : 0;
+ u64 output_address = output ? (virt_to_phys(output) | ms_hyperv.shared_gpa_boundary) : 0;
+ return hv_tdx_hypercall(control, input_address, output_address);
+ } else if (hv_isolation_type_snp()) {
+ return hv_ghcb_hypercall(control, input, output, input_size);
+ } else {
+ return HV_STATUS_INVALID_HYPERCALL_CODE;
+ }
+}
+
+u64 hv_tdx_hypercall_fast(u64 control, u64 input)
+{
+ u64 input_address = input;
+ u64 output_address = 0;
+
+ return hv_tdx_hypercall(control | HV_HYPERCALL_FAST_BIT,
+ input_address, output_address);
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall_fast);
+
+
static inline u64 rd_ghcb_msr(void)
{
return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
@@ -174,7 +199,38 @@ bool hv_ghcb_negotiate_protocol(void)
return true;
}
-void hv_ghcb_msr_write(u64 msr, u64 value)
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+
+static void hv_tdx_read_msr(u64 msr, u64 *val)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_MSR_READ,
+ .r12 = msr,
+ };
+
+ u64 ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+ if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
+ *val = 0;
+ else
+ *val = args.r11;
+}
+
+static void hv_tdx_write_msr(u64 msr, u64 val)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_MSR_WRITE,
+ .r12 = msr,
+ .r13 = val,
+ };
+
+ u64 ret = __tdx_hypercall(&args, 0);
+ WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
+}
+
+static void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
@@ -202,9 +258,17 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
-void hv_ghcb_msr_read(u64 msr, u64 *value)
+void hv_ivm_msr_write(u64 msr, u64 value)
+{
+ if (hv_isolation_type_tdx())
+ hv_tdx_write_msr(msr, value);
+ else if (hv_isolation_type_snp())
+ hv_ghcb_msr_write(msr, value);
+}
+EXPORT_SYMBOL_GPL(hv_ivm_msr_write);
+
+static void hv_ghcb_msr_read(u64 msr, u64 *value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
@@ -234,7 +298,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
#ifdef CONFIG_INTEL_TDX_GUEST
DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
@@ -259,6 +322,17 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
#endif
+void hv_ivm_msr_read(u64 msr, u64 *value)
+{
+ if (hv_isolation_type_tdx())
+ hv_tdx_read_msr(msr, value);
+ else if (hv_isolation_type_snp())
+ hv_ghcb_msr_read(msr, value);
+}
+EXPORT_SYMBOL_GPL(hv_ivm_msr_read);
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct hv_gpa_range_for_visibility,
+ hv_gpa_range_for_visibility);
/*
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
*
@@ -266,10 +340,10 @@ EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
* needs to set memory visible to host via hvcall before sharing memory
* with host.
*/
-static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
enum hv_mem_host_visibility visibility)
{
- struct hv_gpa_range_for_visibility **input_pcpu, *input;
+ struct hv_gpa_range_for_visibility *input;
u16 pages_processed;
u64 hv_status;
unsigned long flags;
@@ -285,14 +359,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
}
local_irq_save(flags);
- input_pcpu = (struct hv_gpa_range_for_visibility **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
- input = *input_pcpu;
- if (unlikely(!input)) {
- local_irq_restore(flags);
- return -EINVAL;
- }
-
+ /*
+ * The page should be a private page, which is passed to the paravisor
+ * and is not shared with the hypervisor. Note: we shouldn't use the
+ * hyperv_pcpu_input_arg, which is a shared page in the case of
+ * a TDX VM with the paravisor.
+ */
+ input = this_cpu_ptr(&hv_gpa_range_for_visibility);
input->partition_id = HV_PARTITION_ID_SELF;
input->host_visibility = visibility;
input->reserved0 = 0;
@@ -381,13 +454,30 @@ static bool hv_is_private_mmio(u64 addr)
void __init hv_vtom_init(void)
{
+ enum hv_isolation_type type = hv_get_isolation_type();
/*
* By design, a VM using vTOM doesn't see the SEV setting,
* so SEV initialization is bypassed and sev_status isn't set.
* Set it here to indicate a vTOM VM.
*/
- sev_status = MSR_AMD64_SNP_VTOM;
- cc_set_vendor(CC_VENDOR_AMD);
+ switch (type) {
+ case HV_ISOLATION_TYPE_VBS:
+ fallthrough;
+
+ case HV_ISOLATION_TYPE_SNP:
+ sev_status = MSR_AMD64_SNP_VTOM;
+ cc_set_vendor(CC_VENDOR_AMD);
+ break;
+
+ case HV_ISOLATION_TYPE_TDX:
+ cc_set_vendor(CC_VENDOR_INTEL);
+ cc_attr_cpu_hotplug_disabled = false;
+ break;
+
+ default:
+ panic("hv_vtom_init: unsupported isolation type %d\n", type);
+ }
+
cc_set_mask(ms_hyperv.shared_gpa_boundary);
physical_mask &= ms_hyperv.shared_gpa_boundary - 1;
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index d2c6a2e8d04d..2bab556b7092 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -12,6 +12,7 @@ enum cc_vendor {
void cc_set_vendor(enum cc_vendor v);
void cc_set_mask(u64 mask);
+extern bool cc_attr_cpu_hotplug_disabled;
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
u64 cc_mkenc(u64 val);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 710b9e3cf2c7..734e94f4d3a8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -33,6 +33,7 @@ void hyperv_vector_handler(struct pt_regs *regs);
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
+extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
@@ -59,7 +60,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx())
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control,
cc_mkdec(input_address),
cc_mkdec(output_address));
@@ -106,7 +107,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
u64 hv_status;
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx())
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control, input1, 0);
{
@@ -154,7 +155,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
u64 hv_status;
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx())
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control, input1, input2);
{
@@ -236,14 +237,14 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
#ifdef CONFIG_AMD_MEM_ENCRYPT
-void hv_ghcb_msr_write(u64 msr, u64 value);
-void hv_ghcb_msr_read(u64 msr, u64 *value);
+void hv_ivm_msr_write(u64 msr, u64 value);
+void hv_ivm_msr_read(u64 msr, u64 *value);
bool hv_ghcb_negotiate_protocol(void);
void hv_ghcb_terminate(unsigned int set, unsigned int reason);
void hv_vtom_init(void);
#else
-static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
-static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
+static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
static inline void hv_vtom_init(void) {}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9aad261d2843..63223d40aa03 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -39,6 +39,10 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+bool hyperv_paravisor_present;
+/* The variable is used in modules via hv_do_hypercall() */
+EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
+
#if IS_ENABLED(CONFIG_HYPERV)
static inline unsigned int hv_get_nested_reg(unsigned int reg)
{
@@ -64,8 +68,8 @@ u64 hv_get_non_nested_register(unsigned int reg)
{
u64 value;
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
+ if (hv_is_synic_reg(reg) && hyperv_paravisor_present)
+ hv_ivm_msr_read(reg, &value);
else
rdmsrl(reg, value);
return value;
@@ -74,8 +78,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
void hv_set_non_nested_register(unsigned int reg, u64 value)
{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
+ if (hv_is_synic_reg(reg) && hyperv_paravisor_present) {
+ hv_ivm_msr_write(reg, value);
/* Write proxy bit via wrmsl instruction */
if (reg >= HV_REGISTER_SINT0 &&
@@ -424,6 +428,8 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.shared_gpa_boundary =
BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
+ hyperv_paravisor_present = ms_hyperv.paravisor_present;
+
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
@@ -434,17 +440,24 @@ static void __init ms_hyperv_init_platform(void)
hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
static_branch_enable(&isolation_type_tdx);
- /*
- * The GPAs of SynIC Event/Message pages and VMBus
- * Moniter pages need to be added by this offset.
- */
- ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
+ /* A TDX VM must use x2APIC and doesn't use lazy EOI. */
+ ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
+
+ if (!hyperv_paravisor_present) {
+ /*
+ * The GPAs of SynIC Event/Message pages and VMBus
+ * Monitor pages need to be added by this offset.
+ */
+ ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
+
+ /* HV_REGISTER_CRASH_CTL is unsupported */
+ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
- /* HV_REGISTER_CRASH_CTL is unsupported */
- ms_hyperv.misc_features &=
- ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ /* Don't trust Hyper-V's TLB-flushing hypercalls */
+ ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
- x86_init.acpi.reduced_hw_early_init = reduced_hw_init;
+ x86_init.acpi.reduced_hw_early_init = reduced_hw_init;
+ }
}
}
@@ -515,7 +528,7 @@ static void __init ms_hyperv_init_platform(void)
#if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
- (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP))
+ hyperv_paravisor_present)
hv_vtom_init();
/*
* Setup the hook to get control post apic initialization.
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b0449..715b41968420 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -8,10 +8,20 @@
#include <xen/xen.h>
+extern bool hyperv_paravisor_present;
+
static __init int eisa_bus_probe(void)
{
void __iomem *p;
+ /*
+ * It looks like Hyper-V hasn't emulated this MMIO access yet for a TDX
+ * VM with the paravisor: in such a VM, the "readl(p)" below causes a
+ * soft lockup. Work around the issue for now.
+ */
+ if (hyperv_paravisor_present)
+ return 0;
+
if (xen_pv_domain() && !xen_initial_domain())
return 0;
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 5978e9dbc286..91837f72e7e3 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -483,10 +483,17 @@ void vmbus_set_event(struct vmbus_channel *channel)
++channel->sig_events;
+ if (!hyperv_paravisor_present) {
+ hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
+ return;
+ }
+
if (hv_isolation_type_snp())
- hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
+ hv_ivm_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
NULL, sizeof(channel->sig_event));
+ else if (hv_isolation_type_tdx())
+ hv_tdx_hypercall_fast(HVCALL_SIGNAL_EVENT, channel->sig_event);
else
- hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
+ WARN_ON_ONCE(1);
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a6ecf742534f..a5d388f3706c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -100,8 +100,8 @@ int hv_post_message(union hv_connection_id connection_id,
aligned_msg->payload_size = payload_size;
memcpy((void *)aligned_msg->payload, payload, payload_size);
- if (hv_isolation_type_snp())
- status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
+ if (hyperv_paravisor_present)
+ status = hv_ivm_hypercall(HVCALL_POST_MESSAGE,
(void *)aligned_msg, NULL,
sizeof(*aligned_msg));
else
@@ -146,7 +146,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp() && !hv_root_partition) {
+ if (!hyperv_paravisor_present && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -162,7 +162,8 @@ int hv_synic_alloc(void)
}
}
- if (hv_isolation_type_tdx()) {
+ /* It's better to leak the page if the decryption fails. */
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present) {
ret = set_memory_decrypted(
(unsigned long)hv_cpu->synic_message_page, 1);
if (ret) {
@@ -198,12 +199,15 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- if (hv_isolation_type_tdx()) {
- ret = set_memory_encrypted(
- (unsigned long)hv_cpu->synic_message_page, 1);
- if (ret) {
- pr_err("Failed to encrypt SYNIC msg page\n");
- continue;
+ /* It's better to leak the page if the encryption fails. */
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present) {
+ if (hv_cpu->synic_message_page) {
+ ret = set_memory_encrypted((unsigned long)
+ hv_cpu->synic_message_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
+ hv_cpu->synic_message_page = NULL;
+ }
}
ret = set_memory_encrypted(
@@ -241,7 +245,7 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
simp.simp_enabled = 1;
- if (hv_isolation_type_snp() || hv_root_partition) {
+ if (hyperv_paravisor_present || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
@@ -264,7 +268,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp() || hv_root_partition) {
+ if (hyperv_paravisor_present || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
@@ -351,7 +355,7 @@ void hv_synic_disable_regs(unsigned int cpu)
* addresses.
*/
simp.simp_enabled = 0;
- if (hv_isolation_type_snp() || hv_root_partition) {
+ if (hyperv_paravisor_present || hv_root_partition) {
iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
@@ -363,7 +367,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 0;
- if (hv_isolation_type_snp() || hv_root_partition) {
+ if (hyperv_paravisor_present || hv_root_partition) {
iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 7eb25345c904..6c1fcfc6894a 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -331,8 +331,8 @@ void __weak hyperv_cleanup(void)
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
-u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
+u64 __weak hv_ivm_hypercall(u64 control, void *input, void *output, u32 input_size)
{
return HV_STATUS_INVALID_PARAMETER;
}
-EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
+EXPORT_SYMBOL_GPL(hv_ivm_hypercall);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 3e48cdc02b74..e7e3445e99b7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -284,7 +284,8 @@ enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
bool hv_set_memory_enc_dec_needed(void);
bool hv_isolation_type_snp(void);
-u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
+u64 hv_ivm_hypercall(u64 control, void *input, void *output, u32 input_size);
+u64 hv_tdx_hypercall_fast(u64 control, u64 input);
void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0cd429ccfc7f..fb3299235fc7 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -141,6 +141,7 @@ enum cpuhp_state {
*/
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
+ CPUHP_AP_HYPERV_FORCE_EFER_WRITE,
CPUHP_AP_CACHECTRL_STARTING,
CPUHP_AP_SCHED_STARTING,
CPUHP_AP_RCUTREE_DYING,
--
2.34.1
More information about the kernel-team
mailing list