ACK: [SRU][Artful][Bionic][PATCH 1/1] kvm: vmx: Reinstate support for CPUs without virtual NMI
Khaled Elmously
khalid.elmously at canonical.com
Tue Jan 30 16:15:40 UTC 2018
On 2018-01-19 10:04:36, Joseph Salisbury wrote:
> From: Paolo Bonzini <pbonzini at redhat.com>
>
> BugLink: http://bugs.launchpad.net/bugs/1741655
>
> This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
> virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
> only had virtual NMIs in some SKUs.
>
> The revert is not trivial because in the meanwhile there have been several
> fixes to nested NMI injection. Therefore, the entire vNMI state is moved
> to struct loaded_vmcs.
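> 
> For reference, the per-VMCS state in question (shown in the first hunk
> below) is:
> 
> 	/* Support for vnmi-less CPUs */
> 	int soft_vnmi_blocked;
> 	ktime_t entry_time;
> 	s64 vnmi_blocked_time;
> 
> Since vmcs01 and vmcs02 are separate loaded_vmcs instances, each
> carries its own copy of this state across nested transitions.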
>
> Another change compared to before the patch is a simplification here:
>
> 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
> 	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
> 			get_vmcs12(vcpu))))) {
>
> The final condition here is always true and is removed: when the CPU
> lacks virtual NMIs, the feature is never exposed to L1, so
> nested_cpu_has_virtual_nmis(get_vmcs12(vcpu)) is always false on this
> path.
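> 
> With that condition dropped, the check in vmx_handle_exit() reduces to
> the form visible in the hunk below:
> 
> 	if (unlikely(!cpu_has_virtual_nmis() &&
> 		     vmx->loaded_vmcs->soft_vnmi_blocked)) {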
>
> Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
> Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
> Cc: stable at vger.kernel.org
> Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
> Signed-off-by: Radim Krčmář <rkrcmar at redhat.com>
> (cherry picked from commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f)
> Signed-off-by: Joseph Salisbury <joseph.salisbury at canonical.com>
> ---
> arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 106 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 01e73b6..d61986a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -202,6 +202,10 @@ struct loaded_vmcs {
> bool nmi_known_unmasked;
> unsigned long vmcs_host_cr3; /* May not match real cr3 */
> unsigned long vmcs_host_cr4; /* May not match real cr4 */
> + /* Support for vnmi-less CPUs */
> + int soft_vnmi_blocked;
> + ktime_t entry_time;
> + s64 vnmi_blocked_time;
> struct list_head loaded_vmcss_on_cpu_link;
> };
>
> @@ -1288,6 +1292,11 @@ static inline bool cpu_has_vmx_invpcid(void)
> SECONDARY_EXEC_ENABLE_INVPCID;
> }
>
> +static inline bool cpu_has_virtual_nmis(void)
> +{
> + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
> +}
> +
> static inline bool cpu_has_vmx_wbinvd_exit(void)
> {
> return vmcs_config.cpu_based_2nd_exec_ctrl &
> @@ -1339,11 +1348,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
> (vmcs12->secondary_vm_exec_control & bit);
> }
>
> -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
> -{
> - return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
> -}
> -
> static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
> {
> return vmcs12->pin_based_vm_exec_control &
> @@ -3676,9 +3680,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
> &_vmexit_control) < 0)
> return -EIO;
>
> - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
> - PIN_BASED_VIRTUAL_NMIS;
> - opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
> + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
> + PIN_BASED_VMX_PREEMPTION_TIMER;
> if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> &_pin_based_exec_control) < 0)
> return -EIO;
> @@ -5538,7 +5542,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>
> static void enable_nmi_window(struct kvm_vcpu *vcpu)
> {
> - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
> + if (!cpu_has_virtual_nmis() ||
> + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
> enable_irq_window(vcpu);
> return;
> }
> @@ -5578,6 +5583,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + if (!cpu_has_virtual_nmis()) {
> + /*
> + * Tracking the NMI-blocked state in software is built upon
> + * finding the next open IRQ window. This, in turn, depends on
> + * well-behaving guests: They have to keep IRQs disabled at
> + * least as long as the NMI handler runs. Otherwise we may
> + * cause NMI nesting, maybe breaking the guest. But as this is
> + * highly unlikely, we can live with the residual risk.
> + */
> + vmx->loaded_vmcs->soft_vnmi_blocked = 1;
> + vmx->loaded_vmcs->vnmi_blocked_time = 0;
> + }
> +
> ++vcpu->stat.nmi_injections;
> vmx->loaded_vmcs->nmi_known_unmasked = false;
>
> @@ -5596,6 +5614,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> bool masked;
>
> + if (!cpu_has_virtual_nmis())
> + return vmx->loaded_vmcs->soft_vnmi_blocked;
> if (vmx->loaded_vmcs->nmi_known_unmasked)
> return false;
> masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
> @@ -5607,13 +5627,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
> - if (masked)
> - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> - GUEST_INTR_STATE_NMI);
> - else
> - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
> - GUEST_INTR_STATE_NMI);
> + if (!cpu_has_virtual_nmis()) {
> + if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
> + vmx->loaded_vmcs->soft_vnmi_blocked = masked;
> + vmx->loaded_vmcs->vnmi_blocked_time = 0;
> + }
> + } else {
> + vmx->loaded_vmcs->nmi_known_unmasked = !masked;
> + if (masked)
> + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> + GUEST_INTR_STATE_NMI);
> + else
> + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
> + GUEST_INTR_STATE_NMI);
> + }
> }
>
> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
> @@ -5621,6 +5648,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
> if (to_vmx(vcpu)->nested.nested_run_pending)
> return 0;
>
> + if (!cpu_has_virtual_nmis() &&
> + to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
> + return 0;
> +
> return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
> (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
> | GUEST_INTR_STATE_NMI));
> @@ -6348,6 +6379,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
> * AAK134, BY25.
> */
> if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> + cpu_has_virtual_nmis() &&
> (exit_qualification & INTR_INFO_UNBLOCK_NMI))
> vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
>
> @@ -6820,7 +6852,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
> }
>
> /* Create a new VMCS */
> - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
> + item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
> if (!item)
> return NULL;
> item->vmcs02.vmcs = alloc_vmcs();
> @@ -7837,6 +7869,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
> * "blocked by NMI" bit has to be set before next VM entry.
> */
> if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> + cpu_has_virtual_nmis() &&
> (exit_qualification & INTR_INFO_UNBLOCK_NMI))
> vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> GUEST_INTR_STATE_NMI);
> @@ -8554,6 +8587,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> + if (unlikely(!cpu_has_virtual_nmis() &&
> + vmx->loaded_vmcs->soft_vnmi_blocked)) {
> + if (vmx_interrupt_allowed(vcpu)) {
> + vmx->loaded_vmcs->soft_vnmi_blocked = 0;
> + } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
> + vcpu->arch.nmi_pending) {
> + /*
> + * This CPU doesn't support us in finding the end of an
> + * NMI-blocked window if the guest runs with IRQs
> + * disabled. So we pull the trigger after 1 s of
> + * futile waiting, but inform the user about this.
> + */
> + printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
> + "state on VCPU %d after 1 s timeout\n",
> + __func__, vcpu->vcpu_id);
> + vmx->loaded_vmcs->soft_vnmi_blocked = 0;
> + }
> + }
> +
> if (exit_reason < kvm_vmx_max_exit_handlers
> && kvm_vmx_exit_handlers[exit_reason])
> return kvm_vmx_exit_handlers[exit_reason](vcpu);
> @@ -8837,33 +8889,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
>
> idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
>
> - if (vmx->loaded_vmcs->nmi_known_unmasked)
> - return;
> - /*
> - * Can't use vmx->exit_intr_info since we're not sure what
> - * the exit reason is.
> - */
> - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
> - vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
> - /*
> - * SDM 3: 27.7.1.2 (September 2008)
> - * Re-set bit "block by NMI" before VM entry if vmexit caused by
> - * a guest IRET fault.
> - * SDM 3: 23.2.2 (September 2008)
> - * Bit 12 is undefined in any of the following cases:
> - * If the VM exit sets the valid bit in the IDT-vectoring
> - * information field.
> - * If the VM exit is due to a double fault.
> - */
> - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
> - vector != DF_VECTOR && !idtv_info_valid)
> - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> - GUEST_INTR_STATE_NMI);
> - else
> - vmx->loaded_vmcs->nmi_known_unmasked =
> - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
> - & GUEST_INTR_STATE_NMI);
> + if (cpu_has_virtual_nmis()) {
> + if (vmx->loaded_vmcs->nmi_known_unmasked)
> + return;
> + /*
> + * Can't use vmx->exit_intr_info since we're not sure what
> + * the exit reason is.
> + */
> + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> + unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
> + vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
> + /*
> + * SDM 3: 27.7.1.2 (September 2008)
> + * Re-set bit "block by NMI" before VM entry if vmexit caused by
> + * a guest IRET fault.
> + * SDM 3: 23.2.2 (September 2008)
> + * Bit 12 is undefined in any of the following cases:
> + * If the VM exit sets the valid bit in the IDT-vectoring
> + * information field.
> + * If the VM exit is due to a double fault.
> + */
> + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
> + vector != DF_VECTOR && !idtv_info_valid)
> + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> + GUEST_INTR_STATE_NMI);
> + else
> + vmx->loaded_vmcs->nmi_known_unmasked =
> + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
> + & GUEST_INTR_STATE_NMI);
> + } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
> + vmx->loaded_vmcs->vnmi_blocked_time +=
> + ktime_to_ns(ktime_sub(ktime_get(),
> + vmx->loaded_vmcs->entry_time));
> }
>
> static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
> @@ -8980,6 +9037,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> unsigned long debugctlmsr, cr3, cr4;
>
> + /* Record the guest's net vcpu time for enforced NMI injections. */
> + if (unlikely(!cpu_has_virtual_nmis() &&
> + vmx->loaded_vmcs->soft_vnmi_blocked))
> + vmx->loaded_vmcs->entry_time = ktime_get();
> +
> /* Don't enter VMX if guest state is invalid, let the exit handler
> start emulation until we arrive back to a valid state */
> if (vmx->emulation_required)
Acked-by: Khalid Elmously <khalid.elmously at canonical.com>