APPLIED: [SRU Impish] KVM: x86/mmu: do compare-and-exchange of gPTE via the user address
Stefan Bader
stefan.bader at canonical.com
Fri May 6 07:50:41 UTC 2022
On 05.05.22 21:39, Thadeu Lima de Souza Cascardo wrote:
> From: Paolo Bonzini <pbonzini at redhat.com>
>
> commit 2a8859f373b0a86f0ece8ec8312607eacf12485d upstream.
>
> FNAME(cmpxchg_gpte) is an inefficient mess. It is at least decent if it
> can go through get_user_pages_fast(), but if it cannot then it tries to
> use memremap(); that is not just terribly slow, it is also wrong because
> it assumes that the VM_PFNMAP VMA is contiguous.
>
> The right way to do it would be to do the same thing as
> hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to
> fix up page faults before giving up", 2016-07-05), using follow_pte()
> and fixup_user_fault() to determine the correct address to use for
> memremap(). To do this, one could for example extract hva_to_pfn()
> for use outside virt/kvm/kvm_main.c. But really there is no reason to
> do that either, because there is already a perfectly valid address to
> do the cmpxchg() on, only it is a userspace address. That means doing
> user_access_begin()/user_access_end() and writing the code in assembly
> to handle exceptions correctly. Worse, the guest PTE can be 8-byte
> even on i686 so there is the extra complication of using cmpxchg8b to
> account for. But at least it is an efficient mess.
>
> (Thanks to Linus for suggesting improvement on the inline assembly).
>
> Reported-by: Qiuhao Li <qiuhao at sysec.org>
> Reported-by: Gaoning Pan <pgn at zju.edu.cn>
> Reported-by: Yongkang Jia <kangel at zju.edu.cn>
> Reported-by: syzbot+6cde2282daa792c49ab8 at syzkaller.appspotmail.com
> Debugged-by: Tadeusz Struk <tadeusz.struk at linaro.org>
> Tested-by: Maxim Levitsky <mlevitsk at redhat.com>
> Cc: stable at vger.kernel.org
> Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs")
> Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> (cherry picked from commit 8771d9673e0bdb7148299f3c074667124bde6dff linux-5.15.y)
> CVE-2022-1158
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
> ---
Applied to impish:linux/master-next. Thanks.
-Stefan
> arch/x86/kvm/mmu/paging_tmpl.h | 77 ++++++++++++++++------------------
> 1 file changed, 37 insertions(+), 40 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 45160fe80098..20a269bce359 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -34,9 +34,8 @@
> #define PT_HAVE_ACCESSED_DIRTY(mmu) true
> #ifdef CONFIG_X86_64
> #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
> - #define CMPXCHG cmpxchg
> + #define CMPXCHG "cmpxchgq"
> #else
> - #define CMPXCHG cmpxchg64
> #define PT_MAX_FULL_LEVELS 2
> #endif
> #elif PTTYPE == 32
> @@ -52,7 +51,7 @@
> #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
> #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
> #define PT_HAVE_ACCESSED_DIRTY(mmu) true
> - #define CMPXCHG cmpxchg
> + #define CMPXCHG "cmpxchgl"
> #elif PTTYPE == PTTYPE_EPT
> #define pt_element_t u64
> #define guest_walker guest_walkerEPT
> @@ -65,7 +64,9 @@
> #define PT_GUEST_DIRTY_SHIFT 9
> #define PT_GUEST_ACCESSED_SHIFT 8
> #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
> - #define CMPXCHG cmpxchg64
> + #ifdef CONFIG_X86_64
> + #define CMPXCHG "cmpxchgq"
> + #endif
> #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
> #else
> #error Invalid PTTYPE value
> @@ -147,43 +148,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> pt_element_t __user *ptep_user, unsigned index,
> pt_element_t orig_pte, pt_element_t new_pte)
> {
> - int npages;
> - pt_element_t ret;
> - pt_element_t *table;
> - struct page *page;
> -
> - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
> - if (likely(npages == 1)) {
> - table = kmap_atomic(page);
> - ret = CMPXCHG(&table[index], orig_pte, new_pte);
> - kunmap_atomic(table);
> -
> - kvm_release_page_dirty(page);
> - } else {
> - struct vm_area_struct *vma;
> - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
> - unsigned long pfn;
> - unsigned long paddr;
> -
> - mmap_read_lock(current->mm);
> - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
> - if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
> - mmap_read_unlock(current->mm);
> - return -EFAULT;
> - }
> - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> - paddr = pfn << PAGE_SHIFT;
> - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
> - if (!table) {
> - mmap_read_unlock(current->mm);
> - return -EFAULT;
> - }
> - ret = CMPXCHG(&table[index], orig_pte, new_pte);
> - memunmap(table);
> - mmap_read_unlock(current->mm);
> - }
> + int r = -EFAULT;
> +
> + if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
> + return -EFAULT;
> +
> +#ifdef CMPXCHG
> + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
> + "mov $0, %[r]\n"
> + "setnz %b[r]\n"
> + "2:"
> + _ASM_EXTABLE_UA(1b, 2b)
> + : [ptr] "+m" (*ptep_user),
> + [old] "+a" (orig_pte),
> + [r] "+q" (r)
> + : [new] "r" (new_pte)
> + : "memory");
> +#else
> + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
> + "movl $0, %[r]\n"
> + "jz 2f\n"
> + "incl %[r]\n"
> + "2:"
> + _ASM_EXTABLE_UA(1b, 2b)
> + : [ptr] "+m" (*ptep_user),
> + [old] "+A" (orig_pte),
> + [r] "+rm" (r)
> + : [new_lo] "b" ((u32)new_pte),
> + [new_hi] "c" ((u32)(new_pte >> 32))
> + : "memory");
> +#endif
>
> - return (ret != orig_pte);
> + user_access_end();
> + return r;
> }
>
> static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20220506/3cb12041/attachment-0001.sig>
More information about the kernel-team
mailing list