[PATCH 02/20] [Bionic] (upstream) KVM: PPC: Book3S HV: Radix page fault handler optimizations

Leonardo Bras leonardo at linux.ibm.com
Mon Mar 11 21:39:49 UTC 2019


From: Paul Mackerras <paulus at ozlabs.org>

BugLink: https://bugs.launchpad.net/bugs/1788098

This improves the handling of transparent huge pages in the radix
hypervisor page fault handler.  Previously, if a small page is faulted
in to a 2MB region of guest physical space, that means that there is
a page table pointer at the PMD level, which could never be replaced
by a leaf (2MB) PMD entry.  This adds the code to clear the PMD,
invalidate the page walk cache and free the page table page in this
situation, so that the leaf PMD entry can be created.

This also adds code to check whether a PMD or PTE being inserted is
the same as is already there (because of a race with another CPU that
faulted on the same page) and if so, we don't replace the existing
entry, meaning that we avoid both invalidating the PTE or PMD and doing
a TLB invalidation.

Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
(cherry picked from commit c4c8a7643e74ebd7f2cfa80807562f16bb58c1d9 v4.19)
Signed-off-by: Leonardo Bras <leonardo at linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 42 +++++++++++++++++---------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d9bafe9a371..c6fb2bfcfb8b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -163,6 +163,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	asm volatile("ptesync": : :"memory");
 }
 
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+{
+	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+	asm volatile("ptesync": : :"memory");
+	/* RIC=1 PRS=0 R=1 IS=2 */
+	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
 				      unsigned long clr, unsigned long set,
 				      unsigned long addr, unsigned int shift)
@@ -264,6 +275,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			ret = -EAGAIN;
 			goto out_unlock;
 		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
 		/* Valid 2MB page here already, remove it */
 		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
 					      ~0UL, 0, lgpa, PMD_SHIFT);
@@ -278,12 +294,13 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		}
 	} else if (level == 1 && !pmd_none(*pmd)) {
 		/*
-		 * There's a page table page here, but we wanted
-		 * to install a large page.  Tell the caller and let
-		 * it try installing a normal page if it wants.
+		 * There's a page table page here, but we wanted to
+		 * install a large page, so remove and free the page
+		 * table page.  new_ptep will be NULL since level == 1.
 		 */
-		ret = -EBUSY;
-		goto out_unlock;
+		new_ptep = pte_offset_kernel(pmd, 0);
+		pmd_clear(pmd);
+		kvmppc_radix_flush_pwc(kvm, gpa);
 	}
 	if (level == 0) {
 		if (pmd_none(*pmd)) {
@@ -294,6 +311,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		}
 		ptep = pte_offset_kernel(pmd, gpa);
 		if (pte_present(*ptep)) {
+			/* Check if someone else set the same thing */
+			if (pte_raw(*ptep) == pte_raw(pte)) {
+				ret = 0;
+				goto out_unlock;
+			}
 			/* PTE was previously valid, so invalidate it */
 			old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
 						      0, gpa, 0);
@@ -472,16 +494,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Allocate space in the tree and write the PTE */
 	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	if (ret == -EBUSY) {
-		/*
-		 * There's already a PMD where wanted to install a large page;
-		 * for now, fall back to installing a small page.
-		 */
-		level = 0;
-		pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
-		pte = pfn_pte(pfn, __pgprot(pgflags));
-		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	}
 
 	if (page) {
 		if (!ret && (pgflags & _PAGE_WRITE))
-- 
2.20.1




More information about the kernel-team mailing list