[PATCH 03/20] KVM: PPC: Book3S HV: Streamline setting of reference and change bits

Leonardo Bras leonardo at linux.ibm.com
Mon Mar 11 16:50:34 UTC 2019


From: Paul Mackerras <paulus at ozlabs.org>

When using the radix MMU, we can get hypervisor page fault interrupts
with the DSISR_SET_RC bit set in DSISR/HSRR1, indicating that an
attempt to set the R (reference) or C (change) bit in a PTE atomically
failed.  Previously we would find the corresponding Linux PTE and
check the permission and dirty bits there, but this is not really
necessary since we only need to do what the hardware was trying to
do, namely set R or C atomically.  This removes the code that reads
the Linux PTE and just update the partition-scoped PTE, having first
checked that it is still present, and if the access is a write, that
the PTE still has write permission.

Furthermore, we now check whether any other relevant bits are set
in DSISR, and if there are, then we proceed with the rest of the
function in order to handle whatever condition they represent,
instead of returning to the guest as we did previously.

Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
Signed-off-by: Leonardo Bras <leonardo at linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 52 ++++++++++----------------
 1 file changed, 19 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c6fb2bfcfb8b..c5c66b13ca88 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -348,7 +348,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
 	struct page *page = NULL, *pages[1];
-	long ret, npages, ok;
+	long ret, npages;
 	unsigned int writing;
 	struct vm_area_struct *vma;
 	unsigned long flags;
@@ -400,43 +400,29 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (dsisr & DSISR_SET_RC) {
 		/*
 		 * Need to set an R or C bit in the 2nd-level tables;
-		 * if the relevant bits aren't already set in the linux
-		 * page tables, fall through to do the gup_fast to
-		 * set them in the linux page tables too.
+		 * since we are just helping out the hardware here,
+		 * it is sufficient to do what the hardware does.
 		 */
-		ok = 0;
 		pgflags = _PAGE_ACCESSED;
 		if (writing)
 			pgflags |= _PAGE_DIRTY;
-		local_irq_save(flags);
-		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
-		if (ptep) {
-			pte = READ_ONCE(*ptep);
-			if (pte_present(pte) &&
-			    (pte_val(pte) & pgflags) == pgflags)
-				ok = 1;
-		}
-		local_irq_restore(flags);
-		if (ok) {
-			spin_lock(&kvm->mmu_lock);
-			if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			/*
-			 * We are walking the secondary page table here. We can do this
-			 * without disabling irq.
-			 */
-			ptep = __find_linux_pte(kvm->arch.pgtable,
-						gpa, NULL, &shift);
-			if (ptep && pte_present(*ptep)) {
-				kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
-							gpa, shift);
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			spin_unlock(&kvm->mmu_lock);
+		/*
+		 * We are walking the secondary page table here. We can do this
+		 * without disabling irq.
+		 */
+		spin_lock(&kvm->mmu_lock);
+		ptep = __find_linux_pte(kvm->arch.pgtable,
+					gpa, NULL, &shift);
+		if (ptep && pte_present(*ptep) &&
+		    (!writing || pte_write(*ptep))) {
+			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+						gpa, shift);
+			dsisr &= ~DSISR_SET_RC;
 		}
+		spin_unlock(&kvm->mmu_lock);
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
 	}
 
 	ret = -EFAULT;
-- 
2.20.1




More information about the kernel-team mailing list