[SRU][jammy:linux-azure][PATCH v2 2/4] Revert "UBUNTU: SAUCE: swiotlb: allocate memory in a cache-friendly way"

From: John Cabaj <john.cabaj at canonical.com>
Date: Tue Jan 28 04:18:04 UTC 2025


BugLink: https://bugs.launchpad.net/bugs/2096813

This reverts commit 3db34e258474deb3b74432a6c95a28f3d228ba8a.

Signed-off-by: John Cabaj <john.cabaj at canonical.com>
Acked-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
Acked-by: Aaron Jauregui <aaron.jauregui at canonical.com>
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
 include/linux/swiotlb.h | 13 ++++-----
 kernel/dma/swiotlb.c    | 65 ++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 28 deletions(-)
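
Note (after the ---, so not part of the commit message): the
swiotlb_find_slots() hunk below restores the classic circular
index/stride search. As a reading aid, here is a minimal standalone C
sketch of that search; SLOT_COUNT, slot_free[], next_index and
find_slots() are simplified stand-ins for mem->nslabs, the slot
bitmap, mem->index and the real function, and the segment
(IO_TLB_SEGSIZE) and address-mask checks are omitted.

#include <stdbool.h>
#include <stdio.h>

#define SLOT_COUNT 64                       /* stand-in for mem->nslabs */
#define ALIGN_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

static bool slot_free[SLOT_COUNT];
static unsigned int next_index;             /* stand-in for mem->index */

static unsigned int wrap_index(unsigned int index)
{
	return index >= SLOT_COUNT ? 0 : index;
}

static int find_slots(unsigned int nslots, unsigned int stride)
{
	unsigned int index, wrap, i;

	/* Resume, stride-aligned, where the previous allocation ended. */
	index = wrap = wrap_index(ALIGN_UP(next_index, stride));
	do {
		bool fits = index + nslots <= SLOT_COUNT;

		for (i = index; fits && i < index + nslots; i++)
			fits = slot_free[i];
		if (fits) {
			for (i = index; i < index + nslots; i++)
				slot_free[i] = false;
			/* Remember where the next search should resume. */
			next_index = wrap_index(index + nslots);
			return (int)index;
		}
		index = wrap_index(index + stride);
	} while (index != wrap);

	return -1;                          /* pool exhausted */
}

int main(void)
{
	for (unsigned int i = 0; i < SLOT_COUNT; i++)
		slot_free[i] = true;

	printf("%d\n", find_slots(4, 1));   /* 0 */
	printf("%d\n", find_slots(4, 1));   /* 4: resumes past the first */
	printf("%d\n", find_slots(8, 8));   /* 8: stride-aligned start */
	return 0;
}

The rolling next_index is what mem->index reinstates: each search
resumes where the previous allocation ended rather than popping the
head of a free list.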

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b4bdd3ffb65d..48526da80c1e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -65,12 +65,6 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
-struct io_tlb_slot {
-	phys_addr_t orig_addr;
-	size_t alloc_size;
-	struct list_head node;
-};
-
 /**
  * struct io_tlb_mem - IO TLB Memory Pool Descriptor
  *
@@ -105,13 +99,16 @@ struct io_tlb_mem {
 	void *vaddr;
 	unsigned long nslabs;
 	unsigned long used;
-	struct list_head free_slots;
+	unsigned int index;
 	spinlock_t lock;
 	struct dentry *debugfs;
 	bool late_alloc;
 	bool force_bounce;
 	bool for_alloc;
-	struct io_tlb_slot *slots;
+	struct io_tlb_slot {
+		phys_addr_t orig_addr;
+		size_t alloc_size;
+	} *slots;
 	unsigned long *bitmap;
 };
 extern struct io_tlb_mem io_tlb_default_mem;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c28eed64d905..44d2f17d7779 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -227,7 +227,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 	mem->nslabs = nslabs;
 	mem->start = start;
 	mem->end = mem->start + bytes;
-	INIT_LIST_HEAD(&mem->free_slots);
+	mem->index = 0;
 	mem->late_alloc = late_alloc;
 
 	if (swiotlb_force == SWIOTLB_FORCE)
@@ -238,7 +238,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
-		list_add_tail(&mem->slots[i].node, &mem->free_slots);
 	}
 
 	/*
@@ -520,6 +519,13 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
 	return nr_slots(boundary_mask + 1);
 }
 
+static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
+{
+	if (index >= mem->nslabs)
+		return 0;
+	return index;
+}
+
 /*
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
@@ -528,41 +534,54 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 			      size_t alloc_size, unsigned int alloc_align_mask)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-	struct io_tlb_slot *slot, *tmp;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
 		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
 	unsigned long max_slots = get_max_slots(boundary_mask);
-	unsigned int iotlb_align_mask =
-		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
-	unsigned int nslots = nr_slots(alloc_size);
-	unsigned int index, i;
+	unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
+	unsigned int nslots = nr_slots(alloc_size), stride;
+	unsigned int index, wrap, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
 
 	BUG_ON(!nslots);
 
+	/*
+	 * Ensure that the allocation is at least slot-aligned and update
+	 * 'iotlb_align_mask' to ignore bits that will be preserved when
+	 * offsetting into the allocation.
+	 */
+	alloc_align_mask |= (IO_TLB_SIZE - 1);
+	iotlb_align_mask &= ~alloc_align_mask;
+
+	/*
+	 * For mappings with an alignment requirement, don't bother looping to
+	 * unaligned slots once we've found an aligned one.  For allocations of
+	 * PAGE_SIZE or larger, only look for page-aligned allocations.
+	 */
+	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+	if (alloc_size >= PAGE_SIZE)
+		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+	stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
+
 	spin_lock_irqsave(&mem->lock, flags);
 	if (unlikely(nslots > mem->nslabs - mem->used))
 		goto not_found;
 
-	list_for_each_entry_safe(slot, tmp, &mem->free_slots, node) {
-		index = slot - mem->slots;
+	index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
+	do {
 		if (orig_addr &&
 		    (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
 			    (orig_addr & iotlb_align_mask)) {
+			index = wrap_index(mem, index + 1);
 			continue;
 		}
 
-		if (io_tlb_offset(index) + nslots > IO_TLB_SEGSIZE)
-			continue;
-
-		/*
-		 * If requested size is larger than a page, ensure allocated
-		 * memory to be page aligned.
-		 */
-		if (alloc_size >= PAGE_SIZE && (slot_addr(tbl_dma_addr, index) & ~PAGE_MASK))
+		/* Start from the next segment if there are not enough free entries */
+		if (io_tlb_offset(index) + nslots > IO_TLB_SEGSIZE) {
+			index = wrap_index(mem, round_up(index, IO_TLB_SEGSIZE));
 			continue;
+		}
 
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
@@ -576,7 +595,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 					index + nslots)
 				goto found;
 		}
-	}
+		index = wrap_index(mem, index + stride);
+	} while (index != wrap);
 
 not_found:
 	spin_unlock_irqrestore(&mem->lock, flags);
@@ -587,9 +607,15 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		__clear_bit(i, mem->bitmap);
 		mem->slots[i].alloc_size =
 			alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
-		list_del(&mem->slots[i].node);
 	}
 
+	/*
+	 * Update the index so the next search resumes past this allocation.
+	 */
+	if (index + nslots < mem->nslabs)
+		mem->index = index + nslots;
+	else
+		mem->index = 0;
 	mem->used += nslots;
 
 	spin_unlock_irqrestore(&mem->lock, flags);
@@ -666,7 +692,6 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
-		list_add(&mem->slots[i].node, &mem->free_slots);
 	}
 
 	mem->used -= nslots;
-- 
2.43.0
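
P.S. For contrast, a sketch of the scheme this patch reverts: the
SAUCE commit gave each io_tlb_slot a list node, swiotlb_release_slots()
pushed freed slots onto the head of mem->free_slots with list_add(),
and swiotlb_find_slots() took the first suitable entry from that list,
so recently released slots were reused first (the "cache-friendly"
property in the subject). The names below are hypothetical, and a
plain singly-linked list stands in for the kernel's list_head API.

#include <stddef.h>
#include <stdio.h>

struct slot {
	size_t alloc_size;	/* stand-in for io_tlb_slot.alloc_size */
	struct slot *next;	/* stand-in for the struct list_head node */
};

struct pool {
	struct slot *free_head;	/* stand-in for mem->free_slots */
};

/* Take the first free slot: O(1), no search. */
static struct slot *slot_alloc(struct pool *p)
{
	struct slot *s = p->free_head;

	if (s)
		p->free_head = s->next;
	return s;
}

/* Free to the head (like list_add), so hot slots are reused first. */
static void slot_release(struct pool *p, struct slot *s)
{
	s->next = p->free_head;
	p->free_head = s;
}

int main(void)
{
	struct slot slots[4] = { { 0 } };
	struct pool p = { NULL };

	for (int i = 3; i >= 0; i--)
		slot_release(&p, &slots[i]);

	struct slot *a = slot_alloc(&p);
	slot_release(&p, a);
	printf("%d\n", slot_alloc(&p) == a);	/* 1: freed slot reused first */
	return 0;
}

Head insertion on free is what made the most recently used slots come
back first; the stride search restored above trades that for the
upstream allocation behaviour.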



