[SRU][jammy:linux-azure][PATCH v2 3/4] Revert "UBUNTU: SAUCE: swiotlb: use bitmap to track free slots"

John Cabaj john.cabaj at canonical.com
Tue Jan 28 04:18:05 UTC 2025


BugLink: https://bugs.launchpad.net/bugs/2096813

This reverts commit 9e1b646a9cac90e81b3367b5488f9c49217b85a2 ("UBUNTU:
SAUCE: swiotlb: use bitmap to track free slots"), restoring the original
scheme in which each IO TLB slot tracks the number of contiguous free
entries available from its index.

Signed-off-by: John Cabaj <john.cabaj at canonical.com>
Acked-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
Acked-by: Aaron Jauregui <aaron.jauregui at canonical.com>
---
 include/linux/swiotlb.h |  6 ++--
 kernel/dma/swiotlb.c    | 62 ++++++++++++++++++++++-------------------
 2 files changed, 36 insertions(+), 32 deletions(-)
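
For reviewers who don't have the pre-bitmap code paged in: the scheme
this revert restores keeps, in each io_tlb_slot's `list` field, the
number of contiguous free slots reachable from that index within its
IO_TLB_SEGSIZE-aligned segment, so swiotlb_find_slots() can test a
candidate index with a single comparison instead of scanning a bitmap
range. The userspace sketch below models only that accounting; the
harness, the alloc_at()/release_at() helper names and the demo indices
are invented for illustration, and the explicit `i >= 0` guards stand
in for the kernel's reliance on io_tlb_offset() wrapping at index 0.

/*
 * Illustrative userspace model of the free-list accounting restored by
 * this revert -- a sketch, not the kernel code.
 */
#include <assert.h>
#include <stdio.h>

#define IO_TLB_SEGSIZE	128
#define NSLABS		256	/* two segments suffice for the demo */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static struct { int list; } slots[NSLABS];

/* Offset of slot i within its IO_TLB_SEGSIZE-aligned segment. */
static int io_tlb_offset(int i)
{
	return i & (IO_TLB_SEGSIZE - 1);
}

static void init_slots(void)
{
	/* As in swiotlb_init_io_tlb_mem(): slot i initially sees every
	 * slot from i to the end of its segment as free. */
	for (int i = 0; i < NSLABS; i++)
		slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
}

/* Claim nslots at `index`; one comparison replaces a bitmap range scan. */
static int alloc_at(int index, int nslots)
{
	int count = 0, i;

	if (slots[index].list < nslots)
		return -1;	/* not enough contiguous free entries here */

	for (i = index; i < index + nslots; i++)
		slots[i].list = 0;
	/* Free slots below `index` can no longer reach past it, so their
	 * counters shrink to the distance up to `index`. */
	for (i = index - 1;
	     i >= 0 && io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && slots[i].list;
	     i--)
		slots[i].list = ++count;
	return 0;
}

/* Return nslots at `index`, merging with free runs above and below. */
static void release_at(int index, int nslots)
{
	int count, i;

	/* Seed the count from the free run starting just above the
	 * buffer, unless the buffer ends exactly at a segment boundary. */
	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
		count = slots[index + nslots].list;
	else
		count = 0;

	/* Step 1: free the buffer itself, counting up from its tail. */
	for (i = index + nslots - 1; i >= index; i--)
		slots[i].list = ++count;

	/* Step 2: extend the counters of the free run below, if any;
	 * runs never cross an IO_TLB_SEGSIZE boundary. */
	for (i = index - 1;
	     i >= 0 && io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && slots[i].list;
	     i--)
		slots[i].list = ++count;
}

int main(void)
{
	init_slots();
	assert(slots[0].list == IO_TLB_SEGSIZE);

	assert(alloc_at(8, 4) == 0);
	assert(slots[8].list == 0);	/* busy slots read as "0 free" */
	assert(slots[7].list == 1);	/* the run below now stops at 8 */
	assert(slots[0].list == 8);
	assert(alloc_at(8, 1) == -1);

	release_at(8, 4);
	assert(slots[0].list == IO_TLB_SEGSIZE);	/* fully merged */
	printf("free-list model OK\n");
	return 0;
}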

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 48526da80c1e..2b4f92668bc7 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -81,6 +81,8 @@ extern enum swiotlb_force swiotlb_force;
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
  * @used:	The number of used IO TLB block.
+ * @list:	The free list describing the number of free entries available
+ *		from each index.
  * @index:	The index to start searching in the next round.
  * @orig_addr:	The original address corresponding to a mapped entry.
  * @alloc_size:	Size of the allocated buffer.
@@ -90,8 +92,6 @@ extern enum swiotlb_force swiotlb_force;
  * @late_alloc:	%true if allocated using the page allocator
  * @force_bounce: %true if swiotlb bouncing is forced
  * @for_alloc:  %true if the pool is used for memory allocation
- * @bitmap:	The bitmap used to track free entries. 1 in bit X means the slot
- *		indexed by X is free.
  */
 struct io_tlb_mem {
 	phys_addr_t start;
@@ -108,8 +108,8 @@ struct io_tlb_mem {
 	struct io_tlb_slot {
 		phys_addr_t orig_addr;
 		size_t alloc_size;
+		unsigned int list;
 	} *slots;
-	unsigned long *bitmap;
 };
 extern struct io_tlb_mem io_tlb_default_mem;
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 44d2f17d7779..79d09e8f2032 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -235,7 +235,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 
 	spin_lock_init(&mem->lock);
 	for (i = 0; i < mem->nslabs; i++) {
-		__set_bit(i, mem->bitmap);
+		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
@@ -270,11 +270,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	mem->bitmap = memblock_alloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
-	if (!mem->bitmap)
-		panic("%s: Failed to allocate %lu bytes align=0x%x\n",
-		      __func__, DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
-
 	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
 
 	if (verbose)
@@ -378,14 +373,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (WARN_ON_ONCE(mem->nslabs))
 		return -ENOMEM;
 
-	mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 		get_order(array_size(sizeof(*mem->slots), nslabs)));
-	if (!mem->slots || !mem->bitmap) {
-		kfree(mem->bitmap);
-		kfree(mem->slots);
+	if (!mem->slots)
 		return -ENOMEM;
-	}
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
@@ -540,7 +531,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	unsigned long max_slots = get_max_slots(boundary_mask);
 	unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int index, wrap, i;
+	unsigned int index, wrap, count = 0, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
 
@@ -577,12 +568,6 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 			continue;
 		}
 
-		/* Start from the next segment if no enough free entries */
-		if (io_tlb_offset(index) + nslots > IO_TLB_SEGSIZE) {
-			index = wrap_index(mem, round_up(index, IO_TLB_SEGSIZE));
-			continue;
-		}
-
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
 		 * contiguous buffers, we allocate the buffers from that slot
@@ -591,8 +576,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		if (!iommu_is_span_boundary(index, nslots,
 					    nr_slots(tbl_dma_addr),
 					    max_slots)) {
-			if (find_next_zero_bit(mem->bitmap, index + nslots, index) ==
-					index + nslots)
+			if (mem->slots[index].list >= nslots)
 				goto found;
 		}
 		index = wrap_index(mem, index + stride);
@@ -604,10 +588,14 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 
 found:
 	for (i = index; i < index + nslots; i++) {
-		__clear_bit(i, mem->bitmap);
+		mem->slots[i].list = 0;
 		mem->slots[i].alloc_size =
 			alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
 	}
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+	     mem->slots[i].list; i--)
+		mem->slots[i].list = ++count;
 
 	/*
 	 * Update the indices to avoid searching in the next round.
@@ -681,19 +669,38 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-	int i;
+	int count, i;
 
+	/*
+	 * Return the buffer to the free list by setting the corresponding
+	 * entries to indicate the number of contiguous entries available.
+	 * While returning the entries to the free list, we merge the entries
+	 * with slots below and above the pool being returned.
+	 */
 	spin_lock_irqsave(&mem->lock, flags);
+	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+		count = mem->slots[index + nslots].list;
+	else
+		count = 0;
+
 	/*
-	 * Return the slots to swiotlb, updating bitmap to indicate
-	 * corresponding entries are free.
+	 * Step 1: return the slots to the free list, merging the slots with
+	 * superceeding slots
 	 */
 	for (i = index + nslots - 1; i >= index; i--) {
-		__set_bit(i, mem->bitmap);
+		mem->slots[i].list = ++count;
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
 
+	/*
+	 * Step 2: merge the returned slots with the preceding slots, if
+	 * available (non zero)
+	 */
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
+	     i--)
+		mem->slots[i].list = ++count;
 	mem->used -= nslots;
 	spin_unlock_irqrestore(&mem->lock, flags);
 }
@@ -878,12 +885,9 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 		if (!mem)
 			return -ENOMEM;
 
-		mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
 		mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
 				     GFP_KERNEL);
-		if (!mem->slots || !mem->bitmap) {
-			kfree(mem->slots);
-			kfree(mem->bitmap);
+		if (!mem->slots) {
 			kfree(mem);
 			return -ENOMEM;
 		}
-- 
2.43.0