[SRU][jammy:linux-azure][PATCH v2 3/4] Revert "UBUNTU: SAUCE: swiotlb: use bitmap to track free slots"
John Cabaj
john.cabaj at canonical.com
Tue Jan 28 04:18:05 UTC 2025
BugLink: https://bugs.launchpad.net/bugs/2096813
This reverts commit 9e1b646a9cac90e81b3367b5488f9c49217b85a2.
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
Acked-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
Acked-by: Aaron Jauregui <aaron.jauregui at canonical.com>
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
include/linux/swiotlb.h | 6 ++--
kernel/dma/swiotlb.c | 62 ++++++++++++++++++++++-------------------
2 files changed, 36 insertions(+), 32 deletions(-)
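
Note for reviewers (illustrative only, not part of the patch): the revert below drops the bitmap
tracking and restores the per-slot free-run counter kept in struct io_tlb_slot::list, where each
entry records how many contiguous free slots start at that index without crossing an
IO_TLB_SEGSIZE boundary. The following standalone sketch models that scheme under simplified
assumptions; SEGSIZE, NSLABS, slot_list, seg_offset(), alloc_slots() and free_slots() are names
invented for the example and do not exist in swiotlb.c.

/* Sketch of the free-run counter scheme restored by this revert. */
#include <stdio.h>

#define SEGSIZE 128                     /* stands in for IO_TLB_SEGSIZE */
#define NSLABS  (2 * SEGSIZE)

/* slot_list[i]: number of contiguous free slots starting at i,
 * never crossing a SEGSIZE-aligned segment boundary. */
static unsigned int slot_list[NSLABS];

static unsigned int seg_offset(unsigned int i)
{
	return i & (SEGSIZE - 1);       /* same idea as io_tlb_offset() */
}

static void init_slots(void)
{
	for (unsigned int i = 0; i < NSLABS; i++)
		slot_list[i] = SEGSIZE - seg_offset(i);
}

/* Allocate nslots contiguous slots; returns the first index or -1. */
static int alloc_slots(unsigned int nslots)
{
	for (unsigned int index = 0; index < NSLABS; index++) {
		unsigned int count = 0;

		if (slot_list[index] < nslots)
			continue;

		/* Claim the range ... */
		for (unsigned int i = index; i < index + nslots; i++)
			slot_list[i] = 0;
		/* ... and shrink the free run that now ends just before it. */
		for (int i = (int)index - 1;
		     i >= 0 && seg_offset(i) != SEGSIZE - 1 && slot_list[i];
		     i--)
			slot_list[i] = ++count;
		return (int)index;
	}
	return -1;
}

/* Return nslots slots starting at index, merging with the free runs
 * above and below (within the same segment). */
static void free_slots(unsigned int index, unsigned int nslots)
{
	unsigned int count = 0;

	if (seg_offset(index + nslots) != 0)    /* free run right above us? */
		count = slot_list[index + nslots];

	for (int i = (int)(index + nslots) - 1; i >= (int)index; i--)
		slot_list[i] = ++count;
	for (int i = (int)index - 1;
	     i >= 0 && seg_offset(i) != SEGSIZE - 1 && slot_list[i];
	     i--)
		slot_list[i] = ++count;
}

int main(void)
{
	init_slots();
	int a = alloc_slots(4);
	int b = alloc_slots(8);
	printf("a=%d b=%d\n", a, b);                    /* expect a=0 b=4 */
	free_slots(a, 4);
	free_slots(b, 8);
	printf("slot_list[0]=%u\n", slot_list[0]);      /* back to SEGSIZE */
	return 0;
}

The allocation path corresponds to the restored "mem->slots[index].list >= nslots" check and the
backward fixup loop in swiotlb_find_slots(); the release path corresponds to the two merge loops
restored in swiotlb_release_slots().
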
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 48526da80c1e..2b4f92668bc7 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -81,6 +81,8 @@ extern enum swiotlb_force swiotlb_force;
* @end. For default swiotlb, this is command line adjustable via
* setup_io_tlb_npages.
* @used: The number of used IO TLB block.
+ * @list: The free list describing the number of free entries available
+ * from each index.
* @index: The index to start searching in the next round.
* @orig_addr: The original address corresponding to a mapped entry.
* @alloc_size: Size of the allocated buffer.
@@ -90,8 +92,6 @@ extern enum swiotlb_force swiotlb_force;
* @late_alloc: %true if allocated using the page allocator
* @force_bounce: %true if swiotlb bouncing is forced
* @for_alloc: %true if the pool is used for memory allocation
- * @bitmap: The bitmap used to track free entries. 1 in bit X means the slot
- * indexed by X is free.
*/
struct io_tlb_mem {
phys_addr_t start;
@@ -108,8 +108,8 @@ struct io_tlb_mem {
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
+ unsigned int list;
} *slots;
- unsigned long *bitmap;
};
extern struct io_tlb_mem io_tlb_default_mem;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 44d2f17d7779..79d09e8f2032 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -235,7 +235,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
spin_lock_init(&mem->lock);
for (i = 0; i < mem->nslabs; i++) {
- __set_bit(i, mem->bitmap);
+ mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
}
@@ -270,11 +270,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
- mem->bitmap = memblock_alloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
- if (!mem->bitmap)
- panic("%s: Failed to allocate %lu bytes align=0x%x\n",
- __func__, DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
-
swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
if (verbose)
@@ -378,14 +373,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (WARN_ON_ONCE(mem->nslabs))
return -ENOMEM;
- mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(array_size(sizeof(*mem->slots), nslabs)));
- if (!mem->slots || !mem->bitmap) {
- kfree(mem->bitmap);
- kfree(mem->slots);
+ if (!mem->slots)
return -ENOMEM;
- }
set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
@@ -540,7 +531,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int index, wrap, i;
+ unsigned int index, wrap, count = 0, i;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned long flags;
@@ -577,12 +568,6 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
continue;
}
- /* Start from the next segment if no enough free entries */
- if (io_tlb_offset(index) + nslots > IO_TLB_SEGSIZE) {
- index = wrap_index(mem, round_up(index, IO_TLB_SEGSIZE));
- continue;
- }
-
/*
* If we find a slot that indicates we have 'nslots' number of
* contiguous buffers, we allocate the buffers from that slot
@@ -591,8 +576,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
if (!iommu_is_span_boundary(index, nslots,
nr_slots(tbl_dma_addr),
max_slots)) {
- if (find_next_zero_bit(mem->bitmap, index + nslots, index) ==
- index + nslots)
+ if (mem->slots[index].list >= nslots)
goto found;
}
index = wrap_index(mem, index + stride);
@@ -604,10 +588,14 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
found:
for (i = index; i < index + nslots; i++) {
- __clear_bit(i, mem->bitmap);
+ mem->slots[i].list = 0;
mem->slots[i].alloc_size =
alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
}
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+ mem->slots[i].list; i--)
+ mem->slots[i].list = ++count;
/*
* Update the indices to avoid searching in the next round.
@@ -681,19 +669,38 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int i;
+ int count, i;
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the pool being returned.
+ */
spin_lock_irqsave(&mem->lock, flags);
+ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+ count = mem->slots[index + nslots].list;
+ else
+ count = 0;
+
/*
- * Return the slots to swiotlb, updating bitmap to indicate
- * corresponding entries are free.
+ * Step 1: return the slots to the free list, merging the slots with
+ * superceeding slots
*/
for (i = index + nslots - 1; i >= index; i--) {
- __set_bit(i, mem->bitmap);
+ mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
}
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
+ i--)
+ mem->slots[i].list = ++count;
mem->used -= nslots;
spin_unlock_irqrestore(&mem->lock, flags);
}
@@ -878,12 +885,9 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
if (!mem)
return -ENOMEM;
- mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
GFP_KERNEL);
- if (!mem->slots || !mem->bitmap) {
- kfree(mem->slots);
- kfree(mem->bitmap);
+ if (!mem->slots) {
kfree(mem);
return -ENOMEM;
}
--
2.43.0