[PATCH 6/9] swiotlb: refactor swiotlb_tbl_map_single
Kleber Souza
kleber.souza at canonical.com
Tue Sep 21 09:48:50 UTC 2021
On 21.09.21 03:28, Khalid Elmously wrote:
> From: Christoph Hellwig <hch at lst.de>
>
> BugLink: https://bugs.launchpad.net/bugs/1943902
>
> Split out a bunch of a self-contained helpers to make the function easier
> to follow.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> Acked-by: Jianxiong Gao <jxgao at google.com>
> Tested-by: Jianxiong Gao <jxgao at google.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
> (backported picked from 26a7e094783d482f3e125f09945a5bb1d867b2e6)
> [ kmously: different implementation swiotlb_tbl_map_single() required
> manual removal. Also used __phys_to_dma() instead of
> phys_to_dma_unencrypted() in find_slots ]
> Signed-off-by: Khalid Elmously <khalid.elmously at canonical.com>
> ---
> kernel/dma/swiotlb.c | 184 +++++++++++++++++++++----------------------
> 1 file changed, 91 insertions(+), 93 deletions(-)
>
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index af22c3c5e488c..5a0d9d4864aca 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -453,137 +453,135 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
> }
> }
>
> -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
> - dma_addr_t tbl_dma_addr,
> - phys_addr_t orig_addr,
> - size_t mapping_size,
> - size_t alloc_size,
> - enum dma_data_direction dir,
> - unsigned long attrs)
> -{
> - unsigned long flags;
> - phys_addr_t tlb_addr;
> - unsigned int nslots, stride, index, wrap;
> - int i;
> - unsigned long mask;
> - unsigned long offset_slots;
> - unsigned long max_slots;
> - unsigned long tmp_io_tlb_used;
> -
> - if (no_iotlb_memory)
> - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
> -
> - if (mem_encrypt_active())
> - pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> +#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
>
> - if (mapping_size > alloc_size) {
> - dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
> - mapping_size, alloc_size);
> - return (phys_addr_t)DMA_MAPPING_ERROR;
> - }
> -
> - mask = dma_get_seg_boundary(hwdev);
> +/*
> + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
> + */
> +static inline unsigned long get_max_slots(unsigned long boundary_mask)
> +{
> + if (boundary_mask == ~0UL)
> + return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
> + return nr_slots(boundary_mask + 1);
> +}
>
> - tbl_dma_addr &= mask;
> +static unsigned int wrap_index(unsigned int index)
> +{
> + if (index >= io_tlb_nslabs)
> + return 0;
> + return index;
> +}
>
> - offset_slots = nr_slots(tbl_dma_addr);
> +/*
> + * Find a suitable number of IO TLB entries size that will fit this request and
> + * allocate a buffer from that IO TLB pool.
> + */
> +static int find_slots(struct device *dev, size_t alloc_size)
> +{
> + unsigned long boundary_mask = dma_get_seg_boundary(dev);
> + dma_addr_t tbl_dma_addr =
> + __phys_to_dma(dev, io_tlb_start) & boundary_mask;
> + unsigned long max_slots = get_max_slots(boundary_mask);
> + unsigned int nslots = nr_slots(alloc_size), stride = 1;
> + unsigned int index, wrap, count = 0, i;
> + unsigned long flags;
>
> - /*
> - * Carefully handle integer overflow which can occur when mask == ~0UL.
> - */
> - max_slots = mask + 1
> - ? nr_slots(mask + 1)
> - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
> + BUG_ON(!nslots);
>
> /*
> * For mappings greater than or equal to a page, we limit the stride
> * (and hence alignment) to a page size.
> */
> - nslots = nr_slots(alloc_size);
> if (alloc_size >= PAGE_SIZE)
> - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
> - else
> - stride = 1;
> -
> - BUG_ON(!nslots);
> + stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
>
> - /*
> - * Find suitable number of IO TLB entries size that will fit this
> - * request and allocate a buffer from that IO TLB pool.
> - */
> spin_lock_irqsave(&io_tlb_lock, flags);
> -
> if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
> goto not_found;
>
> - index = ALIGN(io_tlb_index, stride);
> - if (index >= io_tlb_nslabs)
> - index = 0;
> - wrap = index;
> -
> + index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
> do {
> - while (iommu_is_span_boundary(index, nslots, offset_slots,
> - max_slots)) {
> - index += stride;
> - if (index >= io_tlb_nslabs)
> - index = 0;
> - if (index == wrap)
> - goto not_found;
> - }
> -
> /*
> * If we find a slot that indicates we have 'nslots' number of
> * contiguous buffers, we allocate the buffers from that slot
> * and mark the entries as '0' indicating unavailable.
> */
> - if (io_tlb_list[index] >= nslots) {
> - int count = 0;
> -
> - for (i = index; i < (int) (index + nslots); i++)
> - io_tlb_list[i] = 0;
> - for (i = index - 1;
> - io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
> - io_tlb_list[i]; i--)
> - io_tlb_list[i] = ++count;
> - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
> -
> - /*
> - * Update the indices to avoid searching in the next
> - * round.
> - */
> - io_tlb_index = ((index + nslots) < io_tlb_nslabs
> - ? (index + nslots) : 0);
> -
> - goto found;
> + if (!iommu_is_span_boundary(index, nslots,
> + nr_slots(tbl_dma_addr),
> + max_slots)) {
> + if (io_tlb_list[index] >= nslots)
> + goto found;
> }
> - index += stride;
> - if (index >= io_tlb_nslabs)
> - index = 0;
> + index = wrap_index(index + stride);
> } while (index != wrap);
>
> not_found:
> - tmp_io_tlb_used = io_tlb_used;
> -
> spin_unlock_irqrestore(&io_tlb_lock, flags);
> - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
> - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> - alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
> - return (phys_addr_t)DMA_MAPPING_ERROR;
> + return -1;
> +
> found:
> + for (i = index; i < index + nslots; i++)
> + io_tlb_list[i] = 0;
> + for (i = index - 1;
> + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
> + io_tlb_list[i]; i--)
> + io_tlb_list[i] = ++count;
> +
> + /*
> + * Update the indices to avoid searching in the next round.
> + */
> + if (index + nslots < io_tlb_nslabs)
> + io_tlb_index = index + nslots;
> + else
> + io_tlb_index = 0;
> io_tlb_used += nslots;
> +
> spin_unlock_irqrestore(&io_tlb_lock, flags);
> + return index;
> +}
> +
> +phys_addr_t swiotlb_tbl_map_single(struct device *dev, dma_addr_t dma_addr,
With this refactoring, the second parameter of this function (named tbl_dma_addr
in the original function and dma_addr here) is no longer used; the address is now
calculated in find_slots(). This means that every caller of this function is still
passing a parameter that is ignored.
Should we cherry-pick/backport fc0021aa340af65a0a37d77be39e22aa886a6132 ("swiotlb:
remove the tbl_dma_addr argument to swiotlb_tbl_map_single") to make this backport
cleaner and more consistent?
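For context, after that commit the prototype simply drops the unused argument,
roughly like this (sketch only, parameter names may differ slightly from the
upstream commit):

    phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
                                       size_t mapping_size, size_t alloc_size,
                                       enum dma_data_direction dir,
                                       unsigned long attrs);

so callers would stop computing a tbl_dma_addr that is then thrown away.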
> + phys_addr_t orig_addr, size_t mapping_size,
> + size_t alloc_size,
> + enum dma_data_direction dir,
> + unsigned long attrs)
> +{
> + unsigned int index, i;
> + phys_addr_t tlb_addr;
> +
> + if (no_iotlb_memory)
> + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
> +
> + if (mem_encrypt_active())
> + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> +
> + if (mapping_size > alloc_size) {
> + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
> + mapping_size, alloc_size);
> + return (phys_addr_t)DMA_MAPPING_ERROR;
> + }
> +
> + index = find_slots(dev, alloc_size);
> + if (index == -1) {
index is defined as unsigned int, so this check is broken.
This has been fixed upstream by 95b079d8215b83b37fa59341fda92fcb9392f14a ("swiotlb: Fix the type of index").
I think we should include that fixup in this patchset.
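For reference, that fix is essentially a one-line type change, roughly of this
shape (sketch only, the exact hunk is in 95b079d8215b):

    -	unsigned int index, i;
    +	unsigned int i;
    +	int index;	/* find_slots() returns -1 on failure */

so the failure check becomes a plain signed comparison instead of relying on an
implicit signed-to-unsigned conversion.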
> + if (!(attrs & DMA_ATTR_NO_WARN))
> + dev_warn_ratelimited(dev,
> + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> + alloc_size, io_tlb_nslabs, io_tlb_used);
> + return (phys_addr_t)DMA_MAPPING_ERROR;
> + }
>
> /*
> * Save away the mapping from the original address to the DMA address.
> * This is needed when we sync the memory. Then we sync the buffer if
> * needed.
> */
> - for (i = 0; i < nslots; i++)
> - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
> + for (i = 0; i < nr_slots(alloc_size); i++)
> + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
> +
> + tlb_addr = slot_addr(io_tlb_start, index);
> if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
> swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
> -
> return tlb_addr;
> }
>
>