[PATCH 6/9] swiotlb: refactor swiotlb_tbl_map_single

Kleber Souza kleber.souza at canonical.com
Tue Sep 21 09:48:50 UTC 2021


On 21.09.21 03:28, Khalid Elmously wrote:
> From: Christoph Hellwig <hch at lst.de>
> 
> BugLink: https://bugs.launchpad.net/bugs/1943902
> 
> Split out a bunch of a self-contained helpers to make the function easier
> to follow.
> 
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> Acked-by: Jianxiong Gao <jxgao at google.com>
> Tested-by: Jianxiong Gao <jxgao at google.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
> (backported picked from 26a7e094783d482f3e125f09945a5bb1d867b2e6)
> [ kmously: different implementation swiotlb_tbl_map_single() required
>   manual removal. Also used __phys_to_dma() instead of
> phys_to_dma_unencrypted() in find_slots ]
> Signed-off-by: Khalid Elmously <khalid.elmously at canonical.com>
> ---
>   kernel/dma/swiotlb.c | 184 +++++++++++++++++++++----------------------
>   1 file changed, 91 insertions(+), 93 deletions(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index af22c3c5e488c..5a0d9d4864aca 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -453,137 +453,135 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
>   	}
>   }
>   
> -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
> -				   dma_addr_t tbl_dma_addr,
> -				   phys_addr_t orig_addr,
> -				   size_t mapping_size,
> -				   size_t alloc_size,
> -				   enum dma_data_direction dir,
> -				   unsigned long attrs)
> -{
> -	unsigned long flags;
> -	phys_addr_t tlb_addr;
> -	unsigned int nslots, stride, index, wrap;
> -	int i;
> -	unsigned long mask;
> -	unsigned long offset_slots;
> -	unsigned long max_slots;
> -	unsigned long tmp_io_tlb_used;
> -
> -	if (no_iotlb_memory)
> -		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
> -
> -	if (mem_encrypt_active())
> -		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> +#define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
>   
> -	if (mapping_size > alloc_size) {
> -		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
> -			      mapping_size, alloc_size);
> -		return (phys_addr_t)DMA_MAPPING_ERROR;
> -	}
> -
> -	mask = dma_get_seg_boundary(hwdev);
> +/*
> + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
> + */
> +static inline unsigned long get_max_slots(unsigned long boundary_mask)
> +{
> +	if (boundary_mask == ~0UL)
> +		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
> +	return nr_slots(boundary_mask + 1);
> +}
>   
> -	tbl_dma_addr &= mask;
> +static unsigned int wrap_index(unsigned int index)
> +{
> +	if (index >= io_tlb_nslabs)
> +		return 0;
> +	return index;
> +}
>   
> -	offset_slots = nr_slots(tbl_dma_addr);
> +/*
> + * Find a suitable number of IO TLB entries size that will fit this request and
> + * allocate a buffer from that IO TLB pool.
> + */
> +static int find_slots(struct device *dev, size_t alloc_size)
> +{
> +	unsigned long boundary_mask = dma_get_seg_boundary(dev);
> +	dma_addr_t tbl_dma_addr =
> +		__phys_to_dma(dev, io_tlb_start) & boundary_mask;
> +	unsigned long max_slots = get_max_slots(boundary_mask);
> +	unsigned int nslots = nr_slots(alloc_size), stride = 1;
> +	unsigned int index, wrap, count = 0, i;
> +	unsigned long flags;
>   
> -	/*
> -	 * Carefully handle integer overflow which can occur when mask == ~0UL.
> -	 */
> -	max_slots = mask + 1
> -		    ? nr_slots(mask + 1)
> -		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
> +	BUG_ON(!nslots);
>   
>   	/*
>   	 * For mappings greater than or equal to a page, we limit the stride
>   	 * (and hence alignment) to a page size.
>   	 */
> -	nslots = nr_slots(alloc_size);
>   	if (alloc_size >= PAGE_SIZE)
> -		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
> -	else
> -		stride = 1;
> -
> -	BUG_ON(!nslots);
> +		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
>   
> -	/*
> -	 * Find suitable number of IO TLB entries size that will fit this
> -	 * request and allocate a buffer from that IO TLB pool.
> -	 */
>   	spin_lock_irqsave(&io_tlb_lock, flags);
> -
>   	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
>   		goto not_found;
>   
> -	index = ALIGN(io_tlb_index, stride);
> -	if (index >= io_tlb_nslabs)
> -		index = 0;
> -	wrap = index;
> -
> +	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
>   	do {
> -		while (iommu_is_span_boundary(index, nslots, offset_slots,
> -					      max_slots)) {
> -			index += stride;
> -			if (index >= io_tlb_nslabs)
> -				index = 0;
> -			if (index == wrap)
> -				goto not_found;
> -		}
> -
>   		/*
>   		 * If we find a slot that indicates we have 'nslots' number of
>   		 * contiguous buffers, we allocate the buffers from that slot
>   		 * and mark the entries as '0' indicating unavailable.
>   		 */
> -		if (io_tlb_list[index] >= nslots) {
> -			int count = 0;
> -
> -			for (i = index; i < (int) (index + nslots); i++)
> -				io_tlb_list[i] = 0;
> -			for (i = index - 1;
> -			     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
> -			     io_tlb_list[i]; i--)
> -				io_tlb_list[i] = ++count;
> -			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
> -
> -			/*
> -			 * Update the indices to avoid searching in the next
> -			 * round.
> -			 */
> -			io_tlb_index = ((index + nslots) < io_tlb_nslabs
> -					? (index + nslots) : 0);
> -
> -			goto found;
> +		if (!iommu_is_span_boundary(index, nslots,
> +					    nr_slots(tbl_dma_addr),
> +					    max_slots)) {
> +			if (io_tlb_list[index] >= nslots)
> +				goto found;
>   		}
> -		index += stride;
> -		if (index >= io_tlb_nslabs)
> -			index = 0;
> +		index = wrap_index(index + stride);
>   	} while (index != wrap);
>   
>   not_found:
> -	tmp_io_tlb_used = io_tlb_used;
> -
>   	spin_unlock_irqrestore(&io_tlb_lock, flags);
> -	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
> -		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> -			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
> -	return (phys_addr_t)DMA_MAPPING_ERROR;
> +	return -1;
> +
>   found:
> +	for (i = index; i < index + nslots; i++)
> +		io_tlb_list[i] = 0;
> +	for (i = index - 1;
> +	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
> +	     io_tlb_list[i]; i--)
> +		io_tlb_list[i] = ++count;
> +
> +	/*
> +	 * Update the indices to avoid searching in the next round.
> +	 */
> +	if (index + nslots < io_tlb_nslabs)
> +		io_tlb_index = index + nslots;
> +	else
> +		io_tlb_index = 0;
>   	io_tlb_used += nslots;
> +
>   	spin_unlock_irqrestore(&io_tlb_lock, flags);
> +	return index;
> +}
> +
> +phys_addr_t swiotlb_tbl_map_single(struct device *dev, dma_addr_t dma_addr,

With this refactoring, the second parameter of this function (named tbl_dma_addr
in the original function and dma_addr here) is not used anymore; the address is now
calculated in find_slots(). This means that all callers of this function are
still passing an argument that is no longer used.

Should we cherry-pick/backport fc0021aa340af65a0a37d77be39e22aa886a6132 ("swiotlb:
remove the tbl_dma_addr argument to swiotlb_tbl_map_single") to make this backport
cleaner and more consistent?

> +				phys_addr_t orig_addr, size_t mapping_size,
> +				size_t alloc_size,
> +				enum dma_data_direction dir,
> +				unsigned long attrs)
> +{
> +	unsigned int index, i;
> +	phys_addr_t tlb_addr;
> +
> +	if (no_iotlb_memory)
> +		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
> +
> +	if (mem_encrypt_active())
> +		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> +
> +	if (mapping_size > alloc_size) {
> +		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
> +			      mapping_size, alloc_size);
> +		return (phys_addr_t)DMA_MAPPING_ERROR;
> +	}
> +
> +	index = find_slots(dev, alloc_size);
> +	if (index == -1) {

index is defined as unsigned int, while find_slots() returns an int (-1 on failure), so this check compares an unsigned value against -1.

This has been fixed by 95b079d8215b83b37fa59341fda92fcb9392f14a ("swiotlb: Fix the type of index").
I think we should include this fixup in this patchset.

> +		if (!(attrs & DMA_ATTR_NO_WARN))
> +			dev_warn_ratelimited(dev,
> +	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> +				 alloc_size, io_tlb_nslabs, io_tlb_used);
> +		return (phys_addr_t)DMA_MAPPING_ERROR;
> +	}
>   
>   	/*
>   	 * Save away the mapping from the original address to the DMA address.
>   	 * This is needed when we sync the memory.  Then we sync the buffer if
>   	 * needed.
>   	 */
> -	for (i = 0; i < nslots; i++)
> -		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
> +	for (i = 0; i < nr_slots(alloc_size); i++)
> +		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
> +
> +	tlb_addr = slot_addr(io_tlb_start, index);
>   	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>   	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
>   		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
> -
>   	return tlb_addr;
>   }
>   
> 




More information about the kernel-team mailing list