Message-ID: <c0e70b17-cdf1-4fd8-f807-e4b9ccad44fd@gmail.com>
Date: Fri, 29 Apr 2022 22:25:53 +0800
From: Tianyu Lan <ltykernel@...il.com>
To: hch@...radead.org, robin.murphy@....com
Cc: Tianyu Lan <Tianyu.Lan@...rosoft.com>,
iommu@...ts.linux-foundation.org, linux-kernel@...r.kernel.org,
vkuznets@...hat.com, brijesh.singh@....com, konrad.wilk@...cle.com,
hch@....de, wei.liu@...nel.org, parri.andrea@...il.com,
thomas.lendacky@....com, linux-hyperv@...r.kernel.org,
andi.kleen@...el.com, kirill.shutemov@...el.com,
m.szyprowski@...sung.com, michael.h.kelley@...rosoft.com,
kys@...rosoft.com
Subject: Re: [RFC PATCH] swiotlb: Add Child IO TLB mem support
On 4/29/2022 10:21 PM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@...rosoft.com>
>
> Traditionally swiotlb was not performance critical because it was only
> used for slow devices. But in some setups, like TDX/SEV confidential
> guests, all IO has to go through swiotlb. Currently swiotlb only has a
> single lock. Under high IO load with multiple CPUs this can lead to
> significant lock contention on the swiotlb lock.
>
> This patch adds child IO TLB mem support to reduce spinlock contention
> across a device's queues. Each device may allocate its own IO TLB mem
> and set up child IO TLB mems according to its queue count. The swiotlb
> code then allocates bounce buffers from the child IO TLB mems in a
> round-robin fashion.
>
Hi Robin and Christoph:
     Following Robin's idea, I have drafted this patch. Please have a look
and check whether it is heading in the right direction.
Thanks.
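
     One note for reviewers: this RFC only adds the new fields and the
round-robin lookup; the path that actually allocates and assigns
mem->child per device queue is not included yet. For discussion, here is
a rough, untested sketch of what such a setup helper inside
kernel/dma/swiotlb.c might look like. The helper name
swiotlb_device_setup_child_mems() and its signature are made up purely
for illustration and are not part of this patch.

/*
 * Illustration only, not part of this patch: carve the parent
 * io_tlb_mem into one child pool per queue so that queues stop
 * contending on a single spinlock. This would have to run before the
 * device issues any DMA through swiotlb, since re-running the init
 * resets the slot bookkeeping.
 */
static int swiotlb_device_setup_child_mems(struct device *dev,
					   unsigned int nr_queues)
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

	if (!mem || !nr_queues)
		return -EINVAL;

	mem->child = kcalloc(nr_queues, sizeof(*mem->child), GFP_KERNEL);
	if (!mem->child)
		return -ENOMEM;

	mem->num_child = nr_queues;
	/* Re-init so the parent slots are split across the children. */
	swiotlb_init_io_tlb_mem(mem, mem->start, mem->nslabs, mem->late_alloc);
	return 0;
}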
> Signed-off-by: Tianyu Lan <Tianyu.Lan@...rosoft.com>
> ---
> include/linux/swiotlb.h | 7 +++
> kernel/dma/swiotlb.c | 96 ++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 93 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 7ed35dd3de6e..4a3f6a7b4b7e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -89,6 +89,9 @@ extern enum swiotlb_force swiotlb_force;
> * @late_alloc: %true if allocated using the page allocator
> * @force_bounce: %true if swiotlb bouncing is forced
> * @for_alloc: %true if the pool is used for memory allocation
> + * @child_nslot: The number of IO TLB slots in each child IO TLB mem.
> + * @num_child: The number of child IO TLB mems in the pool.
> + * @child_start: The child index at which to start searching on the next allocation.
> */
> struct io_tlb_mem {
> phys_addr_t start;
> @@ -102,6 +105,10 @@ struct io_tlb_mem {
> bool late_alloc;
> bool force_bounce;
> bool for_alloc;
> + unsigned int num_child;
> + unsigned int child_nslot;
> + unsigned int child_start;
> + struct io_tlb_mem *child;
> struct io_tlb_slot {
> phys_addr_t orig_addr;
> size_t alloc_size;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index e2ef0864eb1e..382fa2288645 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -207,6 +207,25 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
> mem->force_bounce = true;
>
> spin_lock_init(&mem->lock);
> +
> + if (mem->num_child) {
> + mem->child_nslot = nslabs / mem->num_child;
> + mem->child_start = 0;
> +
> + /*
> + * Initialize the child IO TLB mems, dividing the parent
> + * IO TLB pool evenly among them. The children reuse the
> + * parent's mem->slots array instead of allocating their own.
> + */
> + for (i = 0; i < mem->num_child; i++) {
> + mem->child[i].num_child = 0;
> + mem->child[i].slots = mem->slots + i * mem->child_nslot;
> + swiotlb_init_io_tlb_mem(&mem->child[i],
> + start + ((i * mem->child_nslot) << IO_TLB_SHIFT),
> + mem->child_nslot, late_alloc);
> + }
> + }
> +
> for (i = 0; i < mem->nslabs; i++) {
> mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> @@ -336,16 +355,18 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
>
> mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> get_order(array_size(sizeof(*mem->slots), nslabs)));
> - if (!mem->slots) {
> - free_pages((unsigned long)vstart, order);
> - return -ENOMEM;
> - }
> + if (!mem->slots)
> + goto error_slots;
>
> set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT);
> swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
>
> swiotlb_print_info();
> return 0;
> +
> +error_slots:
> + free_pages((unsigned long)vstart, order);
> + return -ENOMEM;
> }
>
> void __init swiotlb_exit(void)
> @@ -483,10 +504,11 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
> * Find a suitable number of IO TLB entries size that will fit this request and
> * allocate a buffer from that IO TLB pool.
> */
> -static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> - size_t alloc_size, unsigned int alloc_align_mask)
> +static int swiotlb_do_find_slots(struct io_tlb_mem *mem,
> + struct device *dev, phys_addr_t orig_addr,
> + size_t alloc_size,
> + unsigned int alloc_align_mask)
> {
> - struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> unsigned long boundary_mask = dma_get_seg_boundary(dev);
> dma_addr_t tbl_dma_addr =
> phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
> @@ -565,6 +587,46 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> return index;
> }
>
> +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> + size_t alloc_size, unsigned int alloc_align_mask)
> +{
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> + struct io_tlb_mem *child_mem = mem;
> + int start = 0, i = 0, index;
> +
> + if (mem->num_child) {
> + i = start = mem->child_start;
> + mem->child_start = (mem->child_start + 1) % mem->num_child;
> + child_mem = mem->child;
> + }
> +
> + do {
> + index = swiotlb_do_find_slots(child_mem + i, dev, orig_addr,
> + alloc_size, alloc_align_mask);
> + if (index >= 0)
> + return i * mem->child_nslot + index;
> + if (++i >= mem->num_child)
> + i = 0;
> + } while (i != start);
> +
> + return -1;
> +}
> +
> +static unsigned long mem_used(struct io_tlb_mem *mem)
> +{
> + int i;
> + unsigned long used = 0;
> +
> + if (mem->num_child) {
> + for (i = 0; i < mem->num_child; i++)
> + used += mem->child[i].used;
> + } else {
> + used = mem->used;
> + }
> +
> + return used;
> +}
> +
> phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> size_t mapping_size, size_t alloc_size,
> unsigned int alloc_align_mask, enum dma_data_direction dir,
> @@ -594,7 +656,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> if (!(attrs & DMA_ATTR_NO_WARN))
> dev_warn_ratelimited(dev,
> "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> - alloc_size, mem->nslabs, mem->used);
> + alloc_size, mem->nslabs, mem_used(mem));
> return (phys_addr_t)DMA_MAPPING_ERROR;
> }
>
> @@ -617,9 +679,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> return tlb_addr;
> }
>
> -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> +static void swiotlb_do_release_slots(struct io_tlb_mem *mem,
> + struct device *dev, phys_addr_t tlb_addr)
> {
> - struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> unsigned long flags;
> unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
> int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> @@ -660,6 +722,20 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> spin_unlock_irqrestore(&mem->lock, flags);
> }
>
> +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> +{
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> + int index, offset;
> +
> + if (mem->num_child) {
> + offset = swiotlb_align_offset(dev, tlb_addr);
> + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> + mem = &mem->child[index / mem->child_nslot];
> + }
> +
> + swiotlb_do_release_slots(mem, dev, tlb_addr);
> +}
> +
> /*
> * tlb_addr is the physical address of the bounce buffer to unmap.
> */