Message-ID: <7b01f4ae-05d9-e7b8-7a44-31c9d8adae80@arm.com>
Date: Fri, 18 Aug 2023 20:06:46 +0100
From: Robin Murphy <robin.murphy@....com>
To: Niklas Schnelle <schnelle@...ux.ibm.com>, Joerg Roedel <joro@...tes.org>,
 Matthew Rosato <mjrosato@...ux.ibm.com>, Will Deacon <will@...nel.org>,
 Wenjia Zhang <wenjia@...ux.ibm.com>, Jason Gunthorpe <jgg@...pe.ca>
Cc: Gerd Bayer <gbayer@...ux.ibm.com>, Julian Ruess <julianr@...ux.ibm.com>,
 Pierre Morel <pmorel@...ux.ibm.com>, Alexandra Winter
 <wintera@...ux.ibm.com>, Heiko Carstens <hca@...ux.ibm.com>,
 Vasily Gorbik <gor@...ux.ibm.com>, Alexander Gordeev
 <agordeev@...ux.ibm.com>, Christian Borntraeger <borntraeger@...ux.ibm.com>,
 Sven Schnelle <svens@...ux.ibm.com>,
 Suravee Suthikulpanit <suravee.suthikulpanit@....com>,
 Hector Martin <marcan@...can.st>, Sven Peter <sven@...npeter.dev>,
 Alyssa Rosenzweig <alyssa@...enzweig.io>,
 David Woodhouse <dwmw2@...radead.org>, Lu Baolu <baolu.lu@...ux.intel.com>,
 Andy Gross <agross@...nel.org>, Bjorn Andersson <andersson@...nel.org>,
 Konrad Dybcio <konrad.dybcio@...aro.org>, Yong Wu <yong.wu@...iatek.com>,
 Matthias Brugger <matthias.bgg@...il.com>,
 AngeloGioacchino Del Regno <angelogioacchino.delregno@...labora.com>,
 Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
 Orson Zhai <orsonzhai@...il.com>, Baolin Wang
 <baolin.wang@...ux.alibaba.com>, Chunyan Zhang <zhang.lyra@...il.com>,
 Chen-Yu Tsai <wens@...e.org>, Jernej Skrabec <jernej.skrabec@...il.com>,
 Samuel Holland <samuel@...lland.org>,
 Thierry Reding <thierry.reding@...il.com>, Krishna Reddy
 <vdumpa@...dia.com>, Jonathan Hunter <jonathanh@...dia.com>,
 Jonathan Corbet <corbet@....net>, linux-s390@...r.kernel.org,
 netdev@...r.kernel.org, linux-kernel@...r.kernel.org, iommu@...ts.linux.dev,
 asahi@...ts.linux.dev, linux-arm-kernel@...ts.infradead.org,
 linux-arm-msm@...r.kernel.org, linux-mediatek@...ts.infradead.org,
 linux-sunxi@...ts.linux.dev, linux-tegra@...r.kernel.org,
 linux-doc@...r.kernel.org
Subject: Re: [PATCH v11 6/6] iommu/dma: Use a large flush queue and timeout
 for shadow_on_flush

On 2023-07-17 12:00, Niklas Schnelle wrote:
> Flush queues currently use a fixed compile-time size of 256 entries.
> This being a power of 2 allows the compiler to use shift and mask
> instead of more expensive modulo operations. With per-CPU flush
> queues, larger queue sizes would hit per-CPU allocation limits; with
> a single flush queue, these limits do not apply. Single queues are
> also particularly suitable for virtualized environments with
> expensive IOTLB flushes, so they benefit especially from larger
> queues and thus fewer flushes.
> 
> To this end, re-order struct iova_fq so we can use a flexible array
> member, and introduce the flush queue size and timeout as new options
> in the dma_iommu_options struct. So as not to lose the shift-and-mask
> optimization, require a power-of-2 queue length and apply an explicit
> mask instead of relying on the compiler to optimize the modulo.
> 
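
As an aside: the power-of-2 trick described above is the usual
ring-buffer index masking. A minimal standalone sketch, illustrative
only and not taken from the patch:

	#include <assert.h>

	/*
	 * With a power-of-2 ring size, "(i + 1) % size" reduces to
	 * "(i + 1) & (size - 1)", avoiding an integer division.
	 */
	struct ring {
		unsigned int head, tail;
		unsigned int mod_mask;	/* size - 1; size is a power of 2 */
	};

	static void ring_init(struct ring *r, unsigned int size)
	{
		assert(size != 0 && (size & (size - 1)) == 0);
		r->head = 0;
		r->tail = 0;
		r->mod_mask = size - 1;
	}

	static unsigned int ring_add(struct ring *r)
	{
		unsigned int idx = r->tail;

		r->tail = (idx + 1) & r->mod_mask;	/* wraps without '%' */
		return idx;
	}
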
> A large queue size and a 1-second timeout are then set for the
> shadow-on-flush case selected by s390 paged memory guests. This
> brings performance on par with the previous s390-specific DMA API
> implementation.
> 
> Reviewed-by: Matthew Rosato <mjrosato@...ux.ibm.com> #s390
> Signed-off-by: Niklas Schnelle <schnelle@...ux.ibm.com>
> ---
>   drivers/iommu/dma-iommu.c | 50 +++++++++++++++++++++++++++++++----------------
>   1 file changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 4ada8b9749c9..bebc0804ff53 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -46,7 +46,9 @@ enum iommu_dma_cookie_type {
>   struct dma_iommu_options {
>   #define IOMMU_DMA_OPTS_PER_CPU_QUEUE	0L
>   #define IOMMU_DMA_OPTS_SINGLE_QUEUE	BIT(0)
> -	u64	flags;
> +	u64		flags;
> +	size_t		fq_size;
> +	unsigned int	fq_timeout;
>   };
>   
>   struct iommu_dma_cookie {
> @@ -95,10 +97,12 @@ static int __init iommu_dma_forcedac_setup(char *str)
>   early_param("iommu.forcedac", iommu_dma_forcedac_setup);
>   
>   /* Number of entries per flush queue */
> -#define IOVA_FQ_SIZE	256
> +#define IOVA_DEFAULT_FQ_SIZE	256
> +#define IOVA_SINGLE_FQ_SIZE	32768
>   
>   /* Timeout (in ms) after which entries are flushed from the queue */
> -#define IOVA_FQ_TIMEOUT	10
> +#define IOVA_DEFAULT_FQ_TIMEOUT	10
> +#define IOVA_SINGLE_FQ_TIMEOUT	1000
>   
>   /* Flush queue entry for deferred flushing */
>   struct iova_fq_entry {
> @@ -110,18 +114,19 @@ struct iova_fq_entry {
>   
>   /* Per-CPU flush queue structure */
>   struct iova_fq {
> -	struct iova_fq_entry entries[IOVA_FQ_SIZE];
> -	unsigned int head, tail;
>   	spinlock_t lock;
> +	unsigned int head, tail;
> +	unsigned int mod_mask;
> +	struct iova_fq_entry entries[];
>   };
>   
>   #define fq_ring_for_each(i, fq) \
> -	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
> +	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask)
>   
>   static inline bool fq_full(struct iova_fq *fq)
>   {
>   	assert_spin_locked(&fq->lock);
> -	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
> +	return (((fq->tail + 1) & fq->mod_mask) == fq->head);
>   }
>   
>   static inline unsigned int fq_ring_add(struct iova_fq *fq)
> @@ -130,7 +135,7 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
>   
>   	assert_spin_locked(&fq->lock);
>   
> -	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
> +	fq->tail = (idx + 1) & fq->mod_mask;
>   
>   	return idx;
>   }
> @@ -152,7 +157,7 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq
>   			       fq->entries[idx].iova_pfn,
>   			       fq->entries[idx].pages);
>   
> -		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
> +		fq->head = (fq->head + 1) & fq->mod_mask;
>   	}
>   }
>   
> @@ -246,7 +251,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
>   	if (!atomic_read(&cookie->fq_timer_on) &&
>   	    !atomic_xchg(&cookie->fq_timer_on, 1))
>   		mod_timer(&cookie->fq_timer,
> -			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
> +			  jiffies + msecs_to_jiffies(cookie->options.fq_timeout));
>   }
>   
>   static void iommu_dma_free_fq_single(struct iova_fq *fq)
> @@ -287,27 +292,29 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
>   		iommu_dma_free_fq_percpu(cookie->percpu_fq);
>   }
>   
> -static void iommu_dma_init_one_fq(struct iova_fq *fq)
> +static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
>   {
>   	int i;
>   
>   	fq->head = 0;
>   	fq->tail = 0;
> +	fq->mod_mask = fq_size - 1;
>   
>   	spin_lock_init(&fq->lock);
>   
> -	for (i = 0; i < IOVA_FQ_SIZE; i++)
> +	for (i = 0; i < fq_size; i++)
>   		INIT_LIST_HEAD(&fq->entries[i].freelist);
>   }
>   
>   static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
>   {
> +	size_t fq_size = cookie->options.fq_size;
>   	struct iova_fq *queue;
>   
> -	queue = vzalloc(sizeof(*queue));
> +	queue = vzalloc(struct_size(queue, entries, fq_size));
>   	if (!queue)
>   		return -ENOMEM;
> -	iommu_dma_init_one_fq(queue);
> +	iommu_dma_init_one_fq(queue, fq_size);
>   	cookie->single_fq = queue;
>   
>   	return 0;
> @@ -315,15 +322,17 @@ static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
>   
>   static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
>   {
> +	size_t fq_size = cookie->options.fq_size;
>   	struct iova_fq __percpu *queue;
>   	int cpu;
>   
> -	queue = alloc_percpu(struct iova_fq);
> +	queue = __alloc_percpu(struct_size(queue, entries, fq_size),
> +			       __alignof__(*queue));
>   	if (!queue)
>   		return -ENOMEM;
>   
>   	for_each_possible_cpu(cpu)
> -		iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu));
> +		iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size);
>   	cookie->percpu_fq = queue;
>   	return 0;
>   }
> @@ -377,6 +386,8 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
>   		INIT_LIST_HEAD(&cookie->msi_page_list);
>   		cookie->type = type;
>   		cookie->options.flags = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
> +		cookie->options.fq_size = IOVA_DEFAULT_FQ_SIZE;
> +		cookie->options.fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;

Similar comment to the one on the previous patch: it would be clearer
and more robust to set fq_size and fq_timeout all in one place in the
main path of iommu_dma_init_domain(). Otherwise,

Acked-by: Robin Murphy <robin.murphy@....com>
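
For illustration, a rough, untested sketch of the above suggestion,
using the same options struct and helpers as the patch (the else
branch would replace the defaults currently assigned in
cookie_alloc()):

	if (domain->type == IOMMU_DOMAIN_DMA_FQ) {
		/* Expensive shadowing IOTLB flushes require some tuning */
		if (dev->iommu->shadow_on_flush) {
			cookie->options.flags |= IOMMU_DMA_OPTS_SINGLE_QUEUE;
			cookie->options.fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
			cookie->options.fq_size = IOVA_SINGLE_FQ_SIZE;
		} else {
			cookie->options.fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
			cookie->options.fq_size = IOVA_DEFAULT_FQ_SIZE;
		}

		/* If the FQ fails we can simply fall back to strict mode */
		if (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) ||
		    iommu_dma_init_fq(domain)) {
			domain->type = IOMMU_DOMAIN_DMA;
			cookie->options.flags &= ~IOMMU_DMA_OPTS_SINGLE_QUEUE;
		}
	}

With fq_size and fq_timeout chosen here, the strict-mode fallback no
longer needs to reset them, since they are only consulted when a flush
queue is actually in use.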

>   	}
>   	return cookie;
>   }
> @@ -696,14 +707,19 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>   
>   	if (domain->type == IOMMU_DOMAIN_DMA_FQ) {
>   		/* Expensive shadowing IOTLB flushes require some tuning */
> -		if (dev->iommu->shadow_on_flush)
> +		if (dev->iommu->shadow_on_flush) {
>   			cookie->options.flags |= IOMMU_DMA_OPTS_SINGLE_QUEUE;
> +			cookie->options.fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
> +			cookie->options.fq_size = IOVA_SINGLE_FQ_SIZE;
> +		}
>   
>   		/* If the FQ fails we can simply fall back to strict mode */
>   		if (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) ||
>   		    iommu_dma_init_fq(domain)) {
>   			domain->type = IOMMU_DOMAIN_DMA;
>   			cookie->options.flags &= ~IOMMU_DMA_OPTS_SINGLE_QUEUE;
> +			cookie->options.fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
> +			cookie->options.fq_size = IOVA_DEFAULT_FQ_SIZE;
>   		}
>   	}
>   
> 
