lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a190ba95-79d1-e9e2-1f62-97aa94a4be7b@oracle.com>
Date:   Fri, 11 Aug 2023 15:14:16 +0100
From:   John Garry <john.g.garry@...cle.com>
To:     Zhang Zekun <zhangzekun11@...wei.com>, robin.murphy@....com,
        joro@...tes.org, will@...nel.org
Cc:     iommu@...ts.linux.dev, linux-kernel@...r.kernel.org,
        baolu.lu@...ux.intel.com, robh@...nel.org, nicolinc@...dia.com,
        kevin.tian@...el.com
Subject: Re: [RESEND PATCH 2/2] iommu/iova: allocate iova_rcache->depot
 dynamicly

On 11/08/2023 14:02, Zhang Zekun wrote:
> In fio test with 4k,read,and allowed cpus to 0-255, we observe a performance
> decrease of IOPS. The normal IOPS 

What do you mean by normal IOPS? Describe this "normal" scenario.

? can reach up to 1980k, but we can only
> get about 1600k.
> 
> abormal IOPS:
> Jobs: 12 (f=12): [R(12)][99.3%][r=6220MiB/s][r=1592k IOPS][eta 00m:12s]
> Jobs: 12 (f=12): [R(12)][99.4%][r=6215MiB/s][r=1591k IOPS][eta 00m:11s]
> Jobs: 12 (f=12): [R(12)][99.4%][r=6335MiB/s][r=1622k IOPS][eta 00m:10s]
> Jobs: 12 (f=12): [R(12)][99.5%][r=6194MiB/s][r=1586k IOPS][eta 00m:09s]
> Jobs: 12 (f=12): [R(12)][99.6%][r=6173MiB/s][r=1580k IOPS][eta 00m:08s]
> Jobs: 12 (f=12): [R(12)][99.6%][r=5984MiB/s][r=1532k IOPS][eta 00m:07s]
> Jobs: 12 (f=12): [R(12)][99.7%][r=6374MiB/s][r=1632k IOPS][eta 00m:06s]
> Jobs: 12 (f=12): [R(12)][99.7%][r=6343MiB/s][r=1624k IOPS][eta 00m:05s]
> 
> normal IOPS:
> Jobs: 12 (f=12): [R(12)][99.3%][r=7736MiB/s][r=1980k IOPS][eta 00m:12s]
> Jobs: 12 (f=12): [R(12)][99.4%][r=7744MiB/s][r=1982k IOPS][eta 00m:11s]
> Jobs: 12 (f=12): [R(12)][99.4%][r=7737MiB/s][r=1981k IOPS][eta 00m:10s]
> Jobs: 12 (f=12): [R(12)][99.5%][r=7735MiB/s][r=1980k IOPS][eta 00m:09s]
> Jobs: 12 (f=12): [R(12)][99.6%][r=7741MiB/s][r=1982k IOPS][eta 00m:08s]
> Jobs: 12 (f=12): [R(12)][99.6%][r=7740MiB/s][r=1982k IOPS][eta 00m:07s]
> Jobs: 12 (f=12): [R(12)][99.7%][r=7736MiB/s][r=1981k IOPS][eta 00m:06s]
> Jobs: 12 (f=12): [R(12)][99.7%][r=7736MiB/s][r=1980k IOPS][eta 00m:05s]
> 
> The current struct of iova_rcache will have iova_cpu_rcache for every
> cpu, and these iova_cpu_rcaches use a common buffer iova_rcache->depot to
> exchange iovas among iova_cpu_rcaches. A machine with 256 cpus will have
> 256 iova_cpu_rcaches and 1 iova_rcache->depot per iova_domain. However,
> the max size of iova_rcache->depot is fixed to MAX_GLOBAL_MAGS which equals
> to 32, and can't grow with the number of cpus, and this can cause problem
> in some condition.
> 
> Some drivers will only free iovas in their irq call back function. For
> the driver in this case it has 16 thread irqs to free iova, but these
> irq call back function will only free iovas on 16 certain cpus(cpu{0,16,
> 32...,240}). Thread irq which smp affinity is 0-15, will only free iova on
> cpu 0. However, the driver will alloc iova on all cpus(cpu{0-255}), cpus
> without free iova in local cpu_rcache need to get free iovas from
> iova_rcache->depot. The current size of iova_rcache->depot max size is 32,
> and it seems to be too small for 256 users (16 cpus will put iovas to
> iova_rcache->depot and 240 cpus will try to get iova from it). Set
> iova_rcache->depot grow with the num of possible cpus, and the decrease
> of IOPS disappear.

Doesn't it take a long time for all the depots to fill for you? From the 
description, this sounds like the hisilicon SAS controller which you are 
experimenting with and I found there that it took a long time for the 
depots to fill for high throughput testing.

Thanks,
John

> 
> Signed-off-by: Zhang Zekun <zhangzekun11@...wei.com>
> ---
>   drivers/iommu/iova.c | 26 ++++++++++++++++++++++----
>   1 file changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index 3c784a28e9ed..df37a4501e98 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -238,6 +238,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
>   
>   static struct kmem_cache *iova_cache;
>   static unsigned int iova_cache_users;
> +static unsigned int max_global_mags;
>   static DEFINE_MUTEX(iova_cache_mutex);
>   
>   static struct iova *alloc_iova_mem(void)
> @@ -625,7 +626,6 @@ EXPORT_SYMBOL_GPL(reserve_iova);
>    * will be wasted.
>    */
>   #define IOVA_MAG_SIZE 127
> -#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
>   
>   struct iova_magazine {
>   	unsigned long size;
> @@ -641,7 +641,7 @@ struct iova_cpu_rcache {
>   struct iova_rcache {
>   	spinlock_t lock;
>   	unsigned long depot_size;
> -	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
> +	struct iova_magazine **depot;
>   	struct iova_cpu_rcache __percpu *cpu_rcaches;
>   };
>   
> @@ -722,6 +722,13 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
>   	unsigned int cpu;
>   	int i, ret;
>   
> +	/*
> +	 * the size of max global mags should growth with the num of
> +	 * cpus
> +	 */
> +	if (!max_global_mags)
> +		max_global_mags = max_t(unsigned int, 32, num_possible_cpus());
> +
>   	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
>   				 sizeof(struct iova_rcache),
>   				 GFP_KERNEL);
> @@ -733,6 +740,12 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
>   		struct iova_rcache *rcache;
>   
>   		rcache = &iovad->rcaches[i];
> +		rcache->depot = kcalloc(max_global_mags, sizeof(struct iova_magazine *),
> +					GFP_KERNEL);
> +		if (!rcache->depot) {
> +			ret = -ENOMEM;
> +			goto out_err;
> +		}
>   		spin_lock_init(&rcache->lock);
>   		rcache->depot_size = 0;
>   		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
> @@ -798,7 +811,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
>   
>   		if (new_mag) {
>   			spin_lock(&rcache->lock);
> -			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
> +			if (rcache->depot_size < max_global_mags) {
>   				rcache->depot[rcache->depot_size++] =
>   						cpu_rcache->loaded;
>   			} else {
> @@ -903,8 +916,12 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>   
>   	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>   		rcache = &iovad->rcaches[i];
> -		if (!rcache->cpu_rcaches)
> +		if (!rcache->depot)
> +			break;
> +		if (!rcache->cpu_rcaches) {
> +			kfree(rcache->depot);
>   			break;
> +		}
>   		for_each_possible_cpu(cpu) {
>   			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
>   			if (!cpu_rcache->loaded)
> @@ -917,6 +934,7 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>   		free_percpu(rcache->cpu_rcaches);
>   		for (j = 0; j < rcache->depot_size; ++j)
>   			iova_magazine_free(rcache->depot[j]);
> +		kfree(rcache->depot);
>   	}
>   
>   	kfree(iovad->rcaches);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ