[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YtVlDiLTPxm312u+@worktop.programming.kicks-ass.net>
Date: Mon, 18 Jul 2022 15:50:06 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Tariq Toukan <tariqt@...dia.com>
Cc: "David S. Miller" <davem@...emloft.net>,
Saeed Mahameed <saeedm@...dia.com>,
Jakub Kicinski <kuba@...nel.org>,
Ingo Molnar <mingo@...hat.com>,
Juri Lelli <juri.lelli@...hat.com>,
Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
Gal Pressman <gal@...dia.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next V2 2/2] net/mlx5e: Improve remote NUMA
preferences used for the IRQ affinity hints
On Mon, Jul 18, 2022 at 03:43:15PM +0300, Tariq Toukan wrote:
> Reviewed-by: Gal Pressman <gal@...dia.com>
> Acked-by: Saeed Mahameed <saeedm@...dia.com>
> Signed-off-by: Tariq Toukan <tariqt@...dia.com>
> ---
> drivers/net/ethernet/mellanox/mlx5/core/eq.c | 62 +++++++++++++++++++-
> 1 file changed, 59 insertions(+), 3 deletions(-)
>
> v2:
> Separated the set_cpu operation into two functions, per Saeed's suggestion.
> Added Saeed's Acked-by signature.
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> index 229728c80233..e72bdaaad84f 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
> @@ -11,6 +11,9 @@
> #ifdef CONFIG_RFS_ACCEL
> #include <linux/cpu_rmap.h>
> #endif
> +#ifdef CONFIG_NUMA
> +#include <linux/sched/topology.h>
> +#endif
> #include "mlx5_core.h"
> #include "lib/eq.h"
> #include "fpga/core.h"
> @@ -806,13 +809,67 @@ static void comp_irqs_release(struct mlx5_core_dev *dev)
> kfree(table->comp_irqs);
> }
>
> +static void set_cpus_by_local_spread(struct mlx5_core_dev *dev, u16 *cpus,
> + int ncomp_eqs)
> +{
> + int i;
> +
> + for (i = 0; i < ncomp_eqs; i++)
> + cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
> +}
> +
> +static bool set_cpus_by_numa_distance(struct mlx5_core_dev *dev, u16 *cpus,
> + int ncomp_eqs)
> +{
> +#ifdef CONFIG_NUMA
> + cpumask_var_t cpumask;
> + int first;
> + int i;
> +
> + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) {
> + mlx5_core_err(dev, "zalloc_cpumask_var failed\n");
> + return false;
> + }
> + cpumask_copy(cpumask, cpu_online_mask);
> +
> + first = cpumask_local_spread(0, dev->priv.numa_node);
Arguably you want something like:
first = cpumask_any(cpumask_of_node(dev->priv.numa_node));
> +
> + for (i = 0; i < ncomp_eqs; i++) {
> + int cpu;
> +
> + cpu = sched_numa_find_closest(cpumask, first);
> + if (cpu >= nr_cpu_ids) {
> + mlx5_core_err(dev, "sched_numa_find_closest failed, cpu(%d) >= nr_cpu_ids(%d)\n",
> + cpu, nr_cpu_ids);
> +
> + free_cpumask_var(cpumask);
> + return false;
So this will fail when ncomp_eqs > cpumask_weight(online_cpus); is that
desired?
> + }
> + cpus[i] = cpu;
> + cpumask_clear_cpu(cpu, cpumask);
Since there is no concurrency on this cpumask, you don't need atomic
ops:
__cpumask_clear_cpu(..);
> + }
> +
> + free_cpumask_var(cpumask);
> + return true;
> +#else
> + return false;
> +#endif
> +}
> +
> +static void mlx5_set_eqs_cpus(struct mlx5_core_dev *dev, u16 *cpus, int ncomp_eqs)
> +{
> + bool success = set_cpus_by_numa_distance(dev, cpus, ncomp_eqs);
> +
> + if (!success)
> + set_cpus_by_local_spread(dev, cpus, ncomp_eqs);
> +}
> +
> static int comp_irqs_request(struct mlx5_core_dev *dev)
> {
> struct mlx5_eq_table *table = dev->priv.eq_table;
> int ncomp_eqs = table->num_comp_eqs;
> u16 *cpus;
> int ret;
> - int i;
>
> ncomp_eqs = table->num_comp_eqs;
> table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
> @@ -830,8 +887,7 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
> ret = -ENOMEM;
> goto free_irqs;
> }
> - for (i = 0; i < ncomp_eqs; i++)
> - cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
> + mlx5_set_eqs_cpus(dev, cpus, ncomp_eqs);
So you change this for mlx5, what about the other users of
cpumask_local_spread() ?
Powered by blists - more mailing lists