netdev - Re: [net 02/14] Revert "mlx5: move affinity hints assignments to generic code"

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <967520a0-ad2d-ddbd-a4dc-a674ec7c9013@fb.com>
Date:   Tue, 19 Dec 2017 17:42:41 -0500
From:   Jes Sorensen <jsorensen@...com>
To:     Saeed Mahameed <saeedm@...lanox.com>,
        "David S. Miller" <davem@...emloft.net>
CC:     <netdev@...r.kernel.org>, Sagi Grimberg <sagi@...mberg.me>,
        Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [net 02/14] Revert "mlx5: move affinity hints assignments to
 generic code"

On 12/19/2017 05:24 PM, Saeed Mahameed wrote:
> Before the offending commit, mlx5 core did the IRQ affinity itself,
> and it seems that the new generic code have some drawbacks and one
> of them is the lack for user ability to modify irq affinity after
> the initial affinity values got assigned.
> 
> The issue is still being discussed and a solution in the new generic code
> is required, until then we need to revert this patch.
> 
> This fixes the following issue:
> echo <new affinity> > /proc/irq/<x>/smp_affinity
> fails with  -EIO
> 
> This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664.
> Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since
> it is used in mlx5_ib driver.
> 
> Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code")
> Cc: Sagi Grimberg <sagi@...mberg.me>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Jes Sorensen <jsorensen@...com>
> Reported-by: Jes Sorensen <jsorensen@...com>
> Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>

Acked-by: Jes Sorensen <jsorensen@...com>

Cheers,
Jes


> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en.h      |  1 +
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++-------
>  drivers/net/ethernet/mellanox/mlx5/core/main.c    | 75 +++++++++++++++++++++--
>  include/linux/mlx5/driver.h                       |  1 +
>  4 files changed, 93 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index c0872b3284cb..43f9054830e5 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -590,6 +590,7 @@ struct mlx5e_channel {
>  	struct mlx5_core_dev      *mdev;
>  	struct hwtstamp_config    *tstamp;
>  	int                        ix;
> +	int                        cpu;
>  };
>  
>  struct mlx5e_channels {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index d2b057a3e512..cbec66bc82f1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -71,11 +71,6 @@ struct mlx5e_channel_param {
>  	struct mlx5e_cq_param      icosq_cq;
>  };
>  
> -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
> -{
> -	return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
> -}
> -
>  static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
>  {
>  	return MLX5_CAP_GEN(mdev, striding_rq) &&
> @@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
>  	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
>  	int mtt_sz = mlx5e_get_wqe_mtt_sz();
>  	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
> -	int node = mlx5e_get_node(c->priv, c->ix);
>  	int i;
>  
>  	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
> -					GFP_KERNEL, node);
> +				      GFP_KERNEL, cpu_to_node(c->cpu));
>  	if (!rq->mpwqe.info)
>  		goto err_out;
>  
>  	/* We allocate more than mtt_sz as we will align the pointer */
> -	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
> -					GFP_KERNEL, node);
> +	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
> +					cpu_to_node(c->cpu));
>  	if (unlikely(!rq->mpwqe.mtt_no_align))
>  		goto err_free_wqe_info;
>  
> @@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>  	int err;
>  	int i;
>  
> -	rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
>  
>  	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
>  				&rq->wq_ctrl);
> @@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>  	default: /* MLX5_WQ_TYPE_LINKED_LIST */
>  		rq->wqe.frag_info =
>  			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
> -				     GFP_KERNEL,
> -				     mlx5e_get_node(c->priv, c->ix));
> +				     GFP_KERNEL, cpu_to_node(c->cpu));
>  		if (!rq->wqe.frag_info) {
>  			err = -ENOMEM;
>  			goto err_rq_wq_destroy;
> @@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
>  	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
>  	sq->min_inline_mode = params->tx_min_inline_mode;
>  
> -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +	param->wq.db_numa_node = cpu_to_node(c->cpu);
>  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
>  	if (err)
>  		return err;
>  	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>  
> -	err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
>  	if (err)
>  		goto err_sq_wq_destroy;
>  
> @@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
>  	sq->channel   = c;
>  	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
>  
> -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +	param->wq.db_numa_node = cpu_to_node(c->cpu);
>  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
>  	if (err)
>  		return err;
>  	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>  
> -	err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
>  	if (err)
>  		goto err_sq_wq_destroy;
>  
> @@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
>  	if (MLX5_IPSEC_DEV(c->priv->mdev))
>  		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
>  
> -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +	param->wq.db_numa_node = cpu_to_node(c->cpu);
>  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
>  	if (err)
>  		return err;
>  	sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
>  
> -	err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
>  	if (err)
>  		goto err_sq_wq_destroy;
>  
> @@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
>  	struct mlx5_core_dev *mdev = c->priv->mdev;
>  	int err;
>  
> -	param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
> -	param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
> +	param->wq.buf_numa_node = cpu_to_node(c->cpu);
> +	param->wq.db_numa_node  = cpu_to_node(c->cpu);
>  	param->eq_ix   = c->ix;
>  
>  	err = mlx5e_alloc_cq_common(mdev, param, cq);
> @@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
>  	mlx5e_free_cq(cq);
>  }
>  
> +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
> +{
> +	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
> +}
> +
>  static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
>  			     struct mlx5e_params *params,
>  			     struct mlx5e_channel_param *cparam)
> @@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
>  {
>  	struct mlx5e_cq_moder icocq_moder = {0, 0};
>  	struct net_device *netdev = priv->netdev;
> +	int cpu = mlx5e_get_cpu(priv, ix);
>  	struct mlx5e_channel *c;
>  	unsigned int irq;
>  	int err;
>  	int eqn;
>  
> -	c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
> +	c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
>  	if (!c)
>  		return -ENOMEM;
>  
> @@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
>  	c->mdev     = priv->mdev;
>  	c->tstamp   = &priv->tstamp;
>  	c->ix       = ix;
> +	c->cpu      = cpu;
>  	c->pdev     = &priv->mdev->pdev->dev;
>  	c->netdev   = priv->netdev;
>  	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
> @@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
>  	for (tc = 0; tc < c->num_tc; tc++)
>  		mlx5e_activate_txqsq(&c->sq[tc]);
>  	mlx5e_activate_rq(&c->rq);
> -	netif_set_xps_queue(c->netdev,
> -		mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
> +	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
>  }
>  
>  static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 5f323442cc5a..8a89c7e8cd63 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
>  {
>  	struct mlx5_priv *priv = &dev->priv;
>  	struct mlx5_eq_table *table = &priv->eq_table;
> -	struct irq_affinity irqdesc = {
> -		.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
> -	};
>  	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
>  	int nvec;
>  
> @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
>  	if (!priv->irq_info)
>  		goto err_free_msix;
>  
> -	nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
> +	nvec = pci_alloc_irq_vectors(dev->pdev,
>  			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
> -			PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
> -			&irqdesc);
> +			PCI_IRQ_MSIX);
>  	if (nvec < 0)
>  		return nvec;
>  
> @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
>  	return (u64)timer_l | (u64)timer_h1 << 32;
>  }
>  
> +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> +	struct mlx5_priv *priv  = &mdev->priv;
> +	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> +	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
> +		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
> +		return -ENOMEM;
> +	}
> +
> +	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
> +			priv->irq_info[i].mask);
> +
> +	if (IS_ENABLED(CONFIG_SMP) &&
> +	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
> +		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
> +
> +	return 0;
> +}
> +
> +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> +	struct mlx5_priv *priv  = &mdev->priv;
> +	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> +	irq_set_affinity_hint(irq, NULL);
> +	free_cpumask_var(priv->irq_info[i].mask);
> +}
> +
> +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> +	int err;
> +	int i;
> +
> +	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
> +		err = mlx5_irq_set_affinity_hint(mdev, i);
> +		if (err)
> +			goto err_out;
> +	}
> +
> +	return 0;
> +
> +err_out:
> +	for (i--; i >= 0; i--)
> +		mlx5_irq_clear_affinity_hint(mdev, i);
> +
> +	return err;
> +}
> +
> +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> +	int i;
> +
> +	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
> +		mlx5_irq_clear_affinity_hint(mdev, i);
> +}
> +
>  int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
>  		    unsigned int *irqn)
>  {
> @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
>  		goto err_stop_eqs;
>  	}
>  
> +	err = mlx5_irq_set_affinity_hints(dev);
> +	if (err) {
> +		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
> +		goto err_affinity_hints;
> +	}
> +
>  	err = mlx5_init_fs(dev);
>  	if (err) {
>  		dev_err(&pdev->dev, "Failed to init flow steering\n");
> @@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
>  	mlx5_cleanup_fs(dev);
>  
>  err_fs:
> +	mlx5_irq_clear_affinity_hints(dev);
> +
> +err_affinity_hints:
>  	free_comp_eqs(dev);
>  
>  err_stop_eqs:
> @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
>  
>  	mlx5_sriov_detach(dev);
>  	mlx5_cleanup_fs(dev);
> +	mlx5_irq_clear_affinity_hints(dev);
>  	free_comp_eqs(dev);
>  	mlx5_stop_eqs(dev);
>  	mlx5_put_uars_page(dev, priv->uar);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index a886b51511ab..40a6f33c4cde 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -556,6 +556,7 @@ struct mlx5_core_sriov {
>  };
>  
>  struct mlx5_irq_info {
> +	cpumask_var_t mask;
>  	char name[MLX5_MAX_IRQ_NAME];
>  };
>  
>