[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <967520a0-ad2d-ddbd-a4dc-a674ec7c9013@fb.com>
Date: Tue, 19 Dec 2017 17:42:41 -0500
From: Jes Sorensen <jsorensen@...com>
To: Saeed Mahameed <saeedm@...lanox.com>,
"David S. Miller" <davem@...emloft.net>
CC: <netdev@...r.kernel.org>, Sagi Grimberg <sagi@...mberg.me>,
Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [net 02/14] Revert "mlx5: move affinity hints assignments to
generic code"
On 12/19/2017 05:24 PM, Saeed Mahameed wrote:
> Before the offending commit, mlx5 core did the IRQ affinity assignment
> itself. The new generic code seems to have some drawbacks, one of which
> is that the user can no longer modify the IRQ affinity after the
> initial affinity values have been assigned.
>
> The issue is still being discussed, and a solution in the new generic
> code is required; until then, we need to revert this patch.
>
> This fixes the following issue:
> echo <new affinity> > /proc/irq/<x>/smp_affinity
> fails with -EIO
>
> This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664.
> Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since
> it is used in mlx5_ib driver.
>
> Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code")
> Cc: Sagi Grimberg <sagi@...mberg.me>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Jes Sorensen <jsorensen@...com>
> Reported-by: Jes Sorensen <jsorensen@...com>
> Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
Acked-by: Jes Sorensen <jsorensen@...com>
Cheers,
Jes
> ---
> drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++-------
> drivers/net/ethernet/mellanox/mlx5/core/main.c | 75 +++++++++++++++++++++--
> include/linux/mlx5/driver.h | 1 +
> 4 files changed, 93 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index c0872b3284cb..43f9054830e5 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -590,6 +590,7 @@ struct mlx5e_channel {
> struct mlx5_core_dev *mdev;
> struct hwtstamp_config *tstamp;
> int ix;
> + int cpu;
> };
>
> struct mlx5e_channels {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index d2b057a3e512..cbec66bc82f1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -71,11 +71,6 @@ struct mlx5e_channel_param {
> struct mlx5e_cq_param icosq_cq;
> };
>
> -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
> -{
> - return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
> -}
> -
> static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
> {
> return MLX5_CAP_GEN(mdev, striding_rq) &&
> @@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
> int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
> int mtt_sz = mlx5e_get_wqe_mtt_sz();
> int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
> - int node = mlx5e_get_node(c->priv, c->ix);
> int i;
>
> rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
> - GFP_KERNEL, node);
> + GFP_KERNEL, cpu_to_node(c->cpu));
> if (!rq->mpwqe.info)
> goto err_out;
>
> /* We allocate more than mtt_sz as we will align the pointer */
> - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
> - GFP_KERNEL, node);
> + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
> + cpu_to_node(c->cpu));
> if (unlikely(!rq->mpwqe.mtt_no_align))
> goto err_free_wqe_info;
>
> @@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
> int err;
> int i;
>
> - rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> + rqp->wq.db_numa_node = cpu_to_node(c->cpu);
>
> err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
> &rq->wq_ctrl);
> @@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
> default: /* MLX5_WQ_TYPE_LINKED_LIST */
> rq->wqe.frag_info =
> kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
> - GFP_KERNEL,
> - mlx5e_get_node(c->priv, c->ix));
> + GFP_KERNEL, cpu_to_node(c->cpu));
> if (!rq->wqe.frag_info) {
> err = -ENOMEM;
> goto err_rq_wq_destroy;
> @@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
> sq->uar_map = mdev->mlx5e_res.bfreg.map;
> sq->min_inline_mode = params->tx_min_inline_mode;
>
> - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> + param->wq.db_numa_node = cpu_to_node(c->cpu);
> err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
> if (err)
> return err;
> sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>
> - err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
> if (err)
> goto err_sq_wq_destroy;
>
> @@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
> sq->channel = c;
> sq->uar_map = mdev->mlx5e_res.bfreg.map;
>
> - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> + param->wq.db_numa_node = cpu_to_node(c->cpu);
> err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
> if (err)
> return err;
> sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>
> - err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
> + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
> if (err)
> goto err_sq_wq_destroy;
>
> @@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
> if (MLX5_IPSEC_DEV(c->priv->mdev))
> set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
>
> - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> + param->wq.db_numa_node = cpu_to_node(c->cpu);
> err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
> if (err)
> return err;
> sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>
> - err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
> if (err)
> goto err_sq_wq_destroy;
>
> @@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
> struct mlx5_core_dev *mdev = c->priv->mdev;
> int err;
>
> - param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
> - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> + param->wq.buf_numa_node = cpu_to_node(c->cpu);
> + param->wq.db_numa_node = cpu_to_node(c->cpu);
> param->eq_ix = c->ix;
>
> err = mlx5e_alloc_cq_common(mdev, param, cq);
> @@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
> mlx5e_free_cq(cq);
> }
>
> +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
> +{
> + return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
> +}
> +
> static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
> struct mlx5e_params *params,
> struct mlx5e_channel_param *cparam)
> @@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
> {
> struct mlx5e_cq_moder icocq_moder = {0, 0};
> struct net_device *netdev = priv->netdev;
> + int cpu = mlx5e_get_cpu(priv, ix);
> struct mlx5e_channel *c;
> unsigned int irq;
> int err;
> int eqn;
>
> - c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
> + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
> if (!c)
> return -ENOMEM;
>
> @@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
> c->mdev = priv->mdev;
> c->tstamp = &priv->tstamp;
> c->ix = ix;
> + c->cpu = cpu;
> c->pdev = &priv->mdev->pdev->dev;
> c->netdev = priv->netdev;
> c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
> @@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
> for (tc = 0; tc < c->num_tc; tc++)
> mlx5e_activate_txqsq(&c->sq[tc]);
> mlx5e_activate_rq(&c->rq);
> - netif_set_xps_queue(c->netdev,
> - mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
> + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
> }
>
> static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 5f323442cc5a..8a89c7e8cd63 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
> {
> struct mlx5_priv *priv = &dev->priv;
> struct mlx5_eq_table *table = &priv->eq_table;
> - struct irq_affinity irqdesc = {
> - .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
> - };
> int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
> int nvec;
>
> @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
> if (!priv->irq_info)
> goto err_free_msix;
>
> - nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
> + nvec = pci_alloc_irq_vectors(dev->pdev,
> MLX5_EQ_VEC_COMP_BASE + 1, nvec,
> - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
> - &irqdesc);
> + PCI_IRQ_MSIX);
> if (nvec < 0)
> return nvec;
>
> @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
> return (u64)timer_l | (u64)timer_h1 << 32;
> }
>
> +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> + struct mlx5_priv *priv = &mdev->priv;
> + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
> + mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
> + return -ENOMEM;
> + }
> +
> + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
> + priv->irq_info[i].mask);
> +
> + if (IS_ENABLED(CONFIG_SMP) &&
> + irq_set_affinity_hint(irq, priv->irq_info[i].mask))
> + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
> +
> + return 0;
> +}
> +
> +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> + struct mlx5_priv *priv = &mdev->priv;
> + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> + irq_set_affinity_hint(irq, NULL);
> + free_cpumask_var(priv->irq_info[i].mask);
> +}
> +
> +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> + int err;
> + int i;
> +
> + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
> + err = mlx5_irq_set_affinity_hint(mdev, i);
> + if (err)
> + goto err_out;
> + }
> +
> + return 0;
> +
> +err_out:
> + for (i--; i >= 0; i--)
> + mlx5_irq_clear_affinity_hint(mdev, i);
> +
> + return err;
> +}
> +
> +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> + int i;
> +
> + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
> + mlx5_irq_clear_affinity_hint(mdev, i);
> +}
> +
> int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
> unsigned int *irqn)
> {
> @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
> goto err_stop_eqs;
> }
>
> + err = mlx5_irq_set_affinity_hints(dev);
> + if (err) {
> + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
> + goto err_affinity_hints;
> + }
> +
> err = mlx5_init_fs(dev);
> if (err) {
> dev_err(&pdev->dev, "Failed to init flow steering\n");
> @@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
> mlx5_cleanup_fs(dev);
>
> err_fs:
> + mlx5_irq_clear_affinity_hints(dev);
> +
> +err_affinity_hints:
> free_comp_eqs(dev);
>
> err_stop_eqs:
> @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
>
> mlx5_sriov_detach(dev);
> mlx5_cleanup_fs(dev);
> + mlx5_irq_clear_affinity_hints(dev);
> free_comp_eqs(dev);
> mlx5_stop_eqs(dev);
> mlx5_put_uars_page(dev, priv->uar);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index a886b51511ab..40a6f33c4cde 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -556,6 +556,7 @@ struct mlx5_core_sriov {
> };
>
> struct mlx5_irq_info {
> + cpumask_var_t mask;
> char name[MLX5_MAX_IRQ_NAME];
> };
>
>
Powered by blists - more mailing lists