lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181119234128.6948-7-saeedm@mellanox.com>
Date:   Mon, 19 Nov 2018 15:41:21 -0800
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
Cc:     netdev@...r.kernel.org, Yuval Avnery <yuvalav@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net 06/13] net/mlx5e: Adjust to max number of channles when re-attaching

From: Yuval Avnery <yuvalav@...lanox.com>

When core driver enters deattach/attach flow after pci reset,
Number of logical CPUs may have changed.
As a result we need to update the cpu affiliated resource tables.
	1. indirect rqt list
	2. eq table

Reproduction (PowerPC):
	echo 1000 > /sys/kernel/debug/powerpc/eeh_max_freezes
	ppc64_cpu --smt=on
	# Restart driver
	modprobe -r ... ; modprobe ...
	# Link up
	ifconfig ...
	# Only physical CPUs
	ppc64_cpu --smt=off
	# Inject PCI errors so PCI will reset - calling the pci error handler
	echo 0x8000000000000000 > /sys/kernel/debug/powerpc/<PCI BUS>/err_injct_inboundA

Call trace when trying to add non-existing rqs to an indirect rqt:
	mlx5e_redirect_rqt+0x84/0x260 [mlx5_core] (unreliable)
	mlx5e_redirect_rqts+0x188/0x190 [mlx5_core]
	mlx5e_activate_priv_channels+0x488/0x570 [mlx5_core]
	mlx5e_open_locked+0xbc/0x140 [mlx5_core]
	mlx5e_open+0x50/0x130 [mlx5_core]
	mlx5e_nic_enable+0x174/0x1b0 [mlx5_core]
	mlx5e_attach_netdev+0x154/0x290 [mlx5_core]
	mlx5e_attach+0x88/0xd0 [mlx5_core]
	mlx5_attach_device+0x168/0x1e0 [mlx5_core]
	mlx5_load_one+0x1140/0x1210 [mlx5_core]
	mlx5_pci_resume+0x6c/0xf0 [mlx5_core]

Create cq will fail when trying to use non-existing EQ.

Fixes: 89d44f0a6c73 ("net/mlx5_core: Add pci error handlers to mlx5_core driver")
Signed-off-by: Yuval Avnery <yuvalav@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fb3b2d9c352b..25b09bb68e8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1623,13 +1623,15 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
 	int err;
 	u32 i;
 
+	err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
+	if (err)
+		return err;
+
 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
 			       &cq->wq_ctrl);
 	if (err)
 		return err;
 
-	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
-
 	mcq->cqe_sz     = 64;
 	mcq->set_ci_db  = cq->wq_ctrl.db.db;
 	mcq->arm_db     = cq->wq_ctrl.db.db + 1;
@@ -1687,6 +1689,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	int eqn;
 	int err;
 
+	err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+	if (err)
+		return err;
+
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		sizeof(u64) * cq->wq_ctrl.buf.npages;
 	in = kvzalloc(inlen, GFP_KERNEL);
@@ -1700,8 +1706,6 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
 				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
-	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
-
 	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
 	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
@@ -1921,6 +1925,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	int err;
 	int eqn;
 
+	err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+	if (err)
+		return err;
+
 	c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
 	if (!c)
 		return -ENOMEM;
@@ -1937,7 +1945,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
 
-	mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
 	c->irq_desc = irq_to_desc(irq);
 
 	netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
@@ -5010,11 +5017,21 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
 	const struct mlx5e_profile *profile;
+	int max_nch;
 	int err;
 
 	profile = priv->profile;
 	clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
+	/* max number of channels may have changed */
+	max_nch = mlx5e_get_max_num_channels(priv->mdev);
+	if (priv->channels.params.num_channels > max_nch) {
+		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+		priv->channels.params.num_channels = max_nch;
+		mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+					      MLX5E_INDIR_RQT_SIZE, max_nch);
+	}
+
 	err = profile->init_tx(priv);
 	if (err)
 		goto out;
-- 
2.19.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ