lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200226011246.70129-7-saeedm@mellanox.com>
Date:   Tue, 25 Feb 2020 17:12:36 -0800
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>, kuba@...nel.org
Cc:     netdev@...r.kernel.org, Maxim Mikityanskiy <maximmi@...lanox.com>,
        Tariq Toukan <tariqt@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 06/16] net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases

From: Maxim Mikityanskiy <maximmi@...lanox.com>

Currently, mlx5e notifies the kernel about the number of queues and sets
the default XPS cpumasks when channels are activated. This
implementation has several corner cases, in which the kernel may not be
updated on time, or XPS cpumasks may be reset when not directly touched
by the user.

This commit fixes these corner cases to match the following expected
behavior:

1. The number of queues always corresponds to the number of channels
configured.

2. XPS cpumasks are set to driver's defaults on netdev attach.

3. XPS cpumasks set by user are not reset, unless the number of channels
changes. If the number of channels changes, they are reset to driver's
defaults. (In general case, when the number of channels increases or
decreases, it's not possible to guess how to convert the current XPS
cpumasks to work with the new number of channels, so we let the user
reconfigure it if they change the number of channels.)

XPS cpumasks are no longer stored per channel. Only one temporary
cpumask is used. The old stored cpumasks didn't reflect the user's
changes and were not used after applying them.

A scratchpad area is added to struct mlx5e_priv. As cpumask_var_t
requires allocation, and the preactivate hook can't fail, we need to
preallocate the temporary cpumask in advance. It's stored in the
scratchpad.

Fixes: 149e566fef81 ("net/mlx5e: Expand XPS cpumask to cover all online cpus")
Signed-off-by: Maxim Mikityanskiy <maximmi@...lanox.com>
Reviewed-by: Tariq Toukan <tariqt@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 11 ++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 95 +++++++++++--------
 2 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4ddccab02a4b..6d725d2acd3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -737,7 +737,6 @@ struct mlx5e_channel {
 	DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
 	int                        ix;
 	int                        cpu;
-	cpumask_var_t              xps_cpumask;
 };
 
 struct mlx5e_channels {
@@ -813,6 +812,15 @@ struct mlx5e_xsk {
 	bool ever_used;
 };
 
+/* Temporary storage for variables that are allocated when struct mlx5e_priv is
+ * initialized, and used where we can't allocate them because that functions
+ * must not fail. Use with care and make sure the same variable is not used
+ * simultaneously by multiple users.
+ */
+struct mlx5e_scratchpad {
+	cpumask_var_t cpumask;
+};
+
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -876,6 +884,7 @@ struct mlx5e_priv {
 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
 	struct mlx5e_hv_vhca_stats_agent stats_agent;
 #endif
+	struct mlx5e_scratchpad    scratchpad;
 };
 
 struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bbe8c32fb423..4906d609aa55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1794,29 +1794,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
-static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c,
-				   struct mlx5e_params *params)
-{
-	int num_comp_vectors = mlx5_comp_vectors_count(c->mdev);
-	int irq;
-
-	if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL))
-		return -ENOMEM;
-
-	for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) {
-		int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq));
-
-		cpumask_set_cpu(cpu, c->xps_cpumask);
-	}
-
-	return 0;
-}
-
-static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
-{
-	free_cpumask_var(c->xps_cpumask);
-}
-
 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -1967,10 +1944,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->irq_desc = irq_to_desc(irq);
 	c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
 
-	err = mlx5e_alloc_xps_cpumask(c, params);
-	if (err)
-		goto err_free_channel;
-
 	netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
 
 	err = mlx5e_open_queues(c, params, cparam);
@@ -1993,9 +1966,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 
 err_napi_del:
 	netif_napi_del(&c->napi);
-	mlx5e_free_xps_cpumask(c);
 
-err_free_channel:
 	kvfree(c);
 
 	return err;
@@ -2009,7 +1980,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_icosq(&c->icosq);
 	mlx5e_activate_rq(&c->rq);
-	netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
 
 	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
 		mlx5e_activate_xsk(c);
@@ -2034,7 +2004,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
 		mlx5e_close_xsk(c);
 	mlx5e_close_queues(c);
 	netif_napi_del(&c->napi);
-	mlx5e_free_xps_cpumask(c);
 
 	kvfree(c);
 }
@@ -2869,10 +2838,10 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 		netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
-static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
+static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count)
 {
-	int num_txqs = priv->channels.num * priv->channels.params.num_tc;
-	int num_rxqs = priv->channels.num * priv->profile->rq_groups;
+	int num_txqs = count * priv->channels.params.num_tc;
+	int num_rxqs = count * priv->profile->rq_groups;
 	struct net_device *netdev = priv->netdev;
 
 	mlx5e_netdev_set_tcs(netdev);
@@ -2880,10 +2849,34 @@ static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 	netif_set_real_num_rx_queues(netdev, num_rxqs);
 }
 
+static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
+					   struct mlx5e_params *params)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int num_comp_vectors, ix, irq;
+
+	num_comp_vectors = mlx5_comp_vectors_count(mdev);
+
+	for (ix = 0; ix < params->num_channels; ix++) {
+		cpumask_clear(priv->scratchpad.cpumask);
+
+		for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
+			int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
+
+			cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
+		}
+
+		netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
+	}
+}
+
 int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
 {
 	u16 count = priv->channels.params.num_channels;
 
+	mlx5e_update_netdev_queues(priv, count);
+	mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
+
 	if (!netif_is_rxfh_configured(priv->netdev))
 		mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
 					      MLX5E_INDIR_RQT_SIZE, count);
@@ -2912,8 +2905,6 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
 
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 {
-	mlx5e_update_netdev_queues(priv);
-
 	mlx5e_build_txq_maps(priv);
 	mlx5e_activate_channels(&priv->channels);
 	mlx5e_xdp_tx_enable(priv);
@@ -3449,7 +3440,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_num_channels_changed);
 	if (err)
 		goto out;
 
@@ -5231,6 +5222,9 @@ int mlx5e_netdev_init(struct net_device *netdev,
 	priv->max_nch     = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
 	priv->max_opened_tc = 1;
 
+	if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
 	mutex_init(&priv->state_lock);
 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
@@ -5239,7 +5233,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
 
 	priv->wq = create_singlethread_workqueue("mlx5e");
 	if (!priv->wq)
-		return -ENOMEM;
+		goto err_free_cpumask;
 
 	/* netdev init */
 	netif_carrier_off(netdev);
@@ -5249,11 +5243,17 @@ int mlx5e_netdev_init(struct net_device *netdev,
 #endif
 
 	return 0;
+
+err_free_cpumask:
+	free_cpumask_var(priv->scratchpad.cpumask);
+
+	return -ENOMEM;
 }
 
 void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
 {
 	destroy_workqueue(priv->wq);
+	free_cpumask_var(priv->scratchpad.cpumask);
 }
 
 struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
@@ -5288,6 +5288,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
+	const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
 	const struct mlx5e_profile *profile;
 	int max_nch;
 	int err;
@@ -5299,11 +5300,25 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 	max_nch = mlx5e_get_max_num_channels(priv->mdev);
 	if (priv->channels.params.num_channels > max_nch) {
 		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
-		/* Reducing the number of channels - RXFH has to be reset. */
+		/* Reducing the number of channels - RXFH has to be reset, and
+		 * mlx5e_num_channels_changed below will build the RQT.
+		 */
 		priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
 		priv->channels.params.num_channels = max_nch;
-		mlx5e_num_channels_changed(priv);
 	}
+	/* 1. Set the real number of queues in the kernel the first time.
+	 * 2. Set our default XPS cpumask.
+	 * 3. Build the RQT.
+	 *
+	 * rtnl_lock is required by netif_set_real_num_*_queues in case the
+	 * netdev has been registered by this point (if this function was called
+	 * in the reload or resume flow).
+	 */
+	if (take_rtnl)
+		rtnl_lock();
+	mlx5e_num_channels_changed(priv);
+	if (take_rtnl)
+		rtnl_unlock();
 
 	err = profile->init_tx(priv);
 	if (err)
-- 
2.24.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ