lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250619113721.60201-6-mbloch@nvidia.com>
Date: Thu, 19 Jun 2025 14:37:21 +0300
From: Mark Bloch <mbloch@...dia.com>
To: "David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>, "Andrew
 Lunn" <andrew+netdev@...n.ch>, Simon Horman <horms@...nel.org>
CC: <saeedm@...dia.com>, <gal@...dia.com>, <leonro@...dia.com>,
	<tariqt@...dia.com>, Leon Romanovsky <leon@...nel.org>, Jonathan Corbet
	<corbet@....net>, <netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
	<linux-doc@...r.kernel.org>, <linux-kernel@...r.kernel.org>, Dragos Tatulea
	<dtatulea@...dia.com>, Mark Bloch <mbloch@...dia.com>
Subject: [PATCH net-next 5/5] net/mlx5e: Make PCIe congestion event thresholds configurable

From: Dragos Tatulea <dtatulea@...dia.com>

Add a new sysfs entry for reading and configuring the PCIe congestion
event thresholds. The format is the following:
<inbound_low> <inbound_high> <outbound_low> <outbound_high>

Units are 0.01 %. Accepted values are in the range (0, 10000], and each
low threshold must be strictly below its corresponding high threshold.

When new thresholds are configured, an object modify operation is
issued. The set function is updated accordingly so that it can also
act as a modify.

The threshold configuration is stored and queried directly
in the firmware.

To guard against fat-fingered input, the values are first parsed as u64
and range-checked before being stored in the threshold configuration.

Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
Reviewed-by: Tariq Toukan <tariqt@...dia.com>
Signed-off-by: Mark Bloch <mbloch@...dia.com>
---
 .../mellanox/mlx5/core/en/pcie_cong_event.c   | 152 +++++++++++++++++-
 1 file changed, 144 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
index a24e5465ceeb..a74d1e15c92e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
@@ -39,9 +39,13 @@ struct mlx5e_pcie_cong_event {
 
 	/* For ethtool stats group. */
 	struct mlx5e_pcie_cong_stats stats;
+
+	struct device_attribute attr;
 };
 
 /* In units of 0.01 % */
+#define MLX5E_PCIE_CONG_THRESH_MAX 10000
+
 static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
 	.inbound_high = 9000,
 	.inbound_low = 7500,
@@ -97,6 +101,7 @@ MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);
 static int
 mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
 			     const struct mlx5e_pcie_cong_thresh *config,
+			     bool modify,
 			     u64 *obj_id)
 {
 	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
@@ -108,8 +113,16 @@ mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
 	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
 	cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);
 
-	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
-		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	if (!modify) {
+		MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+			 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	} else {
+		MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+			 MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+		MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, *obj_id);
+		MLX5_SET64(pcie_cong_event_obj, cong_obj, modify_select_field,
+			   MLX5_PCIE_CONG_EVENT_MOD_THRESH);
+	}
 
 	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
 		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
@@ -131,10 +144,12 @@ mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
-	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	if (!modify)
+		*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
 
-	mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
+	mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) %s. Config: in: [%u, %u], out: [%u, %u]\n",
 		      *obj_id,
+		      modify ? "modified" : "created",
 		      config->inbound_high, config->inbound_low,
 		      config->outbound_high, config->outbound_low);
 
@@ -160,13 +175,13 @@ static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
 
 static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
 					  u64 obj_id,
-					  u32 *state)
+					  u32 *state,
+					  struct mlx5e_pcie_cong_thresh *config)
 {
 	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
 	u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
 	void *obj;
 	void *hdr;
-	u8 cong;
 	int err;
 
 	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
@@ -184,6 +199,8 @@ static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
 	obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);
 
 	if (state) {
+		u8 cong;
+
 		cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
 		if (cong == MLX5E_CONG_HIGH_STATE)
 			*state |= MLX5E_INBOUND_CONG;
@@ -193,6 +210,19 @@ static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
 			*state |= MLX5E_OUTBOUND_CONG;
 	}
 
+	if (config) {
+		*config = (struct mlx5e_pcie_cong_thresh) {
+			.inbound_low = MLX5_GET(pcie_cong_event_obj, obj,
+						inbound_cong_low_threshold),
+			.inbound_high = MLX5_GET(pcie_cong_event_obj, obj,
+						inbound_cong_high_threshold),
+			.outbound_low = MLX5_GET(pcie_cong_event_obj, obj,
+						 outbound_cong_low_threshold),
+			.outbound_high = MLX5_GET(pcie_cong_event_obj, obj,
+						  outbound_cong_high_threshold),
+		};
+	}
+
 	return 0;
 }
 
@@ -210,7 +240,7 @@ static void mlx5e_pcie_cong_event_work(struct work_struct *work)
 	dev = priv->mdev;
 
 	err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
-					     &new_cong_state);
+					     &new_cong_state, NULL);
 	if (err) {
 		mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
 			       err, cong_event->obj_id);
@@ -249,6 +279,101 @@ static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static bool mlx5e_thresh_check_val(u64 val)
+{
+	return val > 0 && val <= MLX5E_PCIE_CONG_THRESH_MAX;
+}
+
+static bool
+mlx5e_thresh_config_check_order(const struct mlx5e_pcie_cong_thresh *config)
+{
+	if (config->inbound_high <= config->inbound_low)
+		return false;
+
+	if (config->outbound_high <= config->outbound_low)
+		return false;
+
+	return true;
+}
+
+#define MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES 4
+
+static ssize_t thresh_config_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf,
+				   size_t count)
+{
+	struct mlx5e_pcie_cong_thresh config = {};
+	struct mlx5e_pcie_cong_event *cong_event;
+	u64 outbound_high, outbound_low;
+	u64 inbound_high, inbound_low;
+	struct mlx5e_priv *priv;
+	int ret;
+	int err;
+
+	cong_event = container_of(attr, struct mlx5e_pcie_cong_event, attr);
+	priv = cong_event->priv;
+
+	ret = sscanf(buf, "%llu %llu %llu %llu",
+		     &inbound_low, &inbound_high,
+		     &outbound_low, &outbound_high);
+	if (ret != MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES) {
+		mlx5_core_err(priv->mdev, "Invalid format for PCIe congestion threshold configuration. Expected %d, got %d.\n",
+			      MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES, ret);
+		return -EINVAL;
+	}
+
+	if (!mlx5e_thresh_check_val(inbound_high) ||
+	    !mlx5e_thresh_check_val(inbound_low) ||
+	    !mlx5e_thresh_check_val(outbound_high) ||
+	    !mlx5e_thresh_check_val(outbound_low)) {
+		mlx5_core_err(priv->mdev, "Invalid values for PCIe congestion threshold configuration. Valid range [1, %d]\n",
+			      MLX5E_PCIE_CONG_THRESH_MAX);
+		return -EINVAL;
+	}
+
+	config = (struct mlx5e_pcie_cong_thresh) {
+		.inbound_low = inbound_low,
+		.inbound_high = inbound_high,
+		.outbound_low = outbound_low,
+		.outbound_high = outbound_high,
+
+	};
+
+	if (!mlx5e_thresh_config_check_order(&config)) {
+		mlx5_core_err(priv->mdev, "Invalid order of values for PCIe congestion threshold configuration.\n");
+		return -EINVAL;
+	}
+
+	err = mlx5_cmd_pcie_cong_event_set(priv->mdev, &config,
+					   true, &cong_event->obj_id);
+
+	return err ? err : count;
+}
+
+static ssize_t thresh_config_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct mlx5e_pcie_cong_event *cong_event;
+	struct mlx5e_pcie_cong_thresh config;
+	struct mlx5e_priv *priv;
+	int err;
+
+	cong_event = container_of(attr, struct mlx5e_pcie_cong_event, attr);
+	priv = cong_event->priv;
+
+	err = mlx5_cmd_pcie_cong_event_query(priv->mdev, cong_event->obj_id,
+					     NULL, &config);
+
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u %u %u %u\n",
+			  config.inbound_low, config.inbound_high,
+			  config.outbound_low, config.outbound_high);
+}
+
 bool mlx5e_pcie_cong_event_supported(struct mlx5_core_dev *dev)
 {
 	u64 features = MLX5_CAP_GEN_2_64(dev, general_obj_types_127_64);
@@ -283,7 +408,7 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
 	cong_event->priv = priv;
 
 	err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
-					   &cong_event->obj_id);
+					   false, &cong_event->obj_id);
 	if (err) {
 		mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
 		goto err_free;
@@ -295,10 +420,20 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
 		goto err_obj_destroy;
 	}
 
+	cong_event->attr = (struct device_attribute)__ATTR_RW(thresh_config);
+	err = sysfs_create_file(&mdev->device->kobj,
+				&cong_event->attr.attr);
+	if (err) {
+		mlx5_core_warn(mdev, "Error creating a sysfs entry for pcie_cong limits.\n");
+		goto err_unregister_nb;
+	}
+
 	priv->cong_event = cong_event;
 
 	return 0;
 
+err_unregister_nb:
+	mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
 err_obj_destroy:
 	mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
 err_free:
@@ -316,6 +451,7 @@ void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
 		return;
 
 	priv->cong_event = NULL;
+	sysfs_remove_file(&mdev->device->kobj, &cong_event->attr.attr);
 
 	mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
 	cancel_work_sync(&cong_event->work);
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ