[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <uqbng3vzz2ybmrrhdcocsfjtfxitck2rs76hcrsk7aiddjssp2@haqcnmzrljws>
Date: Thu, 23 Oct 2025 14:18:20 +0200
From: Jiri Pirko <jiri@...nulli.us>
To: Daniel Zahka <daniel.zahka@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Saeed Mahameed <saeedm@...dia.com>,
Tariq Toukan <tariqt@...dia.com>, Simon Horman <horms@...nel.org>,
Jonathan Corbet <corbet@....net>, Leon Romanovsky <leon@...nel.org>,
Mark Bloch <mbloch@...dia.com>, Andrew Lunn <andrew+netdev@...n.ch>,
Vlad Dumitrescu <vdumitrescu@...dia.com>, netdev@...r.kernel.org, linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-rdma@...r.kernel.org
Subject: Re: [PATCH net-next] net/mlx5: Implement swp_l4_csum_mode via
devlink params
Wed, Oct 22, 2025 at 09:09:31PM +0200, daniel.zahka@...il.com wrote:
>swp_l4_csum_mode controls how L4 transmit checksums are computed when
>using Software Parser (SWP) hints for header locations.
>
>Supported values:
> 1. device_default: use device default setting.
Is this different between devices/fw_versions?
> 2. full_csum: calculate L4 checksum with the psuedo-header.
> 3. l4_only: calculate L4 checksum without the psuedo-header. Only
s/psuedo/pseudo/
> available when swp_l4_csum_mode_l4_only is set in
> mlx5_ifc_nv_sw_offload_cap_bits.
>
>The l4_only setting is a dependency for PSP initialization in
>mlx5e_psp_init().
>
>Signed-off-by: Daniel Zahka <daniel.zahka@...il.com>
>---
> Documentation/networking/devlink/mlx5.rst | 9 ++
> .../net/ethernet/mellanox/mlx5/core/devlink.h | 3 +-
> .../mellanox/mlx5/core/lib/nv_param.c | 148 ++++++++++++++++++
> 3 files changed, 159 insertions(+), 1 deletion(-)
>
>diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
>index 0e5f9c76e514..f366e551b2f7 100644
>--- a/Documentation/networking/devlink/mlx5.rst
>+++ b/Documentation/networking/devlink/mlx5.rst
>@@ -218,6 +218,15 @@ parameters.
> * ``balanced`` : Merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance
> * ``aggressive`` : Merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads
>
>+ * - ``swp_l4_csum_mode``
>+ - string
>+ - permanent
>+ - Configure how the L4 checksum is calculated by the device when using
>+ Software Parser (SWP) hints for header locations.
>+ * ``device_default`` : Use the device's default checksum calculation mode
>+ * ``full_csum`` : Calculate full checksum including the pseudo-header
>+ * ``l4_only`` : Calculate L4-only checksum, excluding the pseudo-header
>+
> The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
>
> Info versions
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
>index c9555119a661..43b9bf8829cf 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
>@@ -26,7 +26,8 @@ enum mlx5_devlink_param_id {
> MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
> MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
> MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
>- MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE
>+ MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
>+ MLX5_DEVLINK_PARAM_ID_SWP_L4_CSUM_MODE,
> };
>
> struct mlx5_trap_ctx {
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
>index 459a0b4d08e6..fac3d9801b3b 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
>@@ -8,6 +8,8 @@ enum {
> MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF = 0x80,
> MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP = 0x81,
> MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a,
>+ MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CAP = 0x10b,
>+ MLX5_CLASS_0_CTRL_ID_NV_SW_ACCELERATE_CONF = 0x11d,
>
> MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF = 0x80,
> };
>@@ -123,6 +125,17 @@ struct mlx5_ifc_nv_sw_offload_conf_bits {
> u8 lro_log_timeout0[0x4];
> };
>
>+struct mlx5_ifc_nv_sw_offload_cap_bits {
>+ u8 reserved_at_0[0x19];
>+ u8 swp_l4_csum_mode_l4_only[0x1];
>+ u8 reserved_at_1a[0x6];
>+};
>+
>+struct mlx5_ifc_nv_sw_accelerate_conf_bits {
>+ u8 swp_l4_csum_mode[0x2];
>+ u8 reserved_at_2[0x3e];
>+};
>+
> #define MNVDA_HDR_SZ \
> (MLX5_ST_SZ_BYTES(mnvda_reg) - \
> MLX5_BYTE_OFF(mnvda_reg, configuration_item_data))
>@@ -195,9 +208,42 @@ mlx5_nv_param_read_sw_offload_conf(struct mlx5_core_dev *dev, void *mnvda,
> return mlx5_nv_param_read(dev, mnvda, len);
> }
>
>+static int
>+mlx5_nv_param_read_sw_offload_cap(struct mlx5_core_dev *dev, void *mnvda,
>+ size_t len)
>+{
>+ MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0);
>+ MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index,
>+ MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CAP);
>+ MLX5_SET_CFG_HDR_LEN(mnvda, nv_sw_offload_cap);
>+
>+ return mlx5_nv_param_read(dev, mnvda, len);
>+}
>+
>+static int
>+mlx5_nv_param_read_sw_accelerate_conf(struct mlx5_core_dev *dev, void *mnvda,
>+ size_t len)
>+{
>+ MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0);
>+ MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index,
>+ MLX5_CLASS_0_CTRL_ID_NV_SW_ACCELERATE_CONF);
>+ MLX5_SET_CFG_HDR_LEN(mnvda, nv_sw_accelerate_conf);
>+
>+ return mlx5_nv_param_read(dev, mnvda, len);
>+}
>+
> static const char *const
> cqe_compress_str[] = { "balanced", "aggressive" };
>
>+enum swp_l4_csum_mode {
>+ SWP_L4_CSUM_MODE_DEVICE_DEFAULT = 0,
>+ SWP_L4_CSUM_MODE_FULL_CSUM = 1,
>+ SWP_L4_CSUM_MODE_L4_ONLY = 2,
>+};
>+
>+static const char *const
>+ swp_l4_csum_mode_str[] = { "device_default", "full_csum", "l4_only" };
>+
> static int
> mlx5_nv_param_devlink_cqe_compress_get(struct devlink *devlink, u32 id,
> struct devlink_param_gset_ctx *ctx)
>@@ -268,6 +314,102 @@ mlx5_nv_param_devlink_cqe_compress_set(struct devlink *devlink, u32 id,
> return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
> }
>
>+static int
>+mlx5_nv_param_devlink_swp_l4_csum_mode_get(struct devlink *devlink, u32 id,
>+ struct devlink_param_gset_ctx *ctx)
>+{
>+ struct mlx5_core_dev *dev = devlink_priv(devlink);
>+ u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
>+ u8 value = U8_MAX;
>+ void *data;
>+ int err;
>+
>+ err = mlx5_nv_param_read_sw_accelerate_conf(dev, mnvda, sizeof(mnvda));
>+ if (err)
>+ return err;
>+
>+ data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
>+ value = MLX5_GET(nv_sw_accelerate_conf, data, swp_l4_csum_mode);
>+
>+ if (value >= ARRAY_SIZE(swp_l4_csum_mode_str))
>+ return -EOPNOTSUPP;
EINVAL? I think this is another argument for the introduction of extack for
param getters. Care to add it?
>+
>+ strscpy(ctx->val.vstr, swp_l4_csum_mode_str[value],
>+ sizeof(ctx->val.vstr));
>+ return 0;
>+}
>+
>+static int
>+mlx5_nv_param_devlink_swp_l4_csum_mode_validate(struct devlink *devlink, u32 id,
>+ union devlink_param_value val,
>+ struct netlink_ext_ack *extack)
>+{
>+ struct mlx5_core_dev *dev = devlink_priv(devlink);
>+ u32 cap[MLX5_ST_SZ_DW(mnvda_reg)] = {};
>+ void *data;
>+ int err, i;
>+
>+ for (i = 0; i < ARRAY_SIZE(swp_l4_csum_mode_str); i++) {
>+ if (!strcmp(val.vstr, swp_l4_csum_mode_str[i]))
>+ break;
>+ }
>+
>+ if (i >= ARRAY_SIZE(swp_l4_csum_mode_str)) {
>+ NL_SET_ERR_MSG_MOD(extack,
>+ "Invalid value, supported values are device_default/full_csum/l4_only");
>+ return -EINVAL;
>+ }
>+
>+ if (i == SWP_L4_CSUM_MODE_L4_ONLY) {
>+ err = mlx5_nv_param_read_sw_offload_cap(dev, cap, sizeof(cap));
>+ if (err) {
>+ NL_SET_ERR_MSG_MOD(extack,
>+ "Failed to read sw_offload_cap");
>+ return err;
>+ }
>+
>+ data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data);
>+ if (!MLX5_GET(nv_sw_offload_cap, data, swp_l4_csum_mode_l4_only)) {
>+ NL_SET_ERR_MSG_MOD(extack,
>+ "l4_only mode is not supported on this device");
>+ return -EOPNOTSUPP;
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+static int
>+mlx5_nv_param_devlink_swp_l4_csum_mode_set(struct devlink *devlink, u32 id,
>+ struct devlink_param_gset_ctx *ctx,
>+ struct netlink_ext_ack *extack)
>+{
>+ struct mlx5_core_dev *dev = devlink_priv(devlink);
>+ u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
>+ void *data;
>+ u8 value;
>+ int err;
>+
>+ if (!strcmp(ctx->val.vstr, "device_default"))
>+ value = SWP_L4_CSUM_MODE_DEVICE_DEFAULT;
>+ else if (!strcmp(ctx->val.vstr, "full_csum"))
>+ value = SWP_L4_CSUM_MODE_FULL_CSUM;
>+ else
>+ value = SWP_L4_CSUM_MODE_L4_ONLY;
>+
>+ err = mlx5_nv_param_read_sw_accelerate_conf(dev, mnvda, sizeof(mnvda));
>+ if (err) {
>+ NL_SET_ERR_MSG_MOD(extack,
>+ "Failed to read sw_accelerate_conf mnvda reg");
>+ return err;
>+ }
>+
>+ data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
>+ MLX5_SET(nv_sw_accelerate_conf, data, swp_l4_csum_mode, value);
>+
>+ return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
>+}
>+
> static int mlx5_nv_param_read_global_pci_conf(struct mlx5_core_dev *dev,
> void *mnvda, size_t len)
> {
>@@ -545,6 +687,12 @@ static const struct devlink_param mlx5_nv_param_devlink_params[] = {
> mlx5_nv_param_devlink_cqe_compress_get,
> mlx5_nv_param_devlink_cqe_compress_set,
> mlx5_nv_param_devlink_cqe_compress_validate),
>+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_SWP_L4_CSUM_MODE,
Why is this driver-specific? Isn't this something other drivers might
eventually implement as well?
>+ "swp_l4_csum_mode", DEVLINK_PARAM_TYPE_STRING,
>+ BIT(DEVLINK_PARAM_CMODE_PERMANENT),
>+ mlx5_nv_param_devlink_swp_l4_csum_mode_get,
>+ mlx5_nv_param_devlink_swp_l4_csum_mode_set,
>+ mlx5_nv_param_devlink_swp_l4_csum_mode_validate),
> };
>
> int mlx5_nv_param_register_dl_params(struct devlink *devlink)
>--
>2.47.3
>
Powered by blists - more mailing lists