lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y43EIHJ8nlUz5HYK@nanopsycho>
Date:   Mon, 5 Dec 2022 11:12:48 +0100
From:   Jiri Pirko <jiri@...nulli.us>
To:     Shay Drory <shayd@...dia.com>
Cc:     netdev@...r.kernel.org, kuba@...nel.org, davem@...emloft.net,
        danielj@...dia.com, yishaih@...dia.com, jiri@...dia.com,
        saeedm@...dia.com, parav@...dia.com
Subject: Re: [PATCH net-next V3 4/8] devlink: Expose port function commands
 to control RoCE

Sun, Dec 04, 2022 at 03:16:28PM CET, shayd@...dia.com wrote:
>Expose port function commands to enable / disable RoCE, this is used to
>control the port RoCE device capabilities.
>
>When RoCE is disabled for a function of the port, the function cannot create
>any RoCE specific resources (e.g. GID table).
>It also reduces system memory utilization. For example, disabling RoCE on a
>VF/SF saves 1 Mbyte of system memory per function.
>
>Example of a PCI VF port which supports function configuration:
>Set RoCE of the VF's port function.
>
>$ devlink port show pci/0000:06:00.0/2
>pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
>vfnum 1
>    function:
>        hw_addr 00:00:00:00:00:00 roce enable
>
>$ devlink port function set pci/0000:06:00.0/2 roce disable
>
>$ devlink port show pci/0000:06:00.0/2
>pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
>vfnum 1
>    function:
>        hw_addr 00:00:00:00:00:00 roce disable
>
>Signed-off-by: Shay Drory <shayd@...dia.com>
>Reviewed-by: Jiri Pirko <jiri@...dia.com>

When you make changes to the patch, you should remove the Reviewed-by and
Acked-by tags.


>---
>v2->v3:
> - change DEVLINK_PORT_FN_SET_CAP to devlink_port_fn_cap_fill.
> - move out DEVLINK_PORT_FN_CAPS_VALID_MASK from UAPI.
> - introduce DEVLINK_PORT_FN_CAP_ROCE and add _BIT suffix to
>   devlink_port_fn_attr_cap.
> - remove DEVLINK_PORT_FN_ATTR_CAPS_MAX
>---
> .../networking/devlink/devlink-port.rst       |  34 +++++-
> include/net/devlink.h                         |  19 +++
> include/uapi/linux/devlink.h                  |  10 ++
> net/core/devlink.c                            | 113 ++++++++++++++++++
> 4 files changed, 175 insertions(+), 1 deletion(-)
>
>diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
>index 2c637f4aae8e..c3302d23e480 100644
>--- a/Documentation/networking/devlink/devlink-port.rst
>+++ b/Documentation/networking/devlink/devlink-port.rst
>@@ -110,7 +110,7 @@ devlink ports for both the controllers.
> Function configuration
> ======================
> 
>-A user can configure the function attribute before enumerating the PCI
>+Users can configure one or more function attributes before enumerating the PCI
> function. Usually it means, user should configure function attribute
> before a bus specific device for the function is created. However, when
> SRIOV is enabled, virtual function devices are created on the PCI bus.
>@@ -122,6 +122,9 @@ A user may set the hardware address of the function using
> `devlink port function set hw_addr` command. For Ethernet port function
> this means a MAC address.
> 
>+Users may also set the RoCE capability of the function using
>+`devlink port function set roce` command.
>+
> Function attributes
> ===================
> 
>@@ -162,6 +165,35 @@ device created for the PCI VF/SF.
>       function:
>         hw_addr 00:00:00:00:88:88
> 
>+RoCE capability setup
>+---------------------
>+Not all PCI VFs/SFs require RoCE capability.
>+
>+When RoCE capability is disabled, it saves system memory per PCI VF/SF.
>+
>+When a user disables RoCE capability for a VF/SF, user applications cannot send
>+or receive any RoCE packets through this VF/SF, and the RoCE GID table for this
>+PCI function will be empty.
>+
>+When RoCE capability is disabled in the device using port function attribute,
>+VF/SF driver cannot override it.
>+
>+- Get RoCE capability of the VF device::
>+
>+    $ devlink port show pci/0000:06:00.0/2
>+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
>+        function:
>+            hw_addr 00:00:00:00:00:00 roce enable
>+
>+- Set RoCE capability of the VF device::
>+
>+    $ devlink port function set pci/0000:06:00.0/2 roce disable
>+
>+    $ devlink port show pci/0000:06:00.0/2
>+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
>+        function:
>+            hw_addr 00:00:00:00:00:00 roce disable
>+
> Subfunction
> ============
> 
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 5f6eca5e4a40..20306fb8a1d9 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -1451,6 +1451,25 @@ struct devlink_ops {
> 	int (*port_function_hw_addr_set)(struct devlink_port *port,
> 					 const u8 *hw_addr, int hw_addr_len,
> 					 struct netlink_ext_ack *extack);
>+	/**
>+	 * @port_function_roce_get: Port function's roce get function.
>+	 *
>+	 * Query RoCE state of a function managed by the devlink port.
>+	 * Return -EOPNOTSUPP if port function RoCE handling is not supported.
>+	 */
>+	int (*port_function_roce_get)(struct devlink_port *devlink_port,
>+				      bool *is_enable,
>+				      struct netlink_ext_ack *extack);
>+	/**
>+	 * @port_function_roce_set: Port function's roce set function.
>+	 *
>+	 * Enable/Disable the RoCE state of a function managed by the devlink
>+	 * port.
>+	 * Return -EOPNOTSUPP if port function RoCE handling is not supported.
>+	 */
>+	int (*port_function_roce_set)(struct devlink_port *devlink_port,
>+				      bool enable,
>+				      struct netlink_ext_ack *extack);
> 	/**
> 	 * port_new() - Add a new port function of a specified flavor
> 	 * @devlink: Devlink instance
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index 70191d96af89..6cc2925bd478 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -658,11 +658,21 @@ enum devlink_resource_unit {
> 	DEVLINK_RESOURCE_UNIT_ENTRY,
> };
> 
>+enum devlink_port_fn_attr_cap {
>+	DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
>+
>+	/* Add new caps above */
>+	__DEVLINK_PORT_FN_ATTR_CAPS_MAX,

Well, this is not needed in uapi either, but I don't see any good way to
maintain this internally :/ No harm in exposing it.

Looks good,
Reviewed-by: Jiri Pirko <jiri@...dia.com>




>+};
>+
>+#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
>+
> enum devlink_port_function_attr {
> 	DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
> 	DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR,	/* binary */
> 	DEVLINK_PORT_FN_ATTR_STATE,	/* u8 */
> 	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
>+	DEVLINK_PORT_FN_ATTR_CAPS,	/* bitfield32 */
> 
> 	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
> 	DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 2b6e11277837..5c4d3abd7677 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
> EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
> EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
> 
>+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
>+	(_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
>+
> static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
> 	[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
> 	[DEVLINK_PORT_FN_ATTR_STATE] =
> 		NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
> 				 DEVLINK_PORT_FN_STATE_ACTIVE),
>+	[DEVLINK_PORT_FN_ATTR_CAPS] =
>+		NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
> };
> 
> static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
>@@ -692,6 +697,60 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
> 	return 0;
> }
> 
>+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
>+				     u32 cap, bool is_enable)
>+{
>+	caps->selector |= cap;
>+	if (is_enable)
>+		caps->value |= cap;
>+}
>+
>+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
>+				     struct devlink_port *devlink_port,
>+				     struct nla_bitfield32 *caps,
>+				     struct netlink_ext_ack *extack)
>+{
>+	bool is_enable;
>+	int err;
>+
>+	if (!ops->port_function_roce_get)
>+		return 0;
>+
>+	err = ops->port_function_roce_get(devlink_port, &is_enable, extack);
>+	if (err) {
>+		if (err == -EOPNOTSUPP)
>+			return 0;
>+		return err;
>+	}
>+
>+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
>+	return 0;
>+}
>+
>+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
>+				     struct devlink_port *devlink_port,
>+				     struct sk_buff *msg,
>+				     struct netlink_ext_ack *extack,
>+				     bool *msg_updated)
>+{
>+	struct nla_bitfield32 caps = {};
>+	int err;
>+
>+	err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
>+	if (err)
>+		return err;
>+
>+	if (!caps.selector)
>+		return 0;
>+	err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
>+				 caps.selector);
>+	if (err)
>+		return err;
>+
>+	*msg_updated = true;
>+	return 0;
>+}
>+
> static int
> devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
> 				  struct genl_info *info,
>@@ -1275,6 +1334,35 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
> 	return 0;
> }
> 
>+static int
>+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
>+			 struct netlink_ext_ack *extack)
>+{
>+	const struct devlink_ops *ops = devlink_port->devlink->ops;
>+
>+	return ops->port_function_roce_set(devlink_port, enable, extack);
>+}
>+
>+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
>+				    const struct nlattr *attr,
>+				    struct netlink_ext_ack *extack)
>+{
>+	struct nla_bitfield32 caps;
>+	u32 caps_value;
>+	int err;
>+
>+	caps = nla_get_bitfield32(attr);
>+	caps_value = caps.value & caps.selector;
>+	if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
>+		err = devlink_port_fn_roce_set(devlink_port,
>+					       caps_value & DEVLINK_PORT_FN_CAP_ROCE,
>+					       extack);
>+		if (err)
>+			return err;
>+	}
>+	return 0;
>+}
>+
> static int
> devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
> 				   struct netlink_ext_ack *extack)
>@@ -1293,6 +1381,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
> 					   &msg_updated);
> 	if (err)
> 		goto out;
>+	err = devlink_port_fn_caps_fill(ops, port, msg, extack,
>+					&msg_updated);
>+	if (err)
>+		goto out;
> 	err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
> out:
> 	if (err || !msg_updated)
>@@ -1665,6 +1757,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
> 					  struct netlink_ext_ack *extack)
> {
> 	const struct devlink_ops *ops = devlink_port->devlink->ops;
>+	struct nlattr *attr;
> 
> 	if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
> 	    !ops->port_function_hw_addr_set) {
>@@ -1677,6 +1770,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
> 				   "Function does not support state setting");
> 		return -EOPNOTSUPP;
> 	}
>+	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
>+	if (attr) {
>+		struct nla_bitfield32 caps;
>+
>+		caps = nla_get_bitfield32(attr);
>+		if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
>+		    !ops->port_function_roce_set) {
>+			NL_SET_ERR_MSG_ATTR(extack, attr,
>+					    "Port doesn't support RoCE function attribute");
>+			return -EOPNOTSUPP;
>+		}
>+	}
> 	return 0;
> }
> 
>@@ -1704,6 +1809,14 @@ static int devlink_port_function_set(struct devlink_port *port,
> 		if (err)
> 			return err;
> 	}
>+
>+	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
>+	if (attr) {
>+		err = devlink_port_fn_caps_set(port, attr, extack);
>+		if (err)
>+			return err;
>+	}
>+
> 	/* Keep this as the last function attribute set, so that when
> 	 * multiple port function attributes are set along with state,
> 	 * Those can be applied first before activating the state.
>-- 
>2.38.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ