[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221130115217.7171-5-shayd@nvidia.com>
Date: Wed, 30 Nov 2022 13:52:13 +0200
From: Shay Drory <shayd@...dia.com>
To: <netdev@...r.kernel.org>, <kuba@...nel.org>, <davem@...emloft.net>
CC: <danielj@...dia.com>, <yishaih@...dia.com>, <jiri@...dia.com>,
<saeedm@...dia.com>, <parav@...dia.com>
Subject: [PATCH net-next 4/8] devlink: Expose port function commands to control RoCE
Expose port function commands to enable / disable RoCE, this is used to
control the port RoCE device capabilities.
When RoCE is disabled for a function of the port, function cannot create
any RoCE specific resources (e.g GID table).
It also saves system memory utilization. For example disabling RoCE enable a
VF/SF saves 1 Mbytes of system memory per function.
Example of a PCI VF port which supports function configuration:
Set RoCE of the VF's port function.
$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
vfnum 1
function:
hw_addr 00:00:00:00:00:00 roce enable
$ devlink port function set pci/0000:06:00.0/2 roce disable
$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
vfnum 1
function:
hw_addr 00:00:00:00:00:00 roce disable
Signed-off-by: Shay Drory <shayd@...dia.com>
Reviewed-by: Jiri Pirko <jiri@...dia.com>
---
.../networking/devlink/devlink-port.rst | 34 +++++-
include/net/devlink.h | 19 +++
include/uapi/linux/devlink.h | 12 ++
net/core/devlink.c | 114 ++++++++++++++++++
4 files changed, 178 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 0b520363c6af..79f9c0390b47 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -110,7 +110,7 @@ devlink ports for both the controllers.
Function configuration
======================
-A user can configure the function attribute before enumerating the PCI
+Users can configure one or more function attributes before enumerating the PCI
function. Usually it means, user should configure function attribute
before a bus specific device for the function is created. However, when
SRIOV is enabled, virtual function devices are created on the PCI bus.
@@ -122,6 +122,9 @@ A user may set the hardware address of the function using
'devlink port function set hw_addr' command. For Ethernet port function
this means a MAC address.
+Users may also set the RoCE capability of the function using
+'devlink port function set roce' command.
+
Function attributes
===================
@@ -162,6 +165,35 @@ device created for the PCI VF/SF.
function:
hw_addr 00:00:00:00:88:88
+RoCE capability setup
+---------------------
+Not all PCI VFs/SFs require RoCE capability.
+
+When RoCE capability is disabled, it saves system memory per PCI VF/SF.
+
+When user disables RoCE capability for a VF/SF, user application cannot send or
+receive any RoCE packets through this VF/SF and RoCE GID table for this PCI
+will be empty.
+
+When RoCE capability is disabled in the device using port function attribute,
+VF/SF driver cannot override it.
+
+- Get RoCE capability of the VF device::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce enable
+
+- Set RoCE capability of the VF device::
+
+ $ devlink port function set pci/0000:06:00.0/2 roce disable
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce disable
+
Subfunction
============
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 074a79b8933f..7f75100e8b26 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1435,6 +1435,25 @@ struct devlink_ops {
int (*port_function_hw_addr_set)(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
+ /**
+ * @port_function_roce_get: Port function's roce get function.
+ *
+ * Query RoCE state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_function_roce_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_function_roce_set: Port function's roce set function.
+ *
+ * Enable/Disable the RoCE state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_function_roce_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
/**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 498d0d5d0957..c6f1fbe54095 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -656,11 +656,23 @@ enum devlink_resource_unit {
DEVLINK_RESOURCE_UNIT_ENTRY,
};
+enum devlink_port_fn_attr_cap {
+ DEVLINK_PORT_FN_ATTR_CAP_ROCE,
+
+ /* Add new caps above */
+ __DEVLINK_PORT_FN_ATTR_CAPS_MAX,
+ DEVLINK_PORT_FN_ATTR_CAPS_MAX = __DEVLINK_PORT_FN_ATTR_CAPS_MAX - 1
+};
+
+#define DEVLINK_PORT_FN_ATTR_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
enum devlink_port_function_attr {
DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
+ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 485348697290..88846ad635a0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -200,6 +200,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_ATTR_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
@@ -692,6 +694,64 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
+#define DEVLINK_PORT_FN_CAP(_name) \
+ BIT(DEVLINK_PORT_FN_ATTR_CAP_##_name)
+
+#define DEVLINK_PORT_FN_SET_CAP(caps, cap, enable) \
+ do { \
+ typeof(cap) cap_ = (cap); \
+ typeof(caps) caps_ = (caps); \
+ (caps_)->selector |= cap_; \
+ if (enable) \
+ (caps_)->value |= cap_; \
+ } while (0)
+
+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_function_roce_get)
+ return 0;
+
+ err = ops->port_function_roce_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ DEVLINK_PORT_FN_SET_CAP(caps, DEVLINK_PORT_FN_CAP(ROCE), is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
@@ -1275,6 +1335,35 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
return 0;
}
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_function_roce_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP(ROCE)) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP(ROCE),
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
@@ -1293,6 +1382,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_caps_fill(ops, port, msg, extack,
+ &msg_updated);
+ if (err)
+ goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
@@ -1665,6 +1758,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops = devlink_port->devlink->ops;
+ struct nlattr *attr;
if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
!ops->port_function_hw_addr_set) {
@@ -1676,6 +1770,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
"Function does not support state setting");
return -EOPNOTSUPP;
}
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP(ROCE) &&
+ !ops->port_function_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ }
return 0;
}
@@ -1703,6 +1809,14 @@ static int devlink_port_function_set(struct devlink_port *port,
if (err)
return err;
}
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
--
2.38.1
Powered by blists - more mailing lists