lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zel8WpMczric0fz3@nanopsycho>
Date: Thu, 7 Mar 2024 09:35:38 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: William Tu <witu@...dia.com>
Cc: netdev@...r.kernel.org, jiri@...dia.com, bodong@...dia.com,
	tariqt@...dia.com, yossiku@...dia.com, kuba@...nel.org
Subject: Re: [PATCH RFC v3 net-next 1/2] devlink: Add shared memory pool
 eswitch attribute

Thu, Mar 07, 2024 at 12:12:52AM CET, witu@...dia.com wrote:
>Add eswitch attribute spool_size for shared memory pool size.
>
>When using switchdev mode, the representor ports handles the slow path
>traffic, the traffic that can't be offloaded will be redirected to the
>representor port for processing. Memory consumption of the representor
>port's rx buffer can grow to several GB when scaling to 1k VFs reps.
>For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
>consumes 3MB of DMA memory for packet buffer in WQEs, and with four
>channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
>ports are for slow path traffic, most of these rx DMA memory are idle.
>
>Add spool_size configuration, allowing multiple representor ports
>to share a rx memory buffer pool. When enabled, individual representor
>doesn't need to allocate its dedicated rx buffer, but just pointing
>its rq to the memory pool. This could make the memory being better
>utilized. The spool_size represents the number of bytes of the memory
>pool. Users can adjust it based on how many reps, total system
>memory, or performance expectation.
>
>An example use case:
>$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
>  spool-size 4096000
>$ devlink dev eswitch show pci/0000:08:00.0
>  pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
>  spool-size 4096000
>
>Disable the shared memory pool by setting spool_size to 0.
>
>Signed-off-by: William Tu <witu@...dia.com>
>---
>v3: feedback from Jakub
>- introduce 1 attribute instead of 2
>- use spool_size == 0 to disable
>- spool_size as byte unit, not counts
>
>Question about ENODOCS:
>where to add this? the only document about devlink attr is at iproute2
>Do I add a new file Documentation/networking/devlink/devlink-eswitch-attr.rst?

Yeah. Please document the existing attrs as well while you are at it.


>---
> Documentation/netlink/specs/devlink.yaml |  4 ++++
> include/net/devlink.h                    |  3 +++
> include/uapi/linux/devlink.h             |  1 +
> net/devlink/dev.c                        | 21 +++++++++++++++++++++
> net/devlink/netlink_gen.c                |  5 +++--
> 5 files changed, 32 insertions(+), 2 deletions(-)
>
>diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
>index cf6eaa0da821..cb46fa9d6664 100644
>--- a/Documentation/netlink/specs/devlink.yaml
>+++ b/Documentation/netlink/specs/devlink.yaml
>@@ -429,6 +429,9 @@ attribute-sets:
>         name: eswitch-encap-mode
>         type: u8
>         enum: eswitch-encap-mode
>+      -
>+        name: eswitch-spool-size
>+        type: u32
>       -
>         name: resource-list
>         type: nest
>@@ -1555,6 +1558,7 @@ operations:
>             - eswitch-mode
>             - eswitch-inline-mode
>             - eswitch-encap-mode
>+            - eswitch-spool-size
> 
>     -
>       name: eswitch-set
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 9ac394bdfbe4..164c543dd7ca 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -1327,6 +1327,9 @@ struct devlink_ops {
> 	int (*eswitch_encap_mode_set)(struct devlink *devlink,
> 				      enum devlink_eswitch_encap_mode encap_mode,
> 				      struct netlink_ext_ack *extack);
>+	int (*eswitch_spool_size_get)(struct devlink *devlink, u32 *p_size);
>+	int (*eswitch_spool_size_set)(struct devlink *devlink, u32 size,
>+				      struct netlink_ext_ack *extack);
> 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
> 			struct netlink_ext_ack *extack);
> 	/**
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index 130cae0d3e20..cbe51be7a08a 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -614,6 +614,7 @@ enum devlink_attr {
> 
> 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
> 
>+	DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,	/* u32 */
> 	/* add new attributes above here, update the policy in devlink.c */

While at it, could you please update this comment? It should say:
 	/* Add new attributes above here, update the spec in
	 * Documentation/netlink/specs/devlink.yaml and re-generate
	 * net/devlink/netlink_gen.c. */
As a separate patch please.


> 
> 	__DEVLINK_ATTR_MAX,
>diff --git a/net/devlink/dev.c b/net/devlink/dev.c
>index 19dbf540748a..561874424db7 100644
>--- a/net/devlink/dev.c
>+++ b/net/devlink/dev.c
>@@ -633,6 +633,7 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
> {
> 	const struct devlink_ops *ops = devlink->ops;
> 	enum devlink_eswitch_encap_mode encap_mode;
>+	u32 spool_size;
> 	u8 inline_mode;
> 	void *hdr;
> 	int err = 0;
>@@ -674,6 +675,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
> 			goto nla_put_failure;
> 	}
> 
>+	if (ops->eswitch_spool_size_get) {
>+		err = ops->eswitch_spool_size_get(devlink, &spool_size);
>+		if (err)
>+			goto nla_put_failure;
>+		err = nla_put_u32(msg, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE, spool_size);
>+		if (err)
>+			goto nla_put_failure;
>+	}
>+
> 	genlmsg_end(msg, hdr);
> 	return 0;
> 
>@@ -708,10 +718,21 @@ int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
> 	struct devlink *devlink = info->user_ptr[0];
> 	const struct devlink_ops *ops = devlink->ops;
> 	enum devlink_eswitch_encap_mode encap_mode;
>+	u32 spool_size;
> 	u8 inline_mode;
> 	int err = 0;
> 	u16 mode;
> 
>+	if (info->attrs[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
>+		if (!ops->eswitch_spool_size_set)

Fill up extack msg here please.


>+			return -EOPNOTSUPP;
>+		spool_size = nla_get_u32(info->attrs[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]);
>+		err = ops->eswitch_spool_size_set(devlink, spool_size,
>+						  info->extack);
>+		if (err)
>+			return err;
>+	}
>+
> 	if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
> 		if (!ops->eswitch_mode_set)
> 			return -EOPNOTSUPP;
>diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
>index c81cf2dd154f..acbf484b28a2 100644
>--- a/net/devlink/netlink_gen.c
>+++ b/net/devlink/netlink_gen.c
>@@ -194,12 +194,13 @@ static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NA
> };
> 
> /* DEVLINK_CMD_ESWITCH_SET - do */
>-static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = {
>+static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE + 1] = {
> 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
> 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
> 	[DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1),
> 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3),
> 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1),
>+	[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = { .type = NLA_U32, },
> };
> 
> /* DEVLINK_CMD_DPIPE_TABLE_GET - do */
>@@ -787,7 +788,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
> 		.doit		= devlink_nl_eswitch_set_doit,
> 		.post_doit	= devlink_nl_post_doit,
> 		.policy		= devlink_eswitch_set_nl_policy,
>-		.maxattr	= DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
>+		.maxattr	= DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
> 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
> 	},
> 	{
>-- 
>2.38.1
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ