lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Zel8zzXM18k3FnGL@nanopsycho>
Date: Thu, 7 Mar 2024 09:37:35 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: William Tu <witu@...dia.com>
Cc: netdev@...r.kernel.org, jiri@...dia.com, bodong@...dia.com,
	tariqt@...dia.com, yossiku@...dia.com, kuba@...nel.org
Subject: Re: [PATCH RFC v3 iproute2-next] devlink: Add shared memory pool
 eswitch attribute

Thu, Mar 07, 2024 at 12:29:22AM CET, witu@...dia.com wrote:
>Add eswitch attribute spool_size for shared memory pool size.
>
>When using switchdev mode, the representor ports handles the slow path
>traffic, the traffic that can't be offloaded will be redirected to the
>representor port for processing. Memory consumption of the representor
>port's rx buffer can grow to several GB when scaling to 1k VFs reps.
>For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
>consumes 3MB of DMA memory for packet buffer in WQEs, and with four
>channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
>ports are for slow path traffic, most of these rx DMA memory are idle.
>
>Add spool_size configuration, allowing multiple representor ports
>to share a rx memory buffer pool. When enabled, individual representor
>doesn't need to allocate its dedicated rx buffer, but just pointing
>its rq to the memory pool. This could make the memory being better
>utilized. The spool_size represents the number of bytes of the memory
>pool. Users can adjust it based on how many reps, total system
>memory, or performance expectation.
>
>An example use case:
>$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
>  spool-size 4096000
>$ devlink dev eswitch show pci/0000:08:00.0
>  pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
>  spool-size 4096000
>
>Disable the shared memory pool by setting spool_size to 0.
>
>Signed-off-by: William Tu <witu@...dia.com>
>---
>v3:
>- change to 1 attributes and rename to spool-size
>
>v2: feedback from Stephen
>- add man page, send to iproute2-next
>---
> devlink/devlink.c            | 25 +++++++++++++++++++++++--
> include/uapi/linux/devlink.h |  1 +
> man/man8/devlink-dev.8       |  6 ++++++
> 3 files changed, 30 insertions(+), 2 deletions(-)
>
>diff --git a/devlink/devlink.c b/devlink/devlink.c
>index dbeb6e397e8e..5ad789caa934 100644
>--- a/devlink/devlink.c
>+++ b/devlink/devlink.c
>@@ -309,6 +309,7 @@ static int ifname_map_update(struct ifname_map *ifname_map, const char *ifname)
> #define DL_OPT_PORT_FN_RATE_TX_PRIORITY	BIT(55)
> #define DL_OPT_PORT_FN_RATE_TX_WEIGHT	BIT(56)
> #define DL_OPT_PORT_FN_CAPS	BIT(57)
>+#define DL_OPT_ESWITCH_SPOOL_SIZE	BIT(58)
> 
> struct dl_opts {
> 	uint64_t present; /* flags of present items */
>@@ -375,6 +376,7 @@ struct dl_opts {
> 	const char *linecard_type;
> 	bool selftests_opt[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
> 	struct nla_bitfield32 port_fn_caps;
>+	uint32_t eswitch_spool_size;
> };
> 
> struct dl {
>@@ -630,6 +632,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
> 	[DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
> 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
> 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = MNL_TYPE_U8,
>+	[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = MNL_TYPE_U32,
> 	[DEVLINK_ATTR_DPIPE_TABLES] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = MNL_TYPE_STRING,
>@@ -1672,6 +1675,7 @@ static const struct dl_args_metadata dl_args_required[] = {
> 	{DL_OPT_LINECARD,	      "Linecard index expected."},
> 	{DL_OPT_LINECARD_TYPE,	      "Linecard type expected."},
> 	{DL_OPT_SELFTESTS,            "Test name is expected"},
>+	{DL_OPT_ESWITCH_SPOOL_SIZE,   "E-Switch shared memory pool size expected."},
> };
> 
> static int dl_args_finding_required_validate(uint64_t o_required,
>@@ -1895,6 +1899,13 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required,
> 			if (err)
> 				return err;
> 			o_found |= DL_OPT_ESWITCH_ENCAP_MODE;
>+		} else if (dl_argv_match(dl, "spool-size") &&
>+			   (o_all & DL_OPT_ESWITCH_SPOOL_SIZE)) {
>+			dl_arg_inc(dl);
>+			err = dl_argv_uint32_t(dl, &opts->eswitch_spool_size);
>+			if (err)
>+				return err;
>+			o_found |= DL_OPT_ESWITCH_SPOOL_SIZE;
> 		} else if (dl_argv_match(dl, "path") &&
> 			   (o_all & DL_OPT_RESOURCE_PATH)) {
> 			dl_arg_inc(dl);
>@@ -2547,6 +2558,9 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
> 	if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE)
> 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
> 				opts->eswitch_encap_mode);
>+	if (opts->present & DL_OPT_ESWITCH_SPOOL_SIZE)
>+		mnl_attr_put_u32(nlh, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
>+				 opts->eswitch_spool_size);
> 	if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid)
> 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID,
> 				 opts->resource_id);
>@@ -2707,6 +2721,7 @@ static void cmd_dev_help(void)
> 	pr_err("       devlink dev eswitch set DEV [ mode { legacy | switchdev } ]\n");
> 	pr_err("                               [ inline-mode { none | link | network | transport } ]\n");
> 	pr_err("                               [ encap-mode { none | basic } ]\n");
>+	pr_err("                               [ spool-size { SIZE } ]\n");


SIZE should not be in brackets, it's not an enum selection:
	pr_err("                               [ spool-size SIZE ]\n");
Same in man



> 	pr_err("       devlink dev eswitch show DEV\n");
> 	pr_err("       devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n");
> 	pr_err("       devlink dev param show [DEV name PARAMETER]\n");
>@@ -3194,7 +3209,12 @@ static void pr_out_eswitch(struct dl *dl, struct nlattr **tb)
> 			     eswitch_encap_mode_name(mnl_attr_get_u8(
> 				    tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE])));
> 	}
>-
>+	if (tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
>+		check_indent_newline(dl);
>+		print_uint(PRINT_ANY, "spool-size", "spool-size %u",
>+			   mnl_attr_get_u32(
>+				    tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]));
>+	}
> 	pr_out_handle_end(dl);
> }
> 
>@@ -3239,7 +3259,8 @@ static int cmd_dev_eswitch_set(struct dl *dl)
> 	err = dl_argv_parse(dl, DL_OPT_HANDLE,
> 			    DL_OPT_ESWITCH_MODE |
> 			    DL_OPT_ESWITCH_INLINE_MODE |
>-			    DL_OPT_ESWITCH_ENCAP_MODE);
>+			    DL_OPT_ESWITCH_ENCAP_MODE |
>+			    DL_OPT_ESWITCH_SPOOL_SIZE);
> 	if (err)
> 		return err;
> 
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index e77170199815..c750e29a1c5c 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -614,6 +614,7 @@ enum devlink_attr {
> 
> 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
> 
>+	DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,	/* u32 */
> 	/* add new attributes above here, update the policy in devlink.c */
> 
> 	__DEVLINK_ATTR_MAX,
>diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8
>index e9d091df48d8..081cc8740f8b 100644
>--- a/man/man8/devlink-dev.8
>+++ b/man/man8/devlink-dev.8
>@@ -34,6 +34,8 @@ devlink-dev \- devlink device configuration
> .BR inline-mode " { " none " | " link " | " network " | " transport " } "
> ] [
> .BR encap-mode " { " none " | " basic " } "
>+] [
>+.BR spool-size " { SIZE } "
> ]
> 
> .ti -8
>@@ -151,6 +153,10 @@ Set eswitch encapsulation support
> .I basic
> - Enable encapsulation support
> 
>+.TP
>+.BR spool-size " SIZE"
>+Set the rx shared memory pool size in bytes.
>+
> .SS devlink dev param set  - set new value to devlink device configuration parameter
> 
> .TP
>-- 
>2.38.1
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ