[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zel8WpMczric0fz3@nanopsycho>
Date: Thu, 7 Mar 2024 09:35:38 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: William Tu <witu@...dia.com>
Cc: netdev@...r.kernel.org, jiri@...dia.com, bodong@...dia.com,
tariqt@...dia.com, yossiku@...dia.com, kuba@...nel.org
Subject: Re: [PATCH RFC v3 net-next 1/2] devlink: Add shared memory pool
eswitch attribute
Thu, Mar 07, 2024 at 12:12:52AM CET, witu@...dia.com wrote:
>Add eswitch attribute spool_size for shared memory pool size.
>
>When using switchdev mode, the representor ports handles the slow path
>traffic, the traffic that can't be offloaded will be redirected to the
>representor port for processing. Memory consumption of the representor
>port's rx buffer can grow to several GB when scaling to 1k VFs reps.
>For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
>consumes 3MB of DMA memory for packet buffer in WQEs, and with four
>channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
>ports are for slow path traffic, most of these rx DMA memory are idle.
>
>Add spool_size configuration, allowing multiple representor ports
>to share a rx memory buffer pool. When enabled, individual representor
>doesn't need to allocate its dedicated rx buffer, but just pointing
>its rq to the memory pool. This could make the memory being better
>utilized. The spool_size represents the number of bytes of the memory
>pool. Users can adjust it based on how many reps, total system
>memory, or performance expectation.
>
>An example use case:
>$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
> spool-size 4096000
>$ devlink dev eswitch show pci/0000:08:00.0
> pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
> spool-size 4096000
>
>Disable the shared memory pool by setting spool_size to 0.
>
>Signed-off-by: William Tu <witu@...dia.com>
>---
>v3: feedback from Jakub
>- introduce 1 attribute instead of 2
>- use spool_size == 0 to disable
>- spool_size as byte unit, not counts
>
>Question about ENODOCS:
>where to add this? the only document about devlink attr is at iproute2
>Do I add a new file Documentation/networking/devlink/devlink-eswitch-attr.rst?
Yeah. Please document the existing attrs as well while you are at it.
>---
> Documentation/netlink/specs/devlink.yaml | 4 ++++
> include/net/devlink.h | 3 +++
> include/uapi/linux/devlink.h | 1 +
> net/devlink/dev.c | 21 +++++++++++++++++++++
> net/devlink/netlink_gen.c | 5 +++--
> 5 files changed, 32 insertions(+), 2 deletions(-)
>
>diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
>index cf6eaa0da821..cb46fa9d6664 100644
>--- a/Documentation/netlink/specs/devlink.yaml
>+++ b/Documentation/netlink/specs/devlink.yaml
>@@ -429,6 +429,9 @@ attribute-sets:
> name: eswitch-encap-mode
> type: u8
> enum: eswitch-encap-mode
>+ -
>+ name: eswitch-spool-size
>+ type: u32
> -
> name: resource-list
> type: nest
>@@ -1555,6 +1558,7 @@ operations:
> - eswitch-mode
> - eswitch-inline-mode
> - eswitch-encap-mode
>+ - eswitch-spool-size
>
> -
> name: eswitch-set
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 9ac394bdfbe4..164c543dd7ca 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -1327,6 +1327,9 @@ struct devlink_ops {
> int (*eswitch_encap_mode_set)(struct devlink *devlink,
> enum devlink_eswitch_encap_mode encap_mode,
> struct netlink_ext_ack *extack);
>+ int (*eswitch_spool_size_get)(struct devlink *devlink, u32 *p_size);
>+ int (*eswitch_spool_size_set)(struct devlink *devlink, u32 size,
>+ struct netlink_ext_ack *extack);
> int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
> struct netlink_ext_ack *extack);
> /**
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index 130cae0d3e20..cbe51be7a08a 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -614,6 +614,7 @@ enum devlink_attr {
>
> DEVLINK_ATTR_REGION_DIRECT, /* flag */
>
>+ DEVLINK_ATTR_ESWITCH_SPOOL_SIZE, /* u32 */
> /* add new attributes above here, update the policy in devlink.c */
While at it, could you please update this comment? It should say:
/* Add new attributes above here, update the spec in
* Documentation/netlink/specs/devlink.yaml and re-generate
* net/devlink/netlink_gen.c. */
As a separate patch please.
>
> __DEVLINK_ATTR_MAX,
>diff --git a/net/devlink/dev.c b/net/devlink/dev.c
>index 19dbf540748a..561874424db7 100644
>--- a/net/devlink/dev.c
>+++ b/net/devlink/dev.c
>@@ -633,6 +633,7 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
> {
> const struct devlink_ops *ops = devlink->ops;
> enum devlink_eswitch_encap_mode encap_mode;
>+ u32 spool_size;
> u8 inline_mode;
> void *hdr;
> int err = 0;
>@@ -674,6 +675,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
> goto nla_put_failure;
> }
>
>+ if (ops->eswitch_spool_size_get) {
>+ err = ops->eswitch_spool_size_get(devlink, &spool_size);
>+ if (err)
>+ goto nla_put_failure;
>+ err = nla_put_u32(msg, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE, spool_size);
>+ if (err)
>+ goto nla_put_failure;
>+ }
>+
> genlmsg_end(msg, hdr);
> return 0;
>
>@@ -708,10 +718,21 @@ int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
> struct devlink *devlink = info->user_ptr[0];
> const struct devlink_ops *ops = devlink->ops;
> enum devlink_eswitch_encap_mode encap_mode;
>+ u32 spool_size;
> u8 inline_mode;
> int err = 0;
> u16 mode;
>
>+ if (info->attrs[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
>+ if (!ops->eswitch_spool_size_set)
Fill up extack msg here please.
>+ return -EOPNOTSUPP;
>+ spool_size = nla_get_u32(info->attrs[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]);
>+ err = ops->eswitch_spool_size_set(devlink, spool_size,
>+ info->extack);
>+ if (err)
>+ return err;
>+ }
>+
> if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
> if (!ops->eswitch_mode_set)
> return -EOPNOTSUPP;
>diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
>index c81cf2dd154f..acbf484b28a2 100644
>--- a/net/devlink/netlink_gen.c
>+++ b/net/devlink/netlink_gen.c
>@@ -194,12 +194,13 @@ static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NA
> };
>
> /* DEVLINK_CMD_ESWITCH_SET - do */
>-static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = {
>+static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE + 1] = {
> [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
> [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
> [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1),
> [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3),
> [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1),
>+ [DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = { .type = NLA_U32, },
> };
>
> /* DEVLINK_CMD_DPIPE_TABLE_GET - do */
>@@ -787,7 +788,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
> .doit = devlink_nl_eswitch_set_doit,
> .post_doit = devlink_nl_post_doit,
> .policy = devlink_eswitch_set_nl_policy,
>- .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
>+ .maxattr = DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
> .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
> },
> {
>--
>2.38.1
>
>
Powered by blists - more mailing lists