[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aNFzfbIFkOY1f2bL@mini-arch>
Date: Mon, 22 Sep 2025 09:04:13 -0700
From: Stanislav Fomichev <stfomichev@...il.com>
To: Daniel Borkmann <daniel@...earbox.net>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org, kuba@...nel.org,
davem@...emloft.net, razor@...ckwall.org, pabeni@...hat.com,
willemb@...gle.com, sdf@...ichev.me, john.fastabend@...il.com,
martin.lau@...nel.org, jordan@...fe.io,
maciej.fijalkowski@...el.com, magnus.karlsson@...el.com,
David Wei <dw@...idwei.uk>
Subject: Re: [PATCH net-next 01/20] net, ynl: Add bind-queue operation
On 09/19, Daniel Borkmann wrote:
> From: David Wei <dw@...idwei.uk>
>
> Add a ynl netdev family operation called bind-queue that _binds_ an
> rxq from a real netdev to a virtual netdev i.e. netkit or veth. This
> bound or _mapped_ rxq in the virtual netdev acts as a proxy for the
> parent real rxq, and can be used by processes running in a container
> to use memory providers (io_uring zero-copy rx or devmem) or AF_XDP.
> An early implementation had only driver-specific integration [0],
> but in order for other virtual devices to reuse it, it makes sense to
> have this as a generic API.
>
> src-ifindex and src-queue-id are the real netdev and rxq, respectively.
> dst-ifindex is the virtual netdev. Note that this op doesn't take
> dst-queue-id, because the expectation is that the op will _create_ a
> new rxq in the virtual netdev. The virtual netdev must have
> real_num_rx_queues less than num_rx_queues at the time of calling
> bind-queue.
>
> Signed-off-by: David Wei <dw@...idwei.uk>
> Co-developed-by: Daniel Borkmann <daniel@...earbox.net>
> Signed-off-by: Daniel Borkmann <daniel@...earbox.net>
> Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0]
> ---
> Documentation/netlink/specs/netdev.yaml | 37 +++++++++++++++++++++++++
> include/uapi/linux/netdev.h | 11 ++++++++
> net/core/netdev-genl-gen.c | 14 ++++++++++
> net/core/netdev-genl-gen.h | 1 +
> net/core/netdev-genl.c | 4 +++
> tools/include/uapi/linux/netdev.h | 11 ++++++++
> 6 files changed, 78 insertions(+)
>
> diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
> index e00d3fa1c152..99a430ea8a9a 100644
> --- a/Documentation/netlink/specs/netdev.yaml
> +++ b/Documentation/netlink/specs/netdev.yaml
> @@ -561,6 +561,29 @@ attribute-sets:
> type: u32
> checks:
> min: 1
> + -
> + name: queue-pair
> + attributes:
> + -
> + name: src-ifindex
> + doc: netdev ifindex of the physical device
> + type: u32
> + checks:
> + min: 1
> + -
> + name: src-queue-id
> + doc: netdev queue id of the physical device
> + type: u32
> + -
> + name: dst-ifindex
> + doc: netdev ifindex of the virtual device
> + type: u32
> + checks:
> + min: 1
> + -
> + name: dst-queue-id
> + doc: netdev queue id of the virtual device
> + type: u32
>
> operations:
> list:
> @@ -772,6 +795,20 @@ operations:
> attributes:
> - id
>
> + -
> + name: bind-queue
> + doc: Bind a physical netdev queue to a virtual one
> + attribute-set: queue-pair
> + do:
> + request:
> + attributes:
> + - src-ifindex
> + - src-queue-id
> + - dst-ifindex
> + reply:
> + attributes:
> + - dst-queue-id
> +
> kernel-family:
> headers: ["net/netdev_netlink.h"]
> sock-priv: struct netdev_nl_sock
> diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
> index 48eb49aa03d4..05e17765a39d 100644
> --- a/include/uapi/linux/netdev.h
> +++ b/include/uapi/linux/netdev.h
> @@ -210,6 +210,16 @@ enum {
> NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
> };
>
> +enum {
> + NETDEV_A_QUEUE_PAIR_SRC_IFINDEX = 1,
> + NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID,
> + NETDEV_A_QUEUE_PAIR_DST_IFINDEX,
> + NETDEV_A_QUEUE_PAIR_DST_QUEUE_ID,
> +
> + __NETDEV_A_QUEUE_PAIR_MAX,
> + NETDEV_A_QUEUE_PAIR_MAX = (__NETDEV_A_QUEUE_PAIR_MAX - 1)
> +};
> +
> enum {
> NETDEV_CMD_DEV_GET = 1,
> NETDEV_CMD_DEV_ADD_NTF,
> @@ -226,6 +236,7 @@ enum {
> NETDEV_CMD_BIND_RX,
> NETDEV_CMD_NAPI_SET,
> NETDEV_CMD_BIND_TX,
> + NETDEV_CMD_BIND_QUEUE,
>
> __NETDEV_CMD_MAX,
> NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
> diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
> index e9a2a6f26cb7..10b2ab4dd500 100644
> --- a/net/core/netdev-genl-gen.c
> +++ b/net/core/netdev-genl-gen.c
> @@ -106,6 +106,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1]
> [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
> };
>
> +/* NETDEV_CMD_BIND_QUEUE - do */
> +static const struct nla_policy netdev_bind_queue_nl_policy[NETDEV_A_QUEUE_PAIR_DST_IFINDEX + 1] = {
> + [NETDEV_A_QUEUE_PAIR_SRC_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
> + [NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID] = { .type = NLA_U32, },
> + [NETDEV_A_QUEUE_PAIR_DST_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
> +};
> +
> /* Ops table for netdev */
> static const struct genl_split_ops netdev_nl_ops[] = {
> {
> @@ -204,6 +211,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
> .maxattr = NETDEV_A_DMABUF_FD,
> .flags = GENL_CMD_CAP_DO,
> },
> + {
> + .cmd = NETDEV_CMD_BIND_QUEUE,
> + .doit = netdev_nl_bind_queue_doit,
> + .policy = netdev_bind_queue_nl_policy,
> + .maxattr = NETDEV_A_QUEUE_PAIR_DST_IFINDEX,
> + .flags = GENL_CMD_CAP_DO,
> + },
> };
>
> static const struct genl_multicast_group netdev_nl_mcgrps[] = {
> diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
> index cf3fad74511f..309248fe2b9e 100644
> --- a/net/core/netdev-genl-gen.h
> +++ b/net/core/netdev-genl-gen.h
> @@ -35,6 +35,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
> int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
> int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
> int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info);
> +int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info);
>
> enum {
> NETDEV_NLGRP_MGMT,
> diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
> index 470fabbeacd9..b0aea27bf84e 100644
> --- a/net/core/netdev-genl.c
> +++ b/net/core/netdev-genl.c
> @@ -1120,6 +1120,10 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
> return err;
> }
>
> +int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info)
> +{
nit: have this stub return 'not supported' (e.g. -EOPNOTSUPP) for now, or something similar?
Powered by blists - more mailing lists