lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250919213153.103606-2-daniel@iogearbox.net>
Date: Fri, 19 Sep 2025 23:31:34 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: netdev@...r.kernel.org
Cc: bpf@...r.kernel.org,
	kuba@...nel.org,
	davem@...emloft.net,
	razor@...ckwall.org,
	pabeni@...hat.com,
	willemb@...gle.com,
	sdf@...ichev.me,
	john.fastabend@...il.com,
	martin.lau@...nel.org,
	jordan@...fe.io,
	maciej.fijalkowski@...el.com,
	magnus.karlsson@...el.com,
	David Wei <dw@...idwei.uk>
Subject: [PATCH net-next 01/20] net, ynl: Add bind-queue operation

From: David Wei <dw@...idwei.uk>

Add a ynl netdev family operation called bind-queue that _binds_ an
rxq from a real netdev to a virtual netdev i.e. netkit or veth. This
bound or _mapped_ rxq in the virtual netdev acts as a proxy for the
parent real rxq, and can be used by processes running in a container
to use memory providers (io_uring zero-copy rx or devmem) or AF_XDP.
An early implementation had only driver-specific integration [0],
but in order for other virtual devices to reuse it, it makes sense to
have this as a generic API.

src-ifindex and src-queue-id are the real netdev and rxq, respectively.
dst-ifindex is the virtual netdev. Note that this op doesn't take
dst-queue-id, because the expectation is that the op will _create_ a
new rxq in the virtual netdev. The virtual netdev must have
real_num_rx_queues less than num_rx_queues at the time of calling
bind-queue.

Signed-off-by: David Wei <dw@...idwei.uk>
Co-developed-by: Daniel Borkmann <daniel@...earbox.net>
Signed-off-by: Daniel Borkmann <daniel@...earbox.net>
Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0]
---
 Documentation/netlink/specs/netdev.yaml | 37 +++++++++++++++++++++++++
 include/uapi/linux/netdev.h             | 11 ++++++++
 net/core/netdev-genl-gen.c              | 14 ++++++++++
 net/core/netdev-genl-gen.h              |  1 +
 net/core/netdev-genl.c                  |  4 +++
 tools/include/uapi/linux/netdev.h       | 11 ++++++++
 6 files changed, 78 insertions(+)

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index e00d3fa1c152..99a430ea8a9a 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -561,6 +561,29 @@ attribute-sets:
         type: u32
         checks:
           min: 1
+  -
+    name: queue-pair
+    attributes:
+      -
+        name: src-ifindex
+        doc: netdev ifindex of the physical device
+        type: u32
+        checks:
+          min: 1
+      -
+        name: src-queue-id
+        doc: netdev queue id of the physical device
+        type: u32
+      -
+        name: dst-ifindex
+        doc: netdev ifindex of the virtual device
+        type: u32
+        checks:
+          min: 1
+      -
+        name: dst-queue-id
+        doc: netdev queue id of the virtual device
+        type: u32
 
 operations:
   list:
@@ -772,6 +795,20 @@ operations:
           attributes:
             - id
 
+    -
+      name: bind-queue
+      doc: Bind a physical netdev queue to a virtual one
+      attribute-set: queue-pair
+      do:
+        request:
+          attributes:
+            - src-ifindex
+            - src-queue-id
+            - dst-ifindex
+        reply:
+          attributes:
+            - dst-queue-id
+
 kernel-family:
   headers: ["net/netdev_netlink.h"]
   sock-priv: struct netdev_nl_sock
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 48eb49aa03d4..05e17765a39d 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -210,6 +210,16 @@ enum {
 	NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
 };
 
+enum {
+	NETDEV_A_QUEUE_PAIR_SRC_IFINDEX = 1,
+	NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID,
+	NETDEV_A_QUEUE_PAIR_DST_IFINDEX,
+	NETDEV_A_QUEUE_PAIR_DST_QUEUE_ID,
+
+	__NETDEV_A_QUEUE_PAIR_MAX,
+	NETDEV_A_QUEUE_PAIR_MAX = (__NETDEV_A_QUEUE_PAIR_MAX - 1)
+};
+
 enum {
 	NETDEV_CMD_DEV_GET = 1,
 	NETDEV_CMD_DEV_ADD_NTF,
@@ -226,6 +236,7 @@ enum {
 	NETDEV_CMD_BIND_RX,
 	NETDEV_CMD_NAPI_SET,
 	NETDEV_CMD_BIND_TX,
+	NETDEV_CMD_BIND_QUEUE,
 
 	__NETDEV_CMD_MAX,
 	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index e9a2a6f26cb7..10b2ab4dd500 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -106,6 +106,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1]
 	[NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
 };
 
+/* NETDEV_CMD_BIND_QUEUE - do */
+static const struct nla_policy netdev_bind_queue_nl_policy[NETDEV_A_QUEUE_PAIR_DST_IFINDEX + 1] = {
+	[NETDEV_A_QUEUE_PAIR_SRC_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+	[NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID] = { .type = NLA_U32, },
+	[NETDEV_A_QUEUE_PAIR_DST_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
 /* Ops table for netdev */
 static const struct genl_split_ops netdev_nl_ops[] = {
 	{
@@ -204,6 +211,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
 		.maxattr	= NETDEV_A_DMABUF_FD,
 		.flags		= GENL_CMD_CAP_DO,
 	},
+	{
+		.cmd		= NETDEV_CMD_BIND_QUEUE,
+		.doit		= netdev_nl_bind_queue_doit,
+		.policy		= netdev_bind_queue_nl_policy,
+		.maxattr	= NETDEV_A_QUEUE_PAIR_DST_IFINDEX,
+		.flags		= GENL_CMD_CAP_DO,
+	},
 };
 
 static const struct genl_multicast_group netdev_nl_mcgrps[] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index cf3fad74511f..309248fe2b9e 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -35,6 +35,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info);
 
 enum {
 	NETDEV_NLGRP_MGMT,
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 470fabbeacd9..b0aea27bf84e 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -1120,6 +1120,10 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info)
+{
+}
+
 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
 {
 	INIT_LIST_HEAD(&priv->bindings);
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 48eb49aa03d4..05e17765a39d 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -210,6 +210,16 @@ enum {
 	NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
 };
 
+enum {
+	NETDEV_A_QUEUE_PAIR_SRC_IFINDEX = 1,
+	NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID,
+	NETDEV_A_QUEUE_PAIR_DST_IFINDEX,
+	NETDEV_A_QUEUE_PAIR_DST_QUEUE_ID,
+
+	__NETDEV_A_QUEUE_PAIR_MAX,
+	NETDEV_A_QUEUE_PAIR_MAX = (__NETDEV_A_QUEUE_PAIR_MAX - 1)
+};
+
 enum {
 	NETDEV_CMD_DEV_GET = 1,
 	NETDEV_CMD_DEV_ADD_NTF,
@@ -226,6 +236,7 @@ enum {
 	NETDEV_CMD_BIND_RX,
 	NETDEV_CMD_NAPI_SET,
 	NETDEV_CMD_BIND_TX,
+	NETDEV_CMD_BIND_QUEUE,
 
 	__NETDEV_CMD_MAX,
 	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ