[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251015140140.62273-14-daniel@iogearbox.net>
Date: Wed, 15 Oct 2025 16:01:38 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: netdev@...r.kernel.org
Cc: bpf@...r.kernel.org,
kuba@...nel.org,
davem@...emloft.net,
razor@...ckwall.org,
pabeni@...hat.com,
willemb@...gle.com,
sdf@...ichev.me,
john.fastabend@...il.com,
martin.lau@...nel.org,
jordan@...fe.io,
maciej.fijalkowski@...el.com,
magnus.karlsson@...el.com,
dw@...idwei.uk,
toke@...hat.com,
yangzhenze@...edance.com,
wangdongdong.6@...edance.com
Subject: [PATCH net-next v2 13/15] netkit: Implement rtnl_link_ops->alloc and ndo_queue_create
From: David Wei <dw@...idwei.uk>
Implement rtnl_link_ops->alloc that allows the number of rx queues to be
set when netkit is created. By default, netkit has only a single rxq (and
single txq). The number of queues is deliberately not allowed to be changed
via ethtool -L and is fixed for the lifetime of a netkit instance.
For netkit device creation, numrxqueues can be set to a value larger than one.
The resulting rxqs are then mappable to real rxqs in physical netdevs:
ip link add type netkit peer numrxqueues 64 # for device pair
ip link add numrxqueues 64 type netkit single # for single device
The limit of numrxqueues for netkit is currently set to 256, which allows
binding multiple real rxqs from physical netdevs.
The implementation of ndo_queue_create() adds a new rxq during the bind
queue operation. Queue creation is allowed either in single device mode or,
in dual device mode, on the netkit peer device which gets placed into the
target network namespace. In dual device mode, binding against the primary
device does not make sense for the targeted use cases and is therefore
rejected.
Signed-off-by: David Wei <dw@...idwei.uk>
Co-developed-by: Daniel Borkmann <daniel@...earbox.net>
Signed-off-by: Daniel Borkmann <daniel@...earbox.net>
---
drivers/net/netkit.c | 113 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 105 insertions(+), 8 deletions(-)
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 96734828bfb8..31235aa3379a 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -9,11 +9,19 @@
#include <linux/bpf_mprog.h>
#include <linux/indirect_call_wrapper.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/netkit.h>
#include <net/dst.h>
#include <net/tcx.h>
-#define DRV_NAME "netkit"
+#define NETKIT_DRV_NAME "netkit"
+
+/* Upper bounds on the queue counts accepted by netkit_alloc(). */
+#define NETKIT_NUM_RX_QUEUES_MAX 256
+#define NETKIT_NUM_TX_QUEUES_MAX 1
+
+/* Real queue counts netkit_alloc() assigns to a newly allocated device. */
+#define NETKIT_NUM_RX_QUEUES_REAL 1
+#define NETKIT_NUM_TX_QUEUES_REAL 1
struct netkit {
__cacheline_group_begin(netkit_fastpath);
@@ -37,6 +45,8 @@ struct netkit_link {
struct net_device *dev;
};
+static struct rtnl_link_ops netkit_link_ops;
+
static __always_inline int
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
enum netkit_action ret)
@@ -243,13 +253,99 @@ static const struct net_device_ops netkit_netdev_ops = {
+/* ethtool get_drvinfo callback: copy the driver name into @info->driver. */
static void netkit_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
+}
+
+/* ethtool get_channels callback: report the queue layout of @dev.
+ *
+ * The rx/tx maxima are taken from dev->num_rx_queues/dev->num_tx_queues
+ * and the current counts from dev->real_num_rx_queues/
+ * dev->real_num_tx_queues. No "other" channels are reported.
+ */
+static void netkit_get_channels(struct net_device *dev,
+ struct ethtool_channels *channels)
+{
+ channels->max_rx = dev->num_rx_queues;
+ channels->max_tx = dev->num_tx_queues;
+ channels->max_other = 0;
+ /* NOTE(review): max_combined is 1 while combined_count below stays 0;
+ * confirm this asymmetry is intended.
+ */
+ channels->max_combined = 1;
+ channels->rx_count = dev->real_num_rx_queues;
+ channels->tx_count = dev->real_num_tx_queues;
+ channels->other_count = 0;
+ channels->combined_count = 0;
}
+/* ethtool callbacks provided by netkit: driver info and channel counts. */
static const struct ethtool_ops netkit_ethtool_ops = {
.get_drvinfo = netkit_get_drvinfo,
+ .get_channels = netkit_get_channels,
};
+/* ndo_queue_create callback: activate one additional rx queue on @dev.
+ *
+ * Raises the real rx queue count by one through
+ * netif_set_real_num_rx_queues(). If the device is running, the carrier
+ * is taken down before the resize and brought back up afterwards,
+ * regardless of whether the resize succeeded.
+ *
+ * Returns the new real rx queue count on success, -EOPNOTSUPP for the
+ * primary device of a device pair, or the nonzero result of
+ * netif_set_real_num_rx_queues() on failure.
+ */
+static int netkit_queue_create(struct net_device *dev)
+{
+ struct netkit *nk = netkit_priv(dev);
+ u32 rxq_count_old, rxq_count_new;
+ int err;
+
+ rxq_count_old = dev->real_num_rx_queues;
+ rxq_count_new = rxq_count_old + 1;
+
+ /* Binding is only allowed in single device mode, or against the
+ * peer device which then ends up in the target netns.
+ */
+ if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary)
+ return -EOPNOTSUPP;
+
+ if (netif_running(dev))
+ netif_carrier_off(dev);
+ err = netif_set_real_num_rx_queues(dev, rxq_count_new);
+ if (netif_running(dev))
+ netif_carrier_on(dev);
+
+ return err ? err : rxq_count_new;
+}
+
+/* Queue management ops for netkit; only queue creation is implemented. */
+static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
+ .ndo_queue_create = netkit_queue_create,
+};
+
+/* rtnl_link_ops->alloc callback: allocate a netkit net_device with the
+ * requested number of tx/rx queues.
+ *
+ * Requests above NETKIT_NUM_TX_QUEUES_MAX or NETKIT_NUM_RX_QUEUES_MAX are
+ * rejected with ERR_PTR(-EOPNOTSUPP). On a successful allocation the real
+ * queue counts are set to NETKIT_NUM_TX_QUEUES_REAL and
+ * NETKIT_NUM_RX_QUEUES_REAL, independent of how many queues were
+ * requested. @tb is not used here.
+ *
+ * Returns the result of alloc_netdev_mqs() unchanged; the if (dev) guard
+ * below implies this may be NULL, which is passed on to the caller.
+ */
+static struct net_device *netkit_alloc(struct nlattr *tb[],
+ const char *ifname,
+ unsigned char name_assign_type,
+ unsigned int num_tx_queues,
+ unsigned int num_rx_queues)
+{
+ const struct rtnl_link_ops *ops = &netkit_link_ops;
+ struct net_device *dev;
+
+ if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
+ num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = alloc_netdev_mqs(ops->priv_size, ifname,
+ name_assign_type, ops->setup,
+ num_tx_queues, num_rx_queues);
+ if (dev) {
+ dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
+ dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
+ }
+ return dev;
+}
+
+/* Undo the rx queue peering of every real rx queue of @dev except queue 0.
+ *
+ * Returns immediately when only one real rx queue exists. Otherwise the
+ * walk runs with @dev's netdev lock held; for each queue the lock of the
+ * device owning the peer queue is additionally taken around
+ * netdev_rx_queue_unpeer().
+ */
+static void netkit_queue_unpeer(struct net_device *dev)
+{
+ struct netdev_rx_queue *src_rxq, *dst_rxq;
+ struct net_device *src_dev;
+ int i;
+
+ if (dev->real_num_rx_queues == 1)
+ return;
+ netdev_lock(dev);
+ for (i = 1; i < dev->real_num_rx_queues; i++) {
+ dst_rxq = __netif_get_rx_queue(dev, i);
+ /* NOTE(review): ->peer is dereferenced without a NULL check;
+ * confirm every real rxq with index >= 1 still has a peer
+ * at this point.
+ */
+ src_rxq = dst_rxq->peer;
+ src_dev = src_rxq->dev;
+
+ netdev_lock(src_dev);
+ netdev_rx_queue_unpeer(src_dev, src_rxq, dst_rxq);
+ netdev_unlock(src_dev);
+ }
+ netdev_unlock(dev);
+}
+
static void netkit_setup(struct net_device *dev)
{
static const netdev_features_t netkit_features_hw_vlan =
@@ -280,8 +376,9 @@ static void netkit_setup(struct net_device *dev)
dev->priv_flags |= IFF_DISABLE_NETPOLL;
dev->lltx = true;
- dev->ethtool_ops = &netkit_ethtool_ops;
- dev->netdev_ops = &netkit_netdev_ops;
+ dev->netdev_ops = &netkit_netdev_ops;
+ dev->ethtool_ops = &netkit_ethtool_ops;
+ dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;
dev->features |= netkit_features;
dev->hw_features = netkit_features;
@@ -330,8 +427,6 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
-static struct rtnl_link_ops netkit_link_ops;
-
static int netkit_new_link(struct net_device *dev,
struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
@@ -865,6 +960,7 @@ static void netkit_release_all(struct net_device *dev)
static void netkit_uninit(struct net_device *dev)
{
netkit_release_all(dev);
+ netkit_queue_unpeer(dev);
}
static void netkit_del_link(struct net_device *dev, struct list_head *head)
@@ -1005,8 +1101,9 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
};
static struct rtnl_link_ops netkit_link_ops = {
- .kind = DRV_NAME,
+ .kind = NETKIT_DRV_NAME,
.priv_size = sizeof(struct netkit),
+ .alloc = netkit_alloc,
.setup = netkit_setup,
.newlink = netkit_new_link,
.dellink = netkit_del_link,
@@ -1042,4 +1139,4 @@ MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@...earbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@...ckwall.org>");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
--
2.43.0
Powered by blists - more mailing lists