lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 12 Jun 2023 10:23:03 -0700
From: Stanislav Fomichev <sdf@...gle.com>
To: bpf@...r.kernel.org
Cc: ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, 
	martin.lau@...ux.dev, song@...nel.org, yhs@...com, john.fastabend@...il.com, 
	kpsingh@...nel.org, sdf@...gle.com, haoluo@...gle.com, jolsa@...nel.org, 
	netdev@...r.kernel.org
Subject: [RFC bpf-next 3/7] bpf: implement devtx hook points

devtx is a lightweight set of hooks that run before and after packet
transmission. The hooks are supposed to work for both the skb and XDP
paths by exposing a lightweight packet wrapper, devtx_frame (header
portion + frags).

devtx is implemented as a tracing program which has access to the
XDP-metadata-like kfuncs. The initial set of kfuncs is implemented
in the next patch, but the idea is similar to XDP metadata:
the kfuncs have netdev-specific implementations, but a common
interface. Upon loading, the kfuncs are resolved to direct
calls against the per-netdev implementation. This can be achieved
by marking devtx-tracing programs as dev-bound (largely
reusing the xdp-dev-bound program infrastructure).

Attachment and detachment are implemented via a syscall BPF program
by calling bpf_devtx_sb_attach (attach to tx submission)
or bpf_devtx_cp_attach (attach to tx completion); passing a negative
prog_fd detaches the currently attached program. Right now,
the attachment does not return a link and doesn't support
multiple programs. I plan to switch to Daniel's bpf_mprog infra
once it's available.

Cc: netdev@...r.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
---
 MAINTAINERS               |   2 +
 include/linux/netdevice.h |   2 +
 include/net/devtx.h       |  76 ++++++++++++++
 kernel/bpf/offload.c      |   6 ++
 net/core/Makefile         |   1 +
 net/core/dev.c            |   2 +
 net/core/devtx.c          | 208 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 297 insertions(+)
 create mode 100644 include/net/devtx.h
 create mode 100644 net/core/devtx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c904dba1733b..516529b42e66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22976,11 +22976,13 @@ L:	bpf@...r.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/*/*/*/*/*xdp*
 F:	drivers/net/ethernet/*/*/*xdp*
+F:	include/net/devtx.h
 F:	include/net/xdp.h
 F:	include/net/xdp_priv.h
 F:	include/trace/events/xdp.h
 F:	kernel/bpf/cpumap.c
 F:	kernel/bpf/devmap.c
+F:	net/core/devtx.c
 F:	net/core/xdp.c
 F:	samples/bpf/xdp*
 F:	tools/testing/selftests/bpf/*/*xdp*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 08fbd4622ccf..e08e3fd39dfc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2238,6 +2238,8 @@ struct net_device {
 	unsigned int		real_num_rx_queues;
 
 	struct bpf_prog __rcu	*xdp_prog;
+	struct bpf_prog	__rcu	*devtx_sb;
+	struct bpf_prog	__rcu	*devtx_cp;
 	unsigned long		gro_flush_timeout;
 	int			napi_defer_hard_irqs;
 #define GRO_LEGACY_MAX_SIZE	65536u
diff --git a/include/net/devtx.h b/include/net/devtx.h
new file mode 100644
index 000000000000..7eab66d0ce80
--- /dev/null
+++ b/include/net/devtx.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __LINUX_NET_DEVTX_H__
+#define __LINUX_NET_DEVTX_H__
+
+#include <linux/jump_label.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/xdp.h>
+
+/*
+ * Packet description handed to devtx programs for both skb and XDP frames.
+ * NOTE(review): @len is u16 while skb_headlen() is presumably wider; confirm
+ * that a linear area above 64K cannot reach devtx_frame_from_skb().
+ */
+struct devtx_frame {
+	void *data; /* skb->data or xdp_frame->data */
+	u16 len; /* skb_headlen() or xdp_frame->len */
+	struct skb_shared_info *sinfo; /* for frags */
+};
+
+/*
+ * net/core/devtx.o is only built with CONFIG_BPF_SYSCALL, so the real
+ * declarations have to depend on it too. With CONFIG_NET alone, callers
+ * such as net/core/dev.c would otherwise reference undefined symbols.
+ */
+#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
+void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx);
+void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx);
+bool is_devtx_kfunc(u32 kfunc_id);
+void devtx_shutdown(struct net_device *netdev);
+
+/* Describe the linear part and the shared info of @skb in @ctx. */
+static inline void devtx_frame_from_skb(struct devtx_frame *ctx, struct sk_buff *skb)
+{
+	ctx->data = skb->data;
+	ctx->len = skb_headlen(skb);
+	ctx->sinfo = skb_shinfo(skb);
+}
+
+/* Describe @xdpf in @ctx; sinfo stays NULL when the frame has no frags. */
+static inline void devtx_frame_from_xdp(struct devtx_frame *ctx, struct xdp_frame *xdpf)
+{
+	ctx->data = xdpf->data;
+	ctx->len = xdpf->len;
+	ctx->sinfo = xdp_frame_has_frags(xdpf) ? xdp_get_shared_info_from_frame(xdpf) : NULL;
+}
+
+DECLARE_STATIC_KEY_FALSE(devtx_enabled);
+
+/* True when devtx is enabled and @netdev has a tx-submission program. */
+static inline bool devtx_submit_enabled(struct net_device *netdev)
+{
+	return static_branch_unlikely(&devtx_enabled) &&
+	       rcu_access_pointer(netdev->devtx_sb);
+}
+
+/* True when devtx is enabled and @netdev has a tx-completion program. */
+static inline bool devtx_complete_enabled(struct net_device *netdev)
+{
+	return static_branch_unlikely(&devtx_enabled) &&
+	       rcu_access_pointer(netdev->devtx_cp);
+}
+#else
+static inline void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx)
+{
+}
+
+static inline void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx)
+{
+}
+
+static inline bool is_devtx_kfunc(u32 kfunc_id)
+{
+	return false;
+}
+
+static inline void devtx_shutdown(struct net_device *netdev)
+{
+}
+
+static inline void devtx_frame_from_skb(struct devtx_frame *ctx, struct sk_buff *skb)
+{
+}
+
+static inline void devtx_frame_from_xdp(struct devtx_frame *ctx, struct xdp_frame *xdpf)
+{
+}
+
+static inline bool devtx_submit_enabled(struct net_device *netdev)
+{
+	return false;
+}
+
+static inline bool devtx_complete_enabled(struct net_device *netdev)
+{
+	return false;
+}
+#endif
+
+#endif /* __LINUX_NET_DEVTX_H__ */
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 235d81f7e0ed..9cfe96422c80 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -25,6 +25,7 @@
 #include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
+#include <net/devtx.h>
 
 /* Protects offdevs, members of bpf_offload_netdev and offload members
  * of all progs.
@@ -228,6 +229,7 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
 	int err;
 
 	if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
+	    attr->prog_type != BPF_PROG_TYPE_TRACING &&
 	    attr->prog_type != BPF_PROG_TYPE_XDP)
 		return -EINVAL;
 
@@ -238,6 +240,10 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
 	    attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
 		return -EINVAL;
 
+	if (attr->prog_type == BPF_PROG_TYPE_TRACING &&
+	    !is_devtx_kfunc(prog->aux->attach_btf_id))
+		return -EINVAL;
+
 	netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex);
 	if (!netdev)
 		return -EINVAL;
diff --git a/net/core/Makefile b/net/core/Makefile
index 8f367813bc68..c1db05ccfac7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -39,4 +39,5 @@ obj-$(CONFIG_FAILOVER) += failover.o
 obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += devtx.o
 obj-$(CONFIG_OF)	+= of_net.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 3393c2f3dbe8..ef0e65e68024 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,6 +150,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
+#include <net/devtx.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -10875,6 +10876,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		dev_shutdown(dev);
 
 		dev_xdp_uninstall(dev);
+		devtx_shutdown(dev);
 		bpf_dev_bound_netdev_unregister(dev);
 
 		netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/devtx.c b/net/core/devtx.c
new file mode 100644
index 000000000000..b7cbc26d1c01
--- /dev/null
+++ b/net/core/devtx.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <net/devtx.h>
+#include <linux/filter.h>
+
+/* Incremented for every attached devtx program, decremented on detach. */
+DEFINE_STATIC_KEY_FALSE(devtx_enabled);
+EXPORT_SYMBOL_GPL(devtx_enabled);
+
+/*
+ * Run the program published in @pprog, if any, with @ctx as its argument.
+ * Callers must hold rcu_read_lock(), as rcu_dereference() is used here.
+ */
+static void devtx_run(struct net_device *netdev, struct devtx_frame *ctx,
+		      struct bpf_prog __rcu **pprog)
+{
+	/* The program is handed a one-element argument array holding @ctx. */
+	void *real_ctx[1] = {ctx};
+	struct bpf_prog *prog;
+
+	prog = rcu_dereference(*pprog);
+	if (likely(prog))
+		bpf_prog_run(prog, real_ctx);
+}
+
+/**
+ * devtx_submit - Run the tx-submission program attached to @netdev, if any
+ * @netdev: device whose devtx_sb program is run.
+ * @ctx: frame description passed to the program.
+ */
+void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx)
+{
+	rcu_read_lock();
+	devtx_run(netdev, ctx, &netdev->devtx_sb);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(devtx_submit);
+
+/**
+ * devtx_complete - Run the tx-completion program attached to @netdev, if any
+ * @netdev: device whose devtx_cp program is run.
+ * @ctx: frame description passed to the program.
+ */
+void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx)
+{
+	rcu_read_lock();
+	devtx_run(netdev, ctx, &netdev->devtx_cp);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(devtx_complete);
+
+/**
+ * devtx_sb - Called for every egress netdev packet
+ * @ctx: frame being submitted.
+ *
+ * Note: this function is never actually called by the kernel and declared
+ * only to allow loading and attaching appropriate tracepoints.
+ */
+__weak noinline void devtx_sb(struct devtx_frame *ctx)
+{
+}
+
+/**
+ * devtx_cp - Called upon egress netdev packet completion
+ * @ctx: frame that has completed.
+ *
+ * Note: this function is never actually called by the kernel and declared
+ * only to allow loading and attaching appropriate tracepoints.
+ */
+__weak noinline void devtx_cp(struct devtx_frame *ctx)
+{
+}
+
+/* Attach targets accepted for dev-bound tracing programs (is_devtx_kfunc()). */
+BTF_SET8_START(bpf_devtx_hook_ids)
+BTF_ID_FLAGS(func, devtx_sb)
+BTF_ID_FLAGS(func, devtx_cp)
+BTF_SET8_END(bpf_devtx_hook_ids)
+
+static const struct btf_kfunc_id_set bpf_devtx_hook_set = {
+	.owner = THIS_MODULE,
+	.set   = &bpf_devtx_hook_ids,
+};
+
+/* Serializes every update of netdev->devtx_sb and netdev->devtx_cp. */
+static DEFINE_MUTEX(devtx_attach_lock);
+
+/*
+ * Detach the program stored in @pprog. Caller holds devtx_attach_lock.
+ *
+ * The slot is cleared before the reference is dropped so that a reader
+ * entering devtx_run() afterwards can no longer pick up a program whose
+ * reference has already been released.
+ * NOTE(review): readers already inside rcu_read_lock() rely on
+ * bpf_prog_put() deferring the free past a grace period - confirm.
+ *
+ * Returns 0 on success or -EINVAL when nothing is attached.
+ */
+static int __bpf_devtx_detach(struct net_device *netdev, struct bpf_prog __rcu **pprog)
+{
+	struct bpf_prog *prog;
+
+	prog = rcu_dereference_protected(*pprog, lockdep_is_held(&devtx_attach_lock));
+	if (!prog)
+		return -EINVAL;
+
+	RCU_INIT_POINTER(*pprog, NULL);
+	static_branch_dec(&devtx_enabled);
+	bpf_prog_put(prog);
+
+	return 0;
+}
+
+/*
+ * Attach the program referred to by @prog_fd to the slot @pprog of @netdev,
+ * or detach the current one when @prog_fd is negative. Caller holds
+ * devtx_attach_lock.
+ *
+ * Only dev-bound fentry tracing programs that match @netdev and target
+ * @attach_func_name are accepted.
+ *
+ * Returns 0 on success, -EBUSY when the slot is already occupied, -EINVAL
+ * for an unsuitable program, or the error from bpf_prog_get().
+ */
+static int __bpf_devtx_attach(struct net_device *netdev, int prog_fd,
+			      const char *attach_func_name,
+			      struct bpf_prog __rcu **pprog)
+{
+	struct bpf_prog *prog;
+
+	if (prog_fd < 0)
+		return __bpf_devtx_detach(netdev, pprog);
+
+	if (rcu_access_pointer(*pprog))
+		return -EBUSY;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->type != BPF_PROG_TYPE_TRACING ||
+	    prog->expected_attach_type != BPF_TRACE_FENTRY ||
+	    !bpf_prog_is_dev_bound(prog->aux) ||
+	    !bpf_offload_dev_match(prog, netdev) ||
+	    strcmp(prog->aux->attach_func_name, attach_func_name)) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	/* Publish for RCU readers; the slot now owns the reference. */
+	rcu_assign_pointer(*pprog, prog);
+	static_branch_inc(&devtx_enabled);
+
+	return 0;
+}
+
+/* Common body of the two attach kfuncs: look up @ifindex and update one slot. */
+static int devtx_attach_ifindex(int ifindex, int prog_fd, bool completion)
+{
+	struct net_device *netdev;
+	int ret;
+
+	netdev = dev_get_by_index(current->nsproxy->net_ns, ifindex);
+	if (!netdev)
+		return -EINVAL;
+
+	mutex_lock(&devtx_attach_lock);
+	if (completion)
+		ret = __bpf_devtx_attach(netdev, prog_fd, "devtx_cp", &netdev->devtx_cp);
+	else
+		ret = __bpf_devtx_attach(netdev, prog_fd, "devtx_sb", &netdev->devtx_sb);
+	mutex_unlock(&devtx_attach_lock);
+
+	dev_put(netdev);
+
+	return ret;
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+/**
+ * bpf_devtx_sb_attach - Attach devtx 'packet submit' program
+ * @ifindex: netdev interface index.
+ * @prog_fd: BPF program file descriptor, or a negative value to detach.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_devtx_sb_attach(int ifindex, int prog_fd)
+{
+	return devtx_attach_ifindex(ifindex, prog_fd, false);
+}
+
+/**
+ * bpf_devtx_cp_attach - Attach devtx 'packet complete' program
+ * @ifindex: netdev interface index.
+ * @prog_fd: BPF program file descriptor, or a negative value to detach.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_devtx_cp_attach(int ifindex, int prog_fd)
+{
+	return devtx_attach_ifindex(ifindex, prog_fd, true);
+}
+
+__diag_pop();
+
+/* True when @kfunc_id is the BTF id of one of the devtx hook functions. */
+bool is_devtx_kfunc(u32 kfunc_id)
+{
+	return !!btf_id_set8_contains(&bpf_devtx_hook_ids, kfunc_id);
+}
+
+/* Drop both programs of @netdev; called while the device is unregistered. */
+void devtx_shutdown(struct net_device *netdev)
+{
+	mutex_lock(&devtx_attach_lock);
+	/* -EINVAL from an empty slot is expected here and deliberately ignored. */
+	__bpf_devtx_detach(netdev, &netdev->devtx_sb);
+	__bpf_devtx_detach(netdev, &netdev->devtx_cp);
+	mutex_unlock(&devtx_attach_lock);
+}
+
+/* kfuncs exposed to BPF_PROG_TYPE_SYSCALL programs (see devtx_init()). */
+BTF_SET8_START(bpf_devtx_syscall_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_devtx_sb_attach)
+BTF_ID_FLAGS(func, bpf_devtx_cp_attach)
+BTF_SET8_END(bpf_devtx_syscall_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_devtx_syscall_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &bpf_devtx_syscall_kfunc_ids,
+};
+
+/* Late initcall: register the hook id set and the syscall attach kfuncs. */
+static int __init devtx_init(void)
+{
+	int ret;
+
+	ret = register_btf_fmodret_id_set(&bpf_devtx_hook_set);
+	if (ret) {
+		pr_warn("failed to register devtx hooks: %d\n", ret);
+		return ret;
+	}
+
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_devtx_syscall_kfunc_set);
+	if (ret) {
+		pr_warn("failed to register syscall kfuncs: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+late_initcall(devtx_init);
-- 
2.41.0.162.gfafddb0af9-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ