[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZIiGVrHLKQRzMzGg@corigine.com>
Date: Tue, 13 Jun 2023 17:08:06 +0200
From: Simon Horman <simon.horman@...igine.com>
To: Stanislav Fomichev <sdf@...gle.com>
Cc: bpf@...r.kernel.org, ast@...nel.org, daniel@...earbox.net,
andrii@...nel.org, martin.lau@...ux.dev, song@...nel.org,
yhs@...com, john.fastabend@...il.com, kpsingh@...nel.org,
haoluo@...gle.com, jolsa@...nel.org, netdev@...r.kernel.org
Subject: Re: [RFC bpf-next 3/7] bpf: implement devtx hook points
On Mon, Jun 12, 2023 at 10:23:03AM -0700, Stanislav Fomichev wrote:
> devtx is a lightweight set of hooks before and after packet transmission.
> The hook is supposed to work for both skb and xdp paths by exposing
> a light-weight packet wrapper via devtx_frame (header portion + frags).
>
> devtx is implemented as a tracing program which has access to the
> XDP-metadata-like kfuncs. The initial set of kfuncs is implemented
> in the next patch, but the idea is similar to XDP metadata:
> the kfuncs have netdev-specific implementation, but common
> interface. Upon loading, the kfuncs are resolved to direct
> calls against per-netdev implementation. This can be achieved
> by marking devtx-tracing programs as dev-bound (largely
> reusing xdp-dev-bound program infrastructure).
>
> Attachment and detachment is implemented via syscall BPF program
> by calling bpf_devtx_sb_attach (attach to tx-submission)
> or bpf_devtx_cp_attach (attach to tx completion). Right now,
> the attachment does not return a link and doesn't support
> multiple programs. I plan to switch to Daniel's bpf_mprog infra
> once it's available.
>
> Cc: netdev@...r.kernel.org
> Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
...
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22976,11 +22976,13 @@ L: bpf@...r.kernel.org
> S: Supported
> F: drivers/net/ethernet/*/*/*/*/*xdp*
> F: drivers/net/ethernet/*/*/*xdp*
> +F: include/net/devtx.h
> F: include/net/xdp.h
> F: include/net/xdp_priv.h
> F: include/trace/events/xdp.h
> F: kernel/bpf/cpumap.c
> F: kernel/bpf/devmap.c
> +F: net/core/devtx.c
> F: net/core/xdp.c
> F: samples/bpf/xdp*
> F: tools/testing/selftests/bpf/*/*xdp*
Hi Stan,
some feedback from my side.
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 08fbd4622ccf..e08e3fd39dfc 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2238,6 +2238,8 @@ struct net_device {
> unsigned int real_num_rx_queues;
>
> struct bpf_prog __rcu *xdp_prog;
> + struct bpf_prog __rcu *devtx_sb;
> + struct bpf_prog __rcu *devtx_cp;
It would be good to add these new fields to the kernel doc
for struct net_device.
> unsigned long gro_flush_timeout;
> int napi_defer_hard_irqs;
> #define GRO_LEGACY_MAX_SIZE 65536u
> diff --git a/include/net/devtx.h b/include/net/devtx.h
> new file mode 100644
> index 000000000000..7eab66d0ce80
> --- /dev/null
> +++ b/include/net/devtx.h
> @@ -0,0 +1,76 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef __LINUX_NET_DEVTX_H__
> +#define __LINUX_NET_DEVTX_H__
> +
> +#include <linux/jump_label.h>
> +#include <linux/skbuff.h>
> +#include <linux/netdevice.h>
> +#include <net/xdp.h>
> +
> +struct devtx_frame {
> + void *data;
> + u16 len;
> + struct skb_shared_info *sinfo; /* for frags */
> +};
> +
> +#ifdef CONFIG_NET
> +void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx);
> +void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx);
> +bool is_devtx_kfunc(u32 kfunc_id);
> +void devtx_shutdown(struct net_device *netdev);
> +
> +static inline void devtx_frame_from_skb(struct devtx_frame *ctx, struct sk_buff *skb)
> +{
> + ctx->data = skb->data;
> + ctx->len = skb_headlen(skb);
> + ctx->sinfo = skb_shinfo(skb);
> +}
> +
> +static inline void devtx_frame_from_xdp(struct devtx_frame *ctx, struct xdp_frame *xdpf)
> +{
> + ctx->data = xdpf->data;
> + ctx->len = xdpf->len;
> + ctx->sinfo = xdp_frame_has_frags(xdpf) ? xdp_get_shared_info_from_frame(xdpf) : NULL;
> +}
> +
> +DECLARE_STATIC_KEY_FALSE(devtx_enabled);
> +
> +static inline bool devtx_submit_enabled(struct net_device *netdev)
> +{
> + return static_branch_unlikely(&devtx_enabled) &&
> + rcu_access_pointer(netdev->devtx_sb);
> +}
> +
> +static inline bool devtx_complete_enabled(struct net_device *netdev)
> +{
> + return static_branch_unlikely(&devtx_enabled) &&
> + rcu_access_pointer(netdev->devtx_cp);
> +}
> +#else
> +static inline void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx)
> +{
> +}
> +
> +static inline void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx)
> +{
> +}
> +
> +static inline bool is_devtx_kfunc(u32 kfunc_id)
> +{
> + return false;
> +}
> +
> +static inline void devtx_shutdown(struct net_device *netdev)
> +{
> +}
> +
> +static inline void devtx_frame_from_skb(struct devtx_frame *ctx, struct sk_buff *skb)
> +{
> +}
> +
> +static inline void devtx_frame_from_xdp(struct devtx_frame *ctx, struct xdp_frame *xdpf)
> +{
> +}
> +#endif
> +
> +#endif /* __LINUX_NET_DEVTX_H__ */
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 235d81f7e0ed..9cfe96422c80 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -25,6 +25,7 @@
> #include <linux/rhashtable.h>
> #include <linux/rtnetlink.h>
> #include <linux/rwsem.h>
> +#include <net/devtx.h>
>
> /* Protects offdevs, members of bpf_offload_netdev and offload members
> * of all progs.
> @@ -228,6 +229,7 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
> int err;
>
> if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
> + attr->prog_type != BPF_PROG_TYPE_TRACING &&
> attr->prog_type != BPF_PROG_TYPE_XDP)
> return -EINVAL;
>
> @@ -238,6 +240,10 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
> attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
> return -EINVAL;
>
> + if (attr->prog_type == BPF_PROG_TYPE_TRACING &&
> + !is_devtx_kfunc(prog->aux->attach_btf_id))
> + return -EINVAL;
> +
> netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex);
> if (!netdev)
> return -EINVAL;
> diff --git a/net/core/Makefile b/net/core/Makefile
> index 8f367813bc68..c1db05ccfac7 100644
> --- a/net/core/Makefile
> +++ b/net/core/Makefile
> @@ -39,4 +39,5 @@ obj-$(CONFIG_FAILOVER) += failover.o
> obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
> obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
> obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
> +obj-$(CONFIG_BPF_SYSCALL) += devtx.o
> obj-$(CONFIG_OF) += of_net.o
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 3393c2f3dbe8..ef0e65e68024 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -150,6 +150,7 @@
> #include <linux/pm_runtime.h>
> #include <linux/prandom.h>
> #include <linux/once_lite.h>
> +#include <net/devtx.h>
>
> #include "dev.h"
> #include "net-sysfs.h"
> @@ -10875,6 +10876,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
> dev_shutdown(dev);
>
> dev_xdp_uninstall(dev);
> + devtx_shutdown(dev);
> bpf_dev_bound_netdev_unregister(dev);
>
> netdev_offload_xstats_disable_all(dev);
> diff --git a/net/core/devtx.c b/net/core/devtx.c
> new file mode 100644
> index 000000000000..b7cbc26d1c01
> --- /dev/null
> +++ b/net/core/devtx.c
> @@ -0,0 +1,208 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <net/devtx.h>
> +#include <linux/filter.h>
> +
> +DEFINE_STATIC_KEY_FALSE(devtx_enabled);
> +EXPORT_SYMBOL_GPL(devtx_enabled);
> +
> +static void devtx_run(struct net_device *netdev, struct devtx_frame *ctx, struct bpf_prog **pprog)
Is an __rcu annotation appropriate for prog here?
Also elsewhere in this patch.
> +{
> + struct bpf_prog *prog;
> + void *real_ctx[1] = {ctx};
> +
> + prog = rcu_dereference(*pprog);
> + if (likely(prog))
> + bpf_prog_run(prog, real_ctx);
> +}
> +
> +void devtx_submit(struct net_device *netdev, struct devtx_frame *ctx)
> +{
> + rcu_read_lock();
> + devtx_run(netdev, ctx, &netdev->devtx_sb);
> + rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(devtx_submit);
> +
> +void devtx_complete(struct net_device *netdev, struct devtx_frame *ctx)
> +{
> + rcu_read_lock();
> + devtx_run(netdev, ctx, &netdev->devtx_cp);
> + rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(devtx_complete);
> +
> +/**
> + * devtx_sb - Called for every egress netdev packet
As this is a kernel doc, it would be good to document the ctx parameter here.
> + *
> + * Note: this function is never actually called by the kernel and declared
> + * only to allow loading an attaching appropriate tracepoints.
> + */
> +__weak noinline void devtx_sb(struct devtx_frame *ctx)
I guess this is intentional.
But gcc complains that this is neither static nor is a forward
declaration provided. Likewise for devtx_cp()
> +{
> +}
> +
> +/**
> + * devtx_cp - Called upon egress netdev packet completion
Likewise, here too.
> + *
> + * Note: this function is never actually called by the kernel and declared
> + * only to allow loading an attaching appropriate tracepoints.
> + */
> +__weak noinline void devtx_cp(struct devtx_frame *ctx)
> +{
> +}
> +
> +BTF_SET8_START(bpf_devtx_hook_ids)
> +BTF_ID_FLAGS(func, devtx_sb)
> +BTF_ID_FLAGS(func, devtx_cp)
> +BTF_SET8_END(bpf_devtx_hook_ids)
> +
> +static const struct btf_kfunc_id_set bpf_devtx_hook_set = {
> + .owner = THIS_MODULE,
> + .set = &bpf_devtx_hook_ids,
> +};
> +
> +static DEFINE_MUTEX(devtx_attach_lock);
> +
> +static int __bpf_devtx_detach(struct net_device *netdev, struct bpf_prog **pprog)
> +{
As per my prior comment about *prog and __rcu annotations.
I'm genuinely unsure how the usage of **pprog in this function sits with RCU.
> + if (!*pprog)
> + return -EINVAL;
> + bpf_prog_put(*pprog);
> + *pprog = NULL;
> +
> + static_branch_dec(&devtx_enabled);
> + return 0;
> +}
> +
> +static int __bpf_devtx_attach(struct net_device *netdev, int prog_fd,
> + const char *attach_func_name, struct bpf_prog **pprog)
> +{
> + struct bpf_prog *prog;
> + int ret = 0;
> +
> + if (prog_fd < 0)
> + return __bpf_devtx_detach(netdev, pprog);
> +
> + if (*pprog)
> + return -EBUSY;
> +
> + prog = bpf_prog_get(prog_fd);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + if (prog->type != BPF_PROG_TYPE_TRACING ||
> + prog->expected_attach_type != BPF_TRACE_FENTRY ||
> + !bpf_prog_is_dev_bound(prog->aux) ||
> + !bpf_offload_dev_match(prog, netdev) ||
> + strcmp(prog->aux->attach_func_name, attach_func_name)) {
> + bpf_prog_put(prog);
> + return -EINVAL;
> + }
> +
> + *pprog = prog;
> + static_branch_inc(&devtx_enabled);
> +
> + return ret;
> +}
...
Powered by blists - more mailing lists