[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8e5e26e8-52c7-40a8-bf49-98ac2c330db9@gmail.com>
Date: Wed, 25 Oct 2023 18:18:27 -0700
From: Kui-Feng Lee <sinquersw@...il.com>
To: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: netdev@...r.kernel.org, razor@...ckwall.org, ast@...nel.org,
andrii@...nel.org, john.fastabend@...il.com, sdf@...gle.com,
toke@...nel.org, kuba@...nel.org, andrew@...n.ch,
Toke Høiland-Jørgensen <toke@...hat.com>,
Daniel Borkmann <daniel@...earbox.net>, bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next v4 1/7] netkit, bpf: Add bpf programmable net
device
On 10/25/23 18:15, Kui-Feng Lee wrote:
>
>
> On 10/25/23 15:09, Martin KaFai Lau wrote:
>> On 10/25/23 2:24 PM, Kui-Feng Lee wrote:
>>>
>>>
>>> On 10/24/23 14:48, Daniel Borkmann wrote:
>>>> This work adds a new, minimal BPF-programmable device called "netkit"
>>>> (former PoC code-name "meta") we recently presented at LSF/MM/BPF. The
>>>> core idea is that BPF programs are executed within the drivers xmit
>>>> routine
>>>> and therefore e.g. in case of containers/Pods moving BPF processing
>>>> closer
>>>> to the source.
>>>>
>>>
>>> Sorry for intruding into this discussion! It may be too late to
>>> mention this, since this patchset is already at v4.
>>>
>>> I notice netkit has introduced a new attach type. I wonder if it is
>>> possible to implement it as a new struct_ops type.
>>
>> Could you elaborate more on what this struct_ops type does and
>> how it is different from the SCHED_CLS bpf prog that netkit is
>> running?
>
> I found that the code has already landed.
> Based on the landed code and
> the patchset for registering bpf struct_ops from modules that I
> am working on, it would look like what is done in the following patch.
> No changes to syscall, uapi and libbpf are required.
>
>
> diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
> index 7e484f9fd3ae..e4eafaf397bf 100644
> --- a/drivers/net/netkit.c
> +++ b/drivers/net/netkit.c
> @@ -20,6 +20,7 @@ struct netkit {
> struct bpf_mprog_entry __rcu *active;
> enum netkit_action policy;
> struct bpf_mprog_bundle bundle;
> + struct hlist_head ops_list;
>
> /* Needed in slow-path */
> enum netkit_mode mode;
> @@ -27,6 +28,13 @@ struct netkit {
> u32 headroom;
> };
>
> +struct netkit_ops {
> + struct hlist_node node;
> + int ifindex;
> +
> + int (*xmit)(struct sk_buff *skb);
> +};
> +
> struct netkit_link {
> struct bpf_link link;
> struct net_device *dev;
> @@ -46,6 +54,22 @@ netkit_run(const struct bpf_mprog_entry *entry,
> struct sk_buff *skb,
> if (ret != NETKIT_NEXT)
> break;
> }
> +
> + return ret;
> +}
> +
> +static __always_inline int
> +netkit_run_st_ops(const struct netkit *nk, struct sk_buff *skb,
> + enum netkit_action ret)
> +{
> + struct netkit_ops *ops;
> +
> + hlist_for_each_entry_rcu(ops, &nk->ops_list, node) {
> + ret = ops->xmit(skb);
> + if (ret != NETKIT_NEXT)
> + break;
> + }
> +
> return ret;
> }
>
> @@ -80,6 +104,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb,
> struct net_device *dev)
> entry = rcu_dereference(nk->active);
> if (entry)
> ret = netkit_run(entry, skb, ret);
> + if (ret == NETKIT_NEXT)
> + ret = netkit_run_st_ops(nk, skb, ret);
> switch (ret) {
> case NETKIT_NEXT:
> case NETKIT_PASS:
> @@ -900,6 +926,78 @@ static const struct nla_policy
> netkit_policy[IFLA_NETKIT_MAX + 1] = {
> .reject_message = "Primary attribute is
> read-only" },
> };
>
> +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
> +
> +static bool bpf_netkit_ops_is_valid_access(int off, int size,
> + enum bpf_access_type type,
> + const struct bpf_prog *prog,
> + struct bpf_insn_access_aux *info)
> +{
> + return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
> +}
> +
> +static const struct bpf_verifier_ops bpf_netkit_verifier_ops = {
> + .is_valid_access = bpf_netkit_ops_is_valid_access,
> +};
> +
> +static int bpf_netkit_ops_reg(void *kdata)
> +{
> + struct netkit_ops *ops = kdata;
> + struct netkit_link *nkl;
> + struct net_device *dev;
> +
> + BTF_STRUCT_OPS_TYPE_EMIT(netkit_ops);
> + dev = netkit_dev_fetch(current->nsproxy->net_ns,
> + ops->ifindex,
> + BPF_NETKIT_PRIMARY);
> + nkl = netkit_link(dev);
> + hlist_add_tail_rcu(&ops->node, &nkl->ops_list);
> +
> + return 0;
> +}
> +
> +static int bpf_netkit_ops_init(struct btf *btf)
> +{
> + return 0;
> +}
> +
> +static int bpf_netkit_ops_init_member(const struct btf_type *t,
> + const struct btf_member *member,
> + void *kdata, const void *udata)
> +{
> + struct netkit_ops *kops = kdata;
> + struct netkit_ops *uops = kdata;
> +
> + u32 moff = __btf_member_bit_offset(t, member) / 8;
> + if (moff == offsetof(struct netkit_ops, ifindex)) {
> + kops->ifindex = uops->ifindex;
> + return 1;
> + }
> + if (mod < offsetof(struct netkit_ops, ifindex))
> + return 1;
> +
> + return 0;
> +}
> +
> +static void bpf_netkit_ops_unreg(void *kdata)
> +{
> + struct netkit_ops *ops = kdata;
> +
> + hlist_del_rcu(&ops->node);
> +}
> +
> +struct bpf_struct_ops bpf_netkit_ops = {
> + .verifier_ops = &bpf_netkit_verifier_ops,
> + .init = bpf_netkit_ops_init,
> + .init_member = bpf_netkit_ops_init_member,
> + .reg = bpf_netkit_ops_reg,
> + .unreg = bpf_netki_ops_unreg,
> + .name = "netkit_ops",
> + .owner = THIS_MODULE,
> +};
> +
> +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
> +
> static struct rtnl_link_ops netkit_link_ops = {
> .kind = DRV_NAME,
> .priv_size = sizeof(struct netkit),
> @@ -917,17 +1015,22 @@ static struct rtnl_link_ops netkit_link_ops = {
>
> static __init int netkit_init(void)
> {
> + int ret;
> +
> BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
> (int)NETKIT_PASS != (int)TCX_PASS ||
> (int)NETKIT_DROP != (int)TCX_DROP ||
> (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
>
> - return rtnl_link_register(&netkit_link_ops);
> + ret = rtnl_link_register(&netkit_link_ops);
> +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
> + ret = ret ?: register_bpf_struct_ops(&bpf_netkit_ops);
> +#endif
> }
>
> static __exit void netkit_exit(void)
> {
> - rtnl_link_unregister(&netkit_link_ops);
> + rtnl_link_unregister(&bpf_netkit_ops);
This change should be removed.
> }
>
> module_init(netkit_init);
Powered by blists - more mailing lists