[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f3bfe9a5-40e8-4a1c-a5e5-0f7f24b9e395@linux.dev>
Date: Thu, 25 Jul 2024 18:15:37 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Amery Hung <ameryhung@...il.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, yangpeihao@...u.edu.cn,
daniel@...earbox.net, andrii@...nel.org, alexei.starovoitov@...il.com,
martin.lau@...nel.org, sinquersw@...il.com, toke@...hat.com,
jhs@...atatu.com, jiri@...nulli.us, sdf@...gle.com,
xiyou.wangcong@...il.com, yepeilin.cs@...il.com
Subject: Re: [RFC PATCH v9 07/11] bpf: net_sched: Allow more optional
operators in Qdisc_ops
On 7/14/24 10:51 AM, Amery Hung wrote:
> So far, init, reset, and destroy are implemented by bpf qdisc infra as
> fixed operators that manipulate the watchdog according to the occasion.
> This patch allows users to implement these three operators to perform
> desired work alongside the predefined ones.
>
> Signed-off-by: Amery Hung <amery.hung@...edance.com>
> ---
> include/net/sch_generic.h | 6 ++++++
> net/sched/bpf_qdisc.c | 20 ++++----------------
> net/sched/sch_api.c | 11 +++++++++++
> net/sched/sch_generic.c | 8 ++++++++
> 4 files changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 214ed2e34faa..3041782b7527 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -1359,4 +1359,10 @@ static inline void qdisc_synchronize(const struct Qdisc *q)
> msleep(1);
> }
>
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack);
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch);
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch);
> +#endif
> +
> #endif
> diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
> index eff7559aa346..903b4eb54510 100644
> --- a/net/sched/bpf_qdisc.c
> +++ b/net/sched/bpf_qdisc.c
> @@ -9,9 +9,6 @@
> static struct bpf_struct_ops bpf_Qdisc_ops;
>
> static u32 unsupported_ops[] = {
> - offsetof(struct Qdisc_ops, init),
> - offsetof(struct Qdisc_ops, reset),
> - offsetof(struct Qdisc_ops, destroy),
> offsetof(struct Qdisc_ops, change),
> offsetof(struct Qdisc_ops, attach),
> offsetof(struct Qdisc_ops, change_real_num_tx),
> @@ -36,8 +33,8 @@ static int bpf_qdisc_init(struct btf *btf)
> return 0;
> }
>
> -static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> - struct netlink_ext_ack *extack)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt,
> + struct netlink_ext_ack *extack)
> {
> struct bpf_sched_data *q = qdisc_priv(sch);
>
> @@ -45,14 +42,14 @@ static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> return 0;
> }
>
> -static void bpf_qdisc_reset_op(struct Qdisc *sch)
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch)
> {
> struct bpf_sched_data *q = qdisc_priv(sch);
>
> qdisc_watchdog_cancel(&q->watchdog);
> }
>
> -static void bpf_qdisc_destroy_op(struct Qdisc *sch)
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch)
bpf_qdisc_reset_post_op() and bpf_qdisc_destroy_post_op() are identical. They
both only call qdisc_watchdog_cancel().
> {
> struct bpf_sched_data *q = qdisc_priv(sch);
>
> @@ -235,15 +232,6 @@ static int bpf_qdisc_init_member(const struct btf_type *t,
> return -EINVAL;
> qdisc_ops->static_flags = TCQ_F_BPF;
> return 1;
> - case offsetof(struct Qdisc_ops, init):
> - qdisc_ops->init = bpf_qdisc_init_op;
> - return 1;
> - case offsetof(struct Qdisc_ops, reset):
> - qdisc_ops->reset = bpf_qdisc_reset_op;
> - return 1;
> - case offsetof(struct Qdisc_ops, destroy):
> - qdisc_ops->destroy = bpf_qdisc_destroy_op;
> - return 1;
> case offsetof(struct Qdisc_ops, peek):
> if (!uqdisc_ops->peek)
> qdisc_ops->peek = qdisc_peek_dequeued;
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index 5064b6d2d1ec..9fb9375e2793 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1352,6 +1352,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> rcu_assign_pointer(sch->stab, stab);
> }
>
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> + if (sch->flags & TCQ_F_BPF) {
I can see the reason why this patch is needed. It is only a few lines of changes
and they are not in the fast path... I am still not very excited about them, but
I know that could be a personal preference.
I think at the very least, instead of adding a new TCQ_F_BPF, let's see if the
"owner == BPF_MODULE_OWNER" test can be reused, like how it is done in
bpf_try_module_get().
A rough direction I am spinning...
The pre/post ops are mainly there to initialize and clean up the
"struct bpf_sched_data" before/after calling the bpf prog.
For the pre (init), there is a ".gen_prologue(...., const struct bpf_prog
*prog)" in the "bpf_verifier_ops". Take a look at tc_cls_act_prologue().
It calls the BPF_FUNC_skb_pull_data helper. Similarly, a qdisc prologue could
potentially call a kfunc, bpf_qdisc_watchdog_cancel. However, gen_prologue is
currently invoked too late in the verifier to call a kfunc. This will need some
thought and work.
For the post (destroy, reset), there is no "gen_epilogue" now. If
bpf_qdisc_watchdog_schedule() is not allowed to be called in the ".reset" and
".destroy" bpf progs, I think the post op can be changed to a pre op also? There
is a ".filter" function in the "struct btf_kfunc_id_set", given at kfunc
registration, that could be used for this.
> + err = bpf_qdisc_init_pre_op(sch, tca[TCA_OPTIONS], extack);
> + if (err != 0)
> + goto err_out4;
> + }
> +#endif
> if (ops->init) {
> err = ops->init(sch, tca[TCA_OPTIONS], extack);
> if (err != 0)
> @@ -1388,6 +1395,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> */
> if (ops->destroy)
> ops->destroy(sch);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> + if (sch->flags & TCQ_F_BPF)
> + bpf_qdisc_destroy_post_op(sch);
> +#endif
> qdisc_put_stab(rtnl_dereference(sch->stab));
> err_out3:
> lockdep_unregister_key(&sch->root_lock_key);
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 76e4a6efd17c..0ac05665c69f 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1033,6 +1033,10 @@ void qdisc_reset(struct Qdisc *qdisc)
>
> if (ops->reset)
> ops->reset(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> + if (qdisc->flags & TCQ_F_BPF)
> + bpf_qdisc_reset_post_op(qdisc);
> +#endif
>
> __skb_queue_purge(&qdisc->gso_skb);
> __skb_queue_purge(&qdisc->skb_bad_txq);
> @@ -1076,6 +1080,10 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>
> if (ops->destroy)
> ops->destroy(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> + if (qdisc->flags & TCQ_F_BPF)
> + bpf_qdisc_destroy_post_op(qdisc);
> +#endif
>
> lockdep_unregister_key(&qdisc->root_lock_key);
> bpf_module_put(ops, ops->owner);
Powered by blists - more mailing lists