lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMB2axNNmoGAE8DBULe8Pjd3jtc=Tt4xKCyamPwqtB8fT5j75A@mail.gmail.com>
Date: Fri, 26 Jul 2024 15:30:50 -0700
From: Amery Hung <ameryhung@...il.com>
To: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, yangpeihao@...u.edu.cn, 
	daniel@...earbox.net, andrii@...nel.org, alexei.starovoitov@...il.com, 
	martin.lau@...nel.org, sinquersw@...il.com, toke@...hat.com, jhs@...atatu.com, 
	jiri@...nulli.us, sdf@...gle.com, xiyou.wangcong@...il.com, 
	yepeilin.cs@...il.com
Subject: Re: [RFC PATCH v9 07/11] bpf: net_sched: Allow more optional
 operators in Qdisc_ops

On Thu, Jul 25, 2024 at 6:15 PM Martin KaFai Lau <martin.lau@...ux.dev> wrote:
>
> On 7/14/24 10:51 AM, Amery Hung wrote:
> > So far, init, reset, and destroy are implemented by bpf qdisc infra as
> > fixed operators that manipulate the watchdog according to the occasion.
> > This patch allows users to implement these three operators to perform
> > desired work alongside the predefined ones.
> >
> > Signed-off-by: Amery Hung <amery.hung@...edance.com>
> > ---
> >   include/net/sch_generic.h |  6 ++++++
> >   net/sched/bpf_qdisc.c     | 20 ++++----------------
> >   net/sched/sch_api.c       | 11 +++++++++++
> >   net/sched/sch_generic.c   |  8 ++++++++
> >   4 files changed, 29 insertions(+), 16 deletions(-)
> >
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index 214ed2e34faa..3041782b7527 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -1359,4 +1359,10 @@ static inline void qdisc_synchronize(const struct Qdisc *q)
> >               msleep(1);
> >   }
> >
> > +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> > +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack);
> > +void bpf_qdisc_destroy_post_op(struct Qdisc *sch);
> > +void bpf_qdisc_reset_post_op(struct Qdisc *sch);
> > +#endif
> > +
> >   #endif
> > diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
> > index eff7559aa346..903b4eb54510 100644
> > --- a/net/sched/bpf_qdisc.c
> > +++ b/net/sched/bpf_qdisc.c
> > @@ -9,9 +9,6 @@
> >   static struct bpf_struct_ops bpf_Qdisc_ops;
> >
> >   static u32 unsupported_ops[] = {
> > -     offsetof(struct Qdisc_ops, init),
> > -     offsetof(struct Qdisc_ops, reset),
> > -     offsetof(struct Qdisc_ops, destroy),
> >       offsetof(struct Qdisc_ops, change),
> >       offsetof(struct Qdisc_ops, attach),
> >       offsetof(struct Qdisc_ops, change_real_num_tx),
> > @@ -36,8 +33,8 @@ static int bpf_qdisc_init(struct btf *btf)
> >       return 0;
> >   }
> >
> > -static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> > -                          struct netlink_ext_ack *extack)
> > +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt,
> > +                       struct netlink_ext_ack *extack)
> >   {
> >       struct bpf_sched_data *q = qdisc_priv(sch);
> >
> > @@ -45,14 +42,14 @@ static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> >       return 0;
> >   }
> >
> > -static void bpf_qdisc_reset_op(struct Qdisc *sch)
> > +void bpf_qdisc_reset_post_op(struct Qdisc *sch)
> >   {
> >       struct bpf_sched_data *q = qdisc_priv(sch);
> >
> >       qdisc_watchdog_cancel(&q->watchdog);
> >   }
> >
> > -static void bpf_qdisc_destroy_op(struct Qdisc *sch)
> > +void bpf_qdisc_destroy_post_op(struct Qdisc *sch)
>
> The reset_post_ops and destroy_post_op are identical. They only do
> qdisc_watchdog_cancel().
>
> >   {
> >       struct bpf_sched_data *q = qdisc_priv(sch);
> >
> > @@ -235,15 +232,6 @@ static int bpf_qdisc_init_member(const struct btf_type *t,
> >                       return -EINVAL;
> >               qdisc_ops->static_flags = TCQ_F_BPF;
> >               return 1;
> > -     case offsetof(struct Qdisc_ops, init):
> > -             qdisc_ops->init = bpf_qdisc_init_op;
> > -             return 1;
> > -     case offsetof(struct Qdisc_ops, reset):
> > -             qdisc_ops->reset = bpf_qdisc_reset_op;
> > -             return 1;
> > -     case offsetof(struct Qdisc_ops, destroy):
> > -             qdisc_ops->destroy = bpf_qdisc_destroy_op;
> > -             return 1;
> >       case offsetof(struct Qdisc_ops, peek):
> >               if (!uqdisc_ops->peek)
> >                       qdisc_ops->peek = qdisc_peek_dequeued;
> > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > index 5064b6d2d1ec..9fb9375e2793 100644
> > --- a/net/sched/sch_api.c
> > +++ b/net/sched/sch_api.c
> > @@ -1352,6 +1352,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >               rcu_assign_pointer(sch->stab, stab);
> >       }
> >
> > +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> > +     if (sch->flags & TCQ_F_BPF) {
>
> I can see the reason why this patch is needed. It is a few line changes and they
> are not in the fast path... still weakly not excited about them but I know it
> could be a personal preference.
>
> I think at the very least, instead of adding a new TCQ_F_BPF, let see if the
> "owner == BPF_MODULE_OWNER" test can be reused like how it is done in the
> bpf_try_module_get().
>

Thanks for the suggestion. Will do.

>
> A rough direction I am spinning...
>
> The pre/post is mainly to initialize and cleanup the "struct bpf_sched_data"
> before/after calling the bpf prog.
>
> For the pre (init), there is a ".gen_prologue(...., const struct bpf_prog
> *prog)" in the "bpf_verifier_ops". Take a look at the tc_cls_act_prologue().
> It calls a BPF_FUNC_skb_pull_data helper. It potentially can call a kfunc
> bpf_qdisc_watchdog_cancel. However, the gen_prologue is invoked too late in the
> verifier for kfunc calling now. This will need some thoughts and works.
>
> For the post (destroy,reset), there is no "gen_epilogue" now. If
> bpf_qdisc_watchdog_schedule() is not allowed to be called in the ".reset" and
> ".destroy" bpf prog. I think it can be changed to pre also? There is a ".filter"
> function in the "struct btf_kfunc_id_set" during the kfunc register.
>

I can see how that would work. The ability to add prologue, epilogue
to struct_ops operators is one thing on my wish list.

Meanwhile, I am not sure whether that should be written in the kernel
or rewritten by the verifier. An argument for keeping it in the kernel
is that the prologue or epilogue can get quite complex and involves
many kernel structures not exposed to the bpf program (pre-defined ops
in Qdisc_ops in v8).

Maybe we can keep the current approach in the initial version as they
are not in the fast path, and then move to (gen_prologue,
gen_epilogue) once the plumbing is done?

> > +             err = bpf_qdisc_init_pre_op(sch, tca[TCA_OPTIONS], extack);
> > +             if (err != 0)
> > +                     goto err_out4;
> > +     }
> > +#endif
> >       if (ops->init) {
> >               err = ops->init(sch, tca[TCA_OPTIONS], extack);
> >               if (err != 0)
> > @@ -1388,6 +1395,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >        */
> >       if (ops->destroy)
> >               ops->destroy(sch);
> > +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> > +     if (sch->flags & TCQ_F_BPF)
> > +             bpf_qdisc_destroy_post_op(sch);
> > +#endif
> >       qdisc_put_stab(rtnl_dereference(sch->stab));
> >   err_out3:
> >       lockdep_unregister_key(&sch->root_lock_key);
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index 76e4a6efd17c..0ac05665c69f 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -1033,6 +1033,10 @@ void qdisc_reset(struct Qdisc *qdisc)
> >
> >       if (ops->reset)
> >               ops->reset(qdisc);
> > +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> > +     if (qdisc->flags & TCQ_F_BPF)
> > +             bpf_qdisc_reset_post_op(qdisc);
> > +#endif
> >
> >       __skb_queue_purge(&qdisc->gso_skb);
> >       __skb_queue_purge(&qdisc->skb_bad_txq);
> > @@ -1076,6 +1080,10 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >
> >       if (ops->destroy)
> >               ops->destroy(qdisc);
> > +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> > +     if (qdisc->flags & TCQ_F_BPF)
> > +             bpf_qdisc_destroy_post_op(qdisc);
> > +#endif
> >
> >       lockdep_unregister_key(&qdisc->root_lock_key);
> >       bpf_module_put(ops, ops->owner);
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ