lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f3bfe9a5-40e8-4a1c-a5e5-0f7f24b9e395@linux.dev>
Date: Thu, 25 Jul 2024 18:15:37 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Amery Hung <ameryhung@...il.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, yangpeihao@...u.edu.cn,
 daniel@...earbox.net, andrii@...nel.org, alexei.starovoitov@...il.com,
 martin.lau@...nel.org, sinquersw@...il.com, toke@...hat.com,
 jhs@...atatu.com, jiri@...nulli.us, sdf@...gle.com,
 xiyou.wangcong@...il.com, yepeilin.cs@...il.com
Subject: Re: [RFC PATCH v9 07/11] bpf: net_sched: Allow more optional
 operators in Qdisc_ops

On 7/14/24 10:51 AM, Amery Hung wrote:
> So far, init, reset, and destroy are implemented by bpf qdisc infra as
> fixed operators that manipulate the watchdog according to the occasion.
> This patch allows users to implement these three operators to perform
> desired work alongside the predefined ones.
> 
> Signed-off-by: Amery Hung <amery.hung@...edance.com>
> ---
>   include/net/sch_generic.h |  6 ++++++
>   net/sched/bpf_qdisc.c     | 20 ++++----------------
>   net/sched/sch_api.c       | 11 +++++++++++
>   net/sched/sch_generic.c   |  8 ++++++++
>   4 files changed, 29 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 214ed2e34faa..3041782b7527 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -1359,4 +1359,10 @@ static inline void qdisc_synchronize(const struct Qdisc *q)
>   		msleep(1);
>   }
>   
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack);
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch);
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch);
> +#endif
> +
>   #endif
> diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
> index eff7559aa346..903b4eb54510 100644
> --- a/net/sched/bpf_qdisc.c
> +++ b/net/sched/bpf_qdisc.c
> @@ -9,9 +9,6 @@
>   static struct bpf_struct_ops bpf_Qdisc_ops;
>   
>   static u32 unsupported_ops[] = {
> -	offsetof(struct Qdisc_ops, init),
> -	offsetof(struct Qdisc_ops, reset),
> -	offsetof(struct Qdisc_ops, destroy),
>   	offsetof(struct Qdisc_ops, change),
>   	offsetof(struct Qdisc_ops, attach),
>   	offsetof(struct Qdisc_ops, change_real_num_tx),
> @@ -36,8 +33,8 @@ static int bpf_qdisc_init(struct btf *btf)
>   	return 0;
>   }
>   
> -static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> -			     struct netlink_ext_ack *extack)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt,
> +			  struct netlink_ext_ack *extack)
>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
> @@ -45,14 +42,14 @@ static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
>   	return 0;
>   }
>   
> -static void bpf_qdisc_reset_op(struct Qdisc *sch)
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch)
>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
>   	qdisc_watchdog_cancel(&q->watchdog);
>   }
>   
> -static void bpf_qdisc_destroy_op(struct Qdisc *sch)
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch)

The reset_post_ops and destroy_post_op are identical. They only do 
qdisc_watchdog_cancel().

>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
> @@ -235,15 +232,6 @@ static int bpf_qdisc_init_member(const struct btf_type *t,
>   			return -EINVAL;
>   		qdisc_ops->static_flags = TCQ_F_BPF;
>   		return 1;
> -	case offsetof(struct Qdisc_ops, init):
> -		qdisc_ops->init = bpf_qdisc_init_op;
> -		return 1;
> -	case offsetof(struct Qdisc_ops, reset):
> -		qdisc_ops->reset = bpf_qdisc_reset_op;
> -		return 1;
> -	case offsetof(struct Qdisc_ops, destroy):
> -		qdisc_ops->destroy = bpf_qdisc_destroy_op;
> -		return 1;
>   	case offsetof(struct Qdisc_ops, peek):
>   		if (!uqdisc_ops->peek)
>   			qdisc_ops->peek = qdisc_peek_dequeued;
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index 5064b6d2d1ec..9fb9375e2793 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1352,6 +1352,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>   		rcu_assign_pointer(sch->stab, stab);
>   	}
>   
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (sch->flags & TCQ_F_BPF) {

I can see the reason why this patch is needed. It is a few line changes and they 
are not in the fast path... still weakly not excited about them but I know it 
could be a personal preference.

I think at the very least, instead of adding a new TCQ_F_BPF, let see if the 
"owner == BPF_MODULE_OWNER" test can be reused like how it is done in the 
bpf_try_module_get().


A rough direction I am spinning...

The pre/post is mainly to initialize and cleanup the "struct bpf_sched_data" 
before/after calling the bpf prog.

For the pre (init), there is a ".gen_prologue(...., const struct bpf_prog 
*prog)" in the "bpf_verifier_ops". Take a look at the tc_cls_act_prologue().
It calls a BPF_FUNC_skb_pull_data helper. It potentially can call a kfunc 
bpf_qdisc_watchdog_cancel. However, the gen_prologue is invoked too late in the 
verifier for kfunc calling now. This will need some thoughts and works.

For the post (destroy,reset), there is no "gen_epilogue" now. If 
bpf_qdisc_watchdog_schedule() is not allowed to be called in the ".reset" and 
".destroy" bpf prog. I think it can be changed to pre also? There is a ".filter" 
function in the "struct btf_kfunc_id_set" during the kfunc register.

> +		err = bpf_qdisc_init_pre_op(sch, tca[TCA_OPTIONS], extack);
> +		if (err != 0)
> +			goto err_out4;
> +	}
> +#endif
>   	if (ops->init) {
>   		err = ops->init(sch, tca[TCA_OPTIONS], extack);
>   		if (err != 0)
> @@ -1388,6 +1395,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>   	 */
>   	if (ops->destroy)
>   		ops->destroy(sch);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (sch->flags & TCQ_F_BPF)
> +		bpf_qdisc_destroy_post_op(sch);
> +#endif
>   	qdisc_put_stab(rtnl_dereference(sch->stab));
>   err_out3:
>   	lockdep_unregister_key(&sch->root_lock_key);
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 76e4a6efd17c..0ac05665c69f 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1033,6 +1033,10 @@ void qdisc_reset(struct Qdisc *qdisc)
>   
>   	if (ops->reset)
>   		ops->reset(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (qdisc->flags & TCQ_F_BPF)
> +		bpf_qdisc_reset_post_op(qdisc);
> +#endif
>   
>   	__skb_queue_purge(&qdisc->gso_skb);
>   	__skb_queue_purge(&qdisc->skb_bad_txq);
> @@ -1076,6 +1080,10 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>   
>   	if (ops->destroy)
>   		ops->destroy(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (qdisc->flags & TCQ_F_BPF)
> +		bpf_qdisc_destroy_post_op(qdisc);
> +#endif
>   
>   	lockdep_unregister_key(&qdisc->root_lock_key);
>   	bpf_module_put(ops, ops->owner);


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ