lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <beb5e6f3-e2a1-637d-e06d-247b36474e95@iogearbox.net>
Date: Wed, 20 Sep 2023 00:15:13 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Victor Nogueira <victor@...atatu.com>, jhs@...atatu.com,
 xiyou.wangcong@...il.com, jiri@...nulli.us, davem@...emloft.net,
 edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com, paulb@...dia.com
Cc: netdev@...r.kernel.org, kernel@...atatu.com, martin.lau@...ux.dev,
 bpf@...r.kernel.org
Subject: Re: [PATCH net-next 1/1] net/sched: Disambiguate verdict from return
 code

[ +Martin, bpf ]

On 9/19/23 4:59 PM, Victor Nogueira wrote:
> Currently there is no way to distinguish between an error and a
> classification verdict. This patch adds the verdict field as a part of
> struct tcf_result. That way, tcf_classify can return a proper
> error number when it fails, and we keep the classification result
> information encapsulated in struct tcf_result.
> 
> Also add values SKB_DROP_REASON_TC_EGRESS_ERROR and
> SKB_DROP_REASON_TC_INGRESS_ERROR to enum skb_drop_reason.
> With that we can distinguish between a drop from a processing error versus
> a drop from classification.
> 
> Signed-off-by: Victor Nogueira <victor@...atatu.com>
> ---
>   include/net/dropreason-core.h |  6 +++++
>   include/net/sch_generic.h     |  7 ++++++
>   net/core/dev.c                | 42 ++++++++++++++++++++++++++---------
>   net/sched/cls_api.c           | 38 ++++++++++++++++++++-----------
>   net/sched/sch_cake.c          | 32 +++++++++++++-------------
>   net/sched/sch_drr.c           | 33 +++++++++++++--------------
>   net/sched/sch_ets.c           |  6 +++--
>   net/sched/sch_fq_codel.c      | 29 ++++++++++++------------
>   net/sched/sch_fq_pie.c        | 28 +++++++++++------------
>   net/sched/sch_hfsc.c          |  6 +++--
>   net/sched/sch_htb.c           |  6 +++--
>   net/sched/sch_multiq.c        |  6 +++--
>   net/sched/sch_prio.c          |  7 ++++--
>   net/sched/sch_qfq.c           | 34 +++++++++++++---------------
>   net/sched/sch_sfb.c           | 29 ++++++++++++------------
>   net/sched/sch_sfq.c           | 28 +++++++++++------------
>   16 files changed, 195 insertions(+), 142 deletions(-)
> 
> diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
> index a587e83fc169..b1c069c8e7f2 100644
> --- a/include/net/dropreason-core.h
> +++ b/include/net/dropreason-core.h
> @@ -80,6 +80,8 @@
>   	FN(IPV6_NDISC_BAD_OPTIONS)	\
>   	FN(IPV6_NDISC_NS_OTHERHOST)	\
>   	FN(QUEUE_PURGE)			\
> +	FN(TC_EGRESS_ERROR)		\
> +	FN(TC_INGRESS_ERROR)		\
>   	FNe(MAX)
>   
>   /**
> @@ -345,6 +347,10 @@ enum skb_drop_reason {
>   	SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
>   	/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
>   	SKB_DROP_REASON_QUEUE_PURGE,
> +	/** @SKB_DROP_REASON_TC_EGRESS_ERROR: dropped in TC egress HOOK due to error */
> +	SKB_DROP_REASON_TC_EGRESS_ERROR,
> +	/** @SKB_DROP_REASON_TC_INGRESS_ERROR: dropped in TC ingress HOOK due to error */
> +	SKB_DROP_REASON_TC_INGRESS_ERROR,
>   	/**
>   	 * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
>   	 * shouldn't be used as a real 'reason' - only for tracing code gen
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index f232512505f8..9a3f71d2545e 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -326,6 +326,7 @@ struct Qdisc_ops {
>   
>   
>   struct tcf_result {
> +	u32 verdict;
>   	union {
>   		struct {
>   			unsigned long	class;
> @@ -336,6 +337,12 @@ struct tcf_result {
>   	};
>   };
>   
> +static inline void tcf_result_set_verdict(struct tcf_result *res,
> +					  const u32 verdict)
> +{
> +	res->verdict = verdict;
> +}
> +
>   struct tcf_chain;
>   
>   struct tcf_proto_ops {
> diff --git a/net/core/dev.c b/net/core/dev.c
> index ccff2b6ef958..1450f4741d9b 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3910,31 +3910,39 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
>   #endif /* CONFIG_NET_EGRESS */
>   
>   #ifdef CONFIG_NET_XGRESS
> -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
> +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
> +		  struct tcf_result *res)
>   {
> -	int ret = TC_ACT_UNSPEC;
> +	int ret = 0;
>   #ifdef CONFIG_NET_CLS_ACT
>   	struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
> -	struct tcf_result res;
>   
> -	if (!miniq)
> +	if (!miniq) {
> +		tcf_result_set_verdict(res, TC_ACT_UNSPEC);
>   		return ret;
> +	}
>   
>   	tc_skb_cb(skb)->mru = 0;
>   	tc_skb_cb(skb)->post_ct = false;
>   
>   	mini_qdisc_bstats_cpu_update(miniq, skb);
> -	ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
> +	ret = tcf_classify(skb, miniq->block, miniq->filter_list, res, false);
> +	if (ret < 0) {
> +		mini_qdisc_qstats_cpu_drop(miniq);
> +		return ret;
> +	}
>   	/* Only tcf related quirks below. */
> -	switch (ret) {
> +	switch (res->verdict) {
>   	case TC_ACT_SHOT:
>   		mini_qdisc_qstats_cpu_drop(miniq);
>   		break;
>   	case TC_ACT_OK:
>   	case TC_ACT_RECLASSIFY:
> -		skb->tc_index = TC_H_MIN(res.classid);
> +		skb->tc_index = TC_H_MIN(res->classid);
>   		break;
>   	}
> +#else
> +	tcf_result_set_verdict(res, TC_ACT_UNSPEC);
>   #endif /* CONFIG_NET_CLS_ACT */
>   	return ret;
>   }
> @@ -3977,6 +3985,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>   		   struct net_device *orig_dev, bool *another)
>   {
>   	struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
> +	struct tcf_result res = {0};
>   	int sch_ret;
>   
>   	if (!entry)
> @@ -3994,9 +4003,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>   		if (sch_ret != TC_ACT_UNSPEC)
>   			goto ingress_verdict;
>   	}
> -	sch_ret = tc_run(tcx_entry(entry), skb);
> +	sch_ret = tc_run(tcx_entry(entry), skb, &res);
> +	if (sch_ret < 0) {
> +		kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS_ERROR);
> +		*ret = NET_RX_DROP;
> +		return NULL;
> +	}
>   ingress_verdict:
> -	switch (sch_ret) {
> +	switch (res.verdict) {

This breaks tcx, please move all this logic into tc_run(). No changes to sch_handle_ingress()
or sch_handle_egress should be necessary, you can then just remap the return code to TC_ACT_SHOT
in such case.

>   	case TC_ACT_REDIRECT:
>   		/* skb_mac_header check was done by BPF, so we can safely
>   		 * push the L2 header back before redirecting to another
> @@ -4032,6 +4046,7 @@ static __always_inline struct sk_buff *
>   sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>   {
>   	struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
> +	struct tcf_result res = {0};
>   	int sch_ret;
>   
>   	if (!entry)
> @@ -4045,9 +4060,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>   		if (sch_ret != TC_ACT_UNSPEC)
>   			goto egress_verdict;
>   	}
> -	sch_ret = tc_run(tcx_entry(entry), skb);
> +	sch_ret = tc_run(tcx_entry(entry), skb, &res);
> +	if (sch_ret < 0) {
> +		kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS_ERROR);
> +		*ret = NET_XMIT_DROP;
> +		return NULL;
> +	}
>   egress_verdict:
> -	switch (sch_ret) {
> +	switch (res.verdict) {
>   	case TC_ACT_REDIRECT:
>   		/* No need to push/pop skb's mac_header here on egress! */
>   		skb_do_redirect(skb);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ