[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAM0EoMncgehpwCOxaUUKhOP7V0DyJtbDP9Q5aUkMG2h5dmfQJA@mail.gmail.com>
Date: Tue, 19 Sep 2023 19:20:57 -0400
From: Jamal Hadi Salim <jhs@...atatu.com>
To: Daniel Borkmann <daniel@...earbox.net>
Cc: Victor Nogueira <victor@...atatu.com>, xiyou.wangcong@...il.com, jiri@...nulli.us,
davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
paulb@...dia.com, netdev@...r.kernel.org, kernel@...atatu.com,
martin.lau@...ux.dev, bpf@...r.kernel.org
Subject: Re: [PATCH net-next 1/1] net/sched: Disambiguate verdict from return code
On Tue, Sep 19, 2023 at 6:15 PM Daniel Borkmann <daniel@...earbox.net> wrote:
>
> [ +Martin, bpf ]
>
> On 9/19/23 4:59 PM, Victor Nogueira wrote:
> > Currently there is no way to distinguish between an error and a
> > classification verdict. This patch adds the verdict field as a part of
> > struct tcf_result. That way, tcf_classify can return a proper
> > error number when it fails, and we keep the classification result
> > information encapsulated in struct tcf_result.
> >
> > Also add values SKB_DROP_REASON_TC_EGRESS_ERROR and
> > SKB_DROP_REASON_TC_INGRESS_ERROR to enum skb_drop_reason.
> > With that we can distinguish between a drop from a processing error versus
> > a drop from classification.
> >
> > Signed-off-by: Victor Nogueira <victor@...atatu.com>
> > ---
> > include/net/dropreason-core.h | 6 +++++
> > include/net/sch_generic.h | 7 ++++++
> > net/core/dev.c | 42 ++++++++++++++++++++++++++---------
> > net/sched/cls_api.c | 38 ++++++++++++++++++++-----------
> > net/sched/sch_cake.c | 32 +++++++++++++-------------
> > net/sched/sch_drr.c | 33 +++++++++++++--------------
> > net/sched/sch_ets.c | 6 +++--
> > net/sched/sch_fq_codel.c | 29 ++++++++++++------------
> > net/sched/sch_fq_pie.c | 28 +++++++++++------------
> > net/sched/sch_hfsc.c | 6 +++--
> > net/sched/sch_htb.c | 6 +++--
> > net/sched/sch_multiq.c | 6 +++--
> > net/sched/sch_prio.c | 7 ++++--
> > net/sched/sch_qfq.c | 34 +++++++++++++---------------
> > net/sched/sch_sfb.c | 29 ++++++++++++------------
> > net/sched/sch_sfq.c | 28 +++++++++++------------
> > 16 files changed, 195 insertions(+), 142 deletions(-)
> >
> > diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
> > index a587e83fc169..b1c069c8e7f2 100644
> > --- a/include/net/dropreason-core.h
> > +++ b/include/net/dropreason-core.h
> > @@ -80,6 +80,8 @@
> > FN(IPV6_NDISC_BAD_OPTIONS) \
> > FN(IPV6_NDISC_NS_OTHERHOST) \
> > FN(QUEUE_PURGE) \
> > + FN(TC_EGRESS_ERROR) \
> > + FN(TC_INGRESS_ERROR) \
> > FNe(MAX)
> >
> > /**
> > @@ -345,6 +347,10 @@ enum skb_drop_reason {
> > SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
> > /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
> > SKB_DROP_REASON_QUEUE_PURGE,
> > + /** @SKB_DROP_REASON_TC_EGRESS_ERROR: dropped in TC egress HOOK due to error */
> > + SKB_DROP_REASON_TC_EGRESS_ERROR,
> > + /** @SKB_DROP_REASON_TC_INGRESS_ERROR: dropped in TC ingress HOOK due to error */
> > + SKB_DROP_REASON_TC_INGRESS_ERROR,
> > /**
> > * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
> > * shouldn't be used as a real 'reason' - only for tracing code gen
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index f232512505f8..9a3f71d2545e 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -326,6 +326,7 @@ struct Qdisc_ops {
> >
> >
> > struct tcf_result {
> > + u32 verdict;
> > union {
> > struct {
> > unsigned long class;
> > @@ -336,6 +337,12 @@ struct tcf_result {
> > };
> > };
> >
> > +static inline void tcf_result_set_verdict(struct tcf_result *res,
> > + const u32 verdict)
> > +{
> > + res->verdict = verdict;
> > +}
> > +
> > struct tcf_chain;
> >
> > struct tcf_proto_ops {
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index ccff2b6ef958..1450f4741d9b 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -3910,31 +3910,39 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
> > #endif /* CONFIG_NET_EGRESS */
> >
> > #ifdef CONFIG_NET_XGRESS
> > -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
> > +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
> > + struct tcf_result *res)
> > {
> > - int ret = TC_ACT_UNSPEC;
> > + int ret = 0;
> > #ifdef CONFIG_NET_CLS_ACT
> > struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
> > - struct tcf_result res;
> >
> > - if (!miniq)
> > + if (!miniq) {
> > + tcf_result_set_verdict(res, TC_ACT_UNSPEC);
> > return ret;
> > + }
> >
> > tc_skb_cb(skb)->mru = 0;
> > tc_skb_cb(skb)->post_ct = false;
> >
> > mini_qdisc_bstats_cpu_update(miniq, skb);
> > - ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
> > + ret = tcf_classify(skb, miniq->block, miniq->filter_list, res, false);
> > + if (ret < 0) {
> > + mini_qdisc_qstats_cpu_drop(miniq);
> > + return ret;
> > + }
> > /* Only tcf related quirks below. */
> > - switch (ret) {
> > + switch (res->verdict) {
> > case TC_ACT_SHOT:
> > mini_qdisc_qstats_cpu_drop(miniq);
> > break;
> > case TC_ACT_OK:
> > case TC_ACT_RECLASSIFY:
> > - skb->tc_index = TC_H_MIN(res.classid);
> > + skb->tc_index = TC_H_MIN(res->classid);
> > break;
> > }
> > +#else
> > + tcf_result_set_verdict(res, TC_ACT_UNSPEC);
> > #endif /* CONFIG_NET_CLS_ACT */
> > return ret;
> > }
> > @@ -3977,6 +3985,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> > struct net_device *orig_dev, bool *another)
> > {
> > struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
> > + struct tcf_result res = {0};
> > int sch_ret;
> >
> > if (!entry)
> > @@ -3994,9 +4003,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> > if (sch_ret != TC_ACT_UNSPEC)
> > goto ingress_verdict;
> > }
> > - sch_ret = tc_run(tcx_entry(entry), skb);
> > + sch_ret = tc_run(tcx_entry(entry), skb, &res);
> > + if (sch_ret < 0) {
> > + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS_ERROR);
> > + *ret = NET_RX_DROP;
> > + return NULL;
> > + }
> > ingress_verdict:
> > - switch (sch_ret) {
> > + switch (res.verdict) {
>
> This breaks tcx, please move all this logic into tc_run(). No changes to sch_handle_ingress()
> or sch_handle_egress should be necessary, you can then just remap the return code to TC_ACT_SHOT
> in such case.
>
I think it is valuable to have a good reason code like
SKB_DROP_REASON_TC_XXX_ERROR to disambiguate between errors vs
verdicts in the case of tc_run() variant.
For tcx_run(), does this look ok (for consistency)?:
if (static_branch_unlikely(&tcx_needed_key)) {
sch_ret = tcx_run(entry, skb, true);
if (sch_ret != TC_ACT_UNSPEC) {
res.verdict = sch_ret;
goto ingress_verdict;
}
}
cheers,
jamal
> > case TC_ACT_REDIRECT:
> > /* skb_mac_header check was done by BPF, so we can safely
> > * push the L2 header back before redirecting to another
> > @@ -4032,6 +4046,7 @@ static __always_inline struct sk_buff *
> > sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> > {
> > struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
> > + struct tcf_result res = {0};
> > int sch_ret;
> >
> > if (!entry)
> > @@ -4045,9 +4060,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> > if (sch_ret != TC_ACT_UNSPEC)
> > goto egress_verdict;
> > }
> > - sch_ret = tc_run(tcx_entry(entry), skb);
> > + sch_ret = tc_run(tcx_entry(entry), skb, &res);
> > + if (sch_ret < 0) {
> > + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS_ERROR);
> > + *ret = NET_XMIT_DROP;
> > + return NULL;
> > + }
> > egress_verdict:
> > - switch (sch_ret) {
> > + switch (res.verdict) {
> > case TC_ACT_REDIRECT:
> > /* No need to push/pop skb's mac_header here on egress! */
> > skb_do_redirect(skb);
Powered by blists - more mailing lists