[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAM0EoMnK1eTJaFAYaKKO=nDmzP0JSSkv3pYJtMq274iYYp1kJw@mail.gmail.com>
Date: Fri, 15 Dec 2023 15:58:38 -0500
From: Jamal Hadi Salim <jhs@...atatu.com>
To: Jiri Pirko <jiri@...nulli.us>
Cc: Jamal Hadi Salim <hadi@...atatu.com>, Victor Nogueira <victor@...atatu.com>, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
xiyou.wangcong@...il.com, mleitner@...hat.com, vladbu@...dia.com,
paulb@...dia.com, pctammela@...atatu.com, netdev@...r.kernel.org,
kernel@...atatu.com
Subject: Re: [PATCH net-next v7 1/3] net/sched: Introduce tc block netdev
tracking infra
On Fri, Dec 15, 2023 at 1:12 PM Jiri Pirko <jiri@...nulli.us> wrote:
>
> Fri, Dec 15, 2023 at 06:17:27PM CET, hadi@...atatu.com wrote:
> >On Fri, Dec 15, 2023 at 10:52 AM Jiri Pirko <jiri@...nulli.us> wrote:
> >>
> >> Fri, Dec 15, 2023 at 03:35:01PM CET, hadi@...atatu.com wrote:
> >> >On Fri, Dec 15, 2023 at 8:31 AM Jiri Pirko <jiri@...nulli.us> wrote:
> >> >>
> >> >> Fri, Dec 15, 2023 at 12:10:48PM CET, victor@...atatu.com wrote:
> >> >> >This commit makes tc blocks track which ports have been added to them.
> >> >> >And, with that, we'll be able to use this new information to send
> >> >> >packets to the block's ports. Which will be done in the patch #3 of this
> >> >> >series.
> >> >> >
> >> >> >Suggested-by: Jiri Pirko <jiri@...dia.com>
> >> >> >Co-developed-by: Jamal Hadi Salim <jhs@...atatu.com>
> >> >> >Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
> >> >> >Co-developed-by: Pedro Tammela <pctammela@...atatu.com>
> >> >> >Signed-off-by: Pedro Tammela <pctammela@...atatu.com>
> >> >> >Signed-off-by: Victor Nogueira <victor@...atatu.com>
> >> >> >---
> >> >> > include/net/sch_generic.h | 4 +++
> >> >> > net/sched/cls_api.c | 2 ++
> >> >> > net/sched/sch_api.c | 55 +++++++++++++++++++++++++++++++++++++++
> >> >> > net/sched/sch_generic.c | 31 ++++++++++++++++++++--
> >> >> > 4 files changed, 90 insertions(+), 2 deletions(-)
> >> >> >
> >> >> >diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> >> >> >index dcb9160e6467..cefca55dd4f9 100644
> >> >> >--- a/include/net/sch_generic.h
> >> >> >+++ b/include/net/sch_generic.h
> >> >> >@@ -19,6 +19,7 @@
> >> >> > #include <net/gen_stats.h>
> >> >> > #include <net/rtnetlink.h>
> >> >> > #include <net/flow_offload.h>
> >> >> >+#include <linux/xarray.h>
> >> >> >
> >> >> > struct Qdisc_ops;
> >> >> > struct qdisc_walker;
> >> >> >@@ -126,6 +127,8 @@ struct Qdisc {
> >> >> >
> >> >> > struct rcu_head rcu;
> >> >> > netdevice_tracker dev_tracker;
> >> >> >+ netdevice_tracker in_block_tracker;
> >> >> >+ netdevice_tracker eg_block_tracker;
> >> >> > /* private data */
> >> >> > long privdata[] ____cacheline_aligned;
> >> >> > };
> >> >> >@@ -457,6 +460,7 @@ struct tcf_chain {
> >> >> > };
> >> >> >
> >> >> > struct tcf_block {
> >> >> >+ struct xarray ports; /* datapath accessible */
> >> >> > /* Lock protects tcf_block and lifetime-management data of chains
> >> >> > * attached to the block (refcnt, action_refcnt, explicitly_created).
> >> >> > */
> >> >> >diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> >> >> >index dc1c19a25882..6020a32ecff2 100644
> >> >> >--- a/net/sched/cls_api.c
> >> >> >+++ b/net/sched/cls_api.c
> >> >> >@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
> >> >> > {
> >> >> > mutex_destroy(&block->lock);
> >> >> > mutex_destroy(&block->proto_destroy_lock);
> >> >> >+ xa_destroy(&block->ports);
> >> >> > kfree_rcu(block, rcu);
> >> >> > }
> >> >> >
> >> >> >@@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> >> >> > refcount_set(&block->refcnt, 1);
> >> >> > block->net = net;
> >> >> > block->index = block_index;
> >> >> >+ xa_init(&block->ports);
> >> >> >
> >> >> > /* Don't store q pointer for blocks which are shared */
> >> >> > if (!tcf_block_shared(block))
> >> >> >diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> >> >> >index e9eaf637220e..09ec64f2f463 100644
> >> >> >--- a/net/sched/sch_api.c
> >> >> >+++ b/net/sched/sch_api.c
> >> >> >@@ -1180,6 +1180,57 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >> >> > return 0;
> >> >> > }
> >> >> >
> >> >> >+static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> >> >> >+ struct nlattr **tca,
> >> >> >+ struct netlink_ext_ack *extack)
> >> >> >+{
> >> >> >+ const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> >> >> >+ struct tcf_block *in_block = NULL;
> >> >> >+ struct tcf_block *eg_block = NULL;
> >> >>
> >> >> No need to null.
> >> >>
> >> >> Can't you just have:
> >> >> struct tcf_block *block;
> >> >>
> >> >> And use it in both ifs? You can easily obtain the block again on
> >> >> the error path.
> >> >>
> >> >
> >> >It's just easier to read.
> >>
> >> Hmm.
> >>
> >> >
> >> >> >+ int err;
> >> >> >+
> >> >> >+ if (tca[TCA_INGRESS_BLOCK]) {
> >> >> >+ /* works for both ingress and clsact */
> >> >> >+ in_block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> >> >+ if (!in_block) {
> >> >>
> >> >> I don't see how this could happen. In fact, why exactly do you check
> >> >> tca[TCA_INGRESS_BLOCK]?
> >> >>
> >> >
> >> >It's lazy but what is wrong with doing that?
> >>
> >> It's not needed, that's wrong.
> >>
> >
> >Are you ok with the proposal i made?
>
> Not sure what you mean here.
I was referring to the suggestion I made. Ok, this is going nowhere;
we'll just do _exactly_ what you said ;->
>
> >
> >>
> >> >
> >> >> At this time, the clsact/ingress init() function was already called, you
> >> >> can just do:
> >> >>
> >> >> block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> >> if (block) {
> >> >> err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >> if (err) {
> >> >> NL_SET_ERR_MSG(extack, "Ingress block dev insert failed");
> >> >> return err;
> >> >> }
> >> >> netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> >> >> }
> >> >> block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> >> >> if (block) {
> >> >> err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >> if (err) {
> >> >> NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> >> >> goto err_out;
> >> >> }
> >> >> netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> >> >> }
> >> >> return 0;
> >> >>
> >> >> err_out:
> >> >> block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> >> if (block) {
> >> >> xa_erase(&block->ports, dev->ifindex);
> >> >> netdev_put(dev, &sch->in_block_tracker);
> >> >> }
> >> >> return err;
> >> >>
> >> >> >+ NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> >> >> >+ return -EINVAL;
> >> >> >+ }
> >> >> >+
> >> >> >+ err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >> >+ if (err) {
> >> >> >+ NL_SET_ERR_MSG(extack, "Ingress block dev insert failed");
> >> >> >+ return err;
> >> >> >+ }
> >> >> >+
> >> >
> >> >How about a middle ground:
> >> > in_block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> > if (in_block) {
> >> > err = xa_insert(&in_block->ports, dev->ifindex, dev,
> >> >GFP_KERNEL);
> >> > if (err) {
> >> > NL_SET_ERR_MSG(extack, "ingress block dev
> >> >insert failed");
> >> > return err;
> >> > }
> >> > netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL)
> >> > }
> >> > eg_block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> >> > if (eg_block) {
> >> > err = xa_insert(&eg_block->ports, dev->ifindex, dev,
> >> >GFP_KERNEL);
> >> > if (err) {
> >> > netdev_put(dev, &sch->eg_block_tracker);
> >> > NL_SET_ERR_MSG(extack, "Egress block dev
> >> >insert failed");
> >> > xa_erase(&in_block->ports, dev->ifindex);
> >> > netdev_put(dev, &sch->in_block_tracker);
> >> > return err;
> >> > }
> >> > netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> >> > }
> >> > return 0;
> >> >
> >> >> >+ netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> >> >>
> >> >> Why exactly do you need an extra reference of netdev? Qdisc is already
> >> >> having one.
> >> >
> >> >More fine grained tracking.
> >>
> >> Again, good for what exactly?
> >
> >We do xa_insert(&eg_block->ports, dev->ifindex, dev,...) which calls
> >for a hold on the dev.
>
> Why you need to take a reference for it exactly? You already hold a
> reference for qdisc, why is that not enough?
>
Justification is:
We added the device on a new list/xarray (as you saw on the patch). To
keep track of it we incr the refcnt when it's added to the xarray and
decr when it is removed. It certainly helped when we were debugging
the code - in case of failures the trace was very nicely descriptive.
We could remove it.
cheers,
jamal
>
> >Then we need to track that for debugging; so, instead of incrementing
> >the same qdisc tracker again for each of those inserts we keep a
> >separate tracker.
> >
> >cheers,
> >jamal
> >
> >>
> >> >
> >> >>
> >> >> >+ }
> >> >> >+
> >> >> >+ if (tca[TCA_EGRESS_BLOCK]) {
> >> >> >+ eg_block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> >> >> >+ if (!eg_block) {
> >> >> >+ NL_SET_ERR_MSG(extack, "Shared egress block missing");
> >> >> >+ err = -EINVAL;
> >> >> >+ goto err_out;
> >> >> >+ }
> >> >> >+
> >> >> >+ err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >> >+ if (err) {
> >> >> >+ NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> >> >> >+ goto err_out;
> >> >> >+ }
> >> >> >+ netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> >> >> >+ }
> >> >> >+
> >> >> >+ return 0;
> >> >> >+err_out:
> >> >> >+ if (in_block) {
> >> >> >+ xa_erase(&in_block->ports, dev->ifindex);
> >> >> >+ netdev_put(dev, &sch->in_block_tracker);
> >> >> >+ }
> >> >> >+ return err;
> >> >> >+}
> >> >> >+
> >> >> > static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> >> >> > struct netlink_ext_ack *extack)
> >> >> > {
> >> >> >@@ -1350,6 +1401,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >> >> > qdisc_hash_add(sch, false);
> >> >> > trace_qdisc_create(ops, dev, parent);
> >> >> >
> >> >> >+ err = qdisc_block_add_dev(sch, dev, tca, extack);
> >> >> >+ if (err)
> >> >> >+ goto err_out4;
> >> >> >+
> >> >> > return sch;
> >> >> >
> >> >> > err_out4:
> >> >> >diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> >> >> >index 8dd0e5925342..32bed60dea9f 100644
> >> >> >--- a/net/sched/sch_generic.c
> >> >> >+++ b/net/sched/sch_generic.c
> >> >> >@@ -1050,7 +1050,11 @@ static void qdisc_free_cb(struct rcu_head *head)
> >> >> >
> >> >> > static void __qdisc_destroy(struct Qdisc *qdisc)
> >> >> > {
> >> >> >- const struct Qdisc_ops *ops = qdisc->ops;
> >> >> >+ struct net_device *dev = qdisc_dev(qdisc);
> >> >> >+ const struct Qdisc_ops *ops = qdisc->ops;
> >> >> >+ const struct Qdisc_class_ops *cops;
> >> >> >+ struct tcf_block *block;
> >> >> >+ u32 block_index;
> >> >> >
> >> >> > #ifdef CONFIG_NET_SCHED
> >> >> > qdisc_hash_del(qdisc);
> >> >> >@@ -1061,11 +1065,34 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >> >> >
> >> >> > qdisc_reset(qdisc);
> >> >> >
> >> >> >+ cops = ops->cl_ops;
> >> >> >+ if (ops->ingress_block_get) {
> >> >> >+ block_index = ops->ingress_block_get(qdisc);
> >> >> >+ if (block_index) {
> >> >>
> >> >> I don't follow. What you need block_index for? Why can't you just call:
> >> >> block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
> >> >> right away?
> >> >
> >> >Good point.
> >> >
> >> >cheers,
> >> >jamal
> >> >
> >> >>
> >> >> >+ block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
> >> >> >+ if (block) {
> >> >> >+ if (xa_erase(&block->ports, dev->ifindex))
> >> >> >+ netdev_put(dev, &qdisc->in_block_tracker);
> >> >> >+ }
> >> >> >+ }
> >> >> >+ }
> >> >> >+
> >> >> >+ if (ops->egress_block_get) {
> >> >> >+ block_index = ops->egress_block_get(qdisc);
> >> >> >+ if (block_index) {
> >> >> >+ block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL);
> >> >> >+ if (block) {
> >> >> >+ if (xa_erase(&block->ports, dev->ifindex))
> >> >> >+ netdev_put(dev, &qdisc->eg_block_tracker);
> >> >> >+ }
> >> >> >+ }
> >> >> >+ }
> >> >> >+
> >> >> > if (ops->destroy)
> >> >> > ops->destroy(qdisc);
> >> >> >
> >> >> > module_put(ops->owner);
> >> >> >- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> >> >> >+ netdev_put(dev, &qdisc->dev_tracker);
> >> >> >
> >> >> > trace_qdisc_destroy(qdisc);
> >> >> >
> >> >> >--
> >> >> >2.25.1
> >> >> >
Powered by blists - more mailing lists