[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAFAkD9t-b_gs8AWOCHOF_8Qr5NTrWJVLJJhqcE1uv+XVs_7Bw@mail.gmail.com>
Date: Fri, 15 Dec 2023 12:17:27 -0500
From: Jamal Hadi Salim <hadi@...atatu.com>
To: Jiri Pirko <jiri@...nulli.us>
Cc: Victor Nogueira <victor@...atatu.com>, jhs@...atatu.com, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
xiyou.wangcong@...il.com, mleitner@...hat.com, vladbu@...dia.com,
paulb@...dia.com, pctammela@...atatu.com, netdev@...r.kernel.org,
kernel@...atatu.com
Subject: Re: [PATCH net-next v7 1/3] net/sched: Introduce tc block netdev
tracking infra
On Fri, Dec 15, 2023 at 10:52 AM Jiri Pirko <jiri@...nulli.us> wrote:
>
> Fri, Dec 15, 2023 at 03:35:01PM CET, hadi@...atatu.com wrote:
> >On Fri, Dec 15, 2023 at 8:31 AM Jiri Pirko <jiri@...nulli.us> wrote:
> >>
> >> Fri, Dec 15, 2023 at 12:10:48PM CET, victor@...atatu.com wrote:
> >> >This commit makes tc blocks track which ports have been added to them.
> >> >And, with that, we'll be able to use this new information to send
> >> >packets to the block's ports. Which will be done in the patch #3 of this
> >> >series.
> >> >
> >> >Suggested-by: Jiri Pirko <jiri@...dia.com>
> >> >Co-developed-by: Jamal Hadi Salim <jhs@...atatu.com>
> >> >Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
> >> >Co-developed-by: Pedro Tammela <pctammela@...atatu.com>
> >> >Signed-off-by: Pedro Tammela <pctammela@...atatu.com>
> >> >Signed-off-by: Victor Nogueira <victor@...atatu.com>
> >> >---
> >> > include/net/sch_generic.h | 4 +++
> >> > net/sched/cls_api.c | 2 ++
> >> > net/sched/sch_api.c | 55 +++++++++++++++++++++++++++++++++++++++
> >> > net/sched/sch_generic.c | 31 ++++++++++++++++++++--
> >> > 4 files changed, 90 insertions(+), 2 deletions(-)
> >> >
> >> >diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> >> >index dcb9160e6467..cefca55dd4f9 100644
> >> >--- a/include/net/sch_generic.h
> >> >+++ b/include/net/sch_generic.h
> >> >@@ -19,6 +19,7 @@
> >> > #include <net/gen_stats.h>
> >> > #include <net/rtnetlink.h>
> >> > #include <net/flow_offload.h>
> >> >+#include <linux/xarray.h>
> >> >
> >> > struct Qdisc_ops;
> >> > struct qdisc_walker;
> >> >@@ -126,6 +127,8 @@ struct Qdisc {
> >> >
> >> > struct rcu_head rcu;
> >> > netdevice_tracker dev_tracker;
> >> >+ netdevice_tracker in_block_tracker;
> >> >+ netdevice_tracker eg_block_tracker;
> >> > /* private data */
> >> > long privdata[] ____cacheline_aligned;
> >> > };
> >> >@@ -457,6 +460,7 @@ struct tcf_chain {
> >> > };
> >> >
> >> > struct tcf_block {
> >> >+ struct xarray ports; /* datapath accessible */
> >> > /* Lock protects tcf_block and lifetime-management data of chains
> >> > * attached to the block (refcnt, action_refcnt, explicitly_created).
> >> > */
> >> >diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> >> >index dc1c19a25882..6020a32ecff2 100644
> >> >--- a/net/sched/cls_api.c
> >> >+++ b/net/sched/cls_api.c
> >> >@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
> >> > {
> >> > mutex_destroy(&block->lock);
> >> > mutex_destroy(&block->proto_destroy_lock);
> >> >+ xa_destroy(&block->ports);
> >> > kfree_rcu(block, rcu);
> >> > }
> >> >
> >> >@@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> >> > refcount_set(&block->refcnt, 1);
> >> > block->net = net;
> >> > block->index = block_index;
> >> >+ xa_init(&block->ports);
> >> >
> >> > /* Don't store q pointer for blocks which are shared */
> >> > if (!tcf_block_shared(block))
> >> >diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> >> >index e9eaf637220e..09ec64f2f463 100644
> >> >--- a/net/sched/sch_api.c
> >> >+++ b/net/sched/sch_api.c
> >> >@@ -1180,6 +1180,57 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >> > return 0;
> >> > }
> >> >
> >> >+static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> >> >+ struct nlattr **tca,
> >> >+ struct netlink_ext_ack *extack)
> >> >+{
> >> >+ const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> >> >+ struct tcf_block *in_block = NULL;
> >> >+ struct tcf_block *eg_block = NULL;
> >>
> >> No need to null.
> >>
> >> Can't you just have:
> >> struct tcf_block *block;
> >>
> >> And use it in both ifs? You can easily obtain the block again on
> >> the error path.
> >>
> >
> >It's just easier to read.
>
> Hmm.
>
> >
> >> >+ int err;
> >> >+
> >> >+ if (tca[TCA_INGRESS_BLOCK]) {
> >> >+ /* works for both ingress and clsact */
> >> >+ in_block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> >+ if (!in_block) {
> >>
> >> I don't see how this could happen. In fact, why exactly do you check
> >> tca[TCA_INGRESS_BLOCK]?
> >>
> >
> >It's lazy but what is wrong with doing that?
>
> It's not needed, that's wrong.
>
Are you ok with the proposal i made?
>
> >
> >> At this time, the clsact/ingress init() function was already called, you
> >> can just do:
> >>
> >> block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> if (block) {
> >> err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> if (err) {
> >> NL_SET_ERR_MSG(extack, "Ingress block dev insert failed");
> >> return err;
> >> }
> >> netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> >> }
> >> block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> >> if (block) {
> >> err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> if (err) {
> >> NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> >> goto err_out;
> >> }
> >> netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> >> }
> >> return 0;
> >>
> >> err_out:
> >> block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> >> if (block) {
> >> xa_erase(&block->ports, dev->ifindex);
> >> netdev_put(dev, &sch->in_block_tracker);
> >> }
> >> return err;
> >>
> >> >+ NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> >> >+ return -EINVAL;
> >> >+ }
> >> >+
> >> >+ err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >+ if (err) {
> >> >+ NL_SET_ERR_MSG(extack, "Ingress block dev insert failed");
> >> >+ return err;
> >> >+ }
> >> >+
> >
> >How about a middle ground:
> > in_block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> > if (in_block) {
> > err = xa_insert(&in_block->ports, dev->ifindex, dev,
> >GFP_KERNEL);
> > if (err) {
> > NL_SET_ERR_MSG(extack, "ingress block dev
> >insert failed");
> > return err;
> > }
> > netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL)
> > }
> > eg_block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> > if (eg_block) {
> > err = xa_insert(&eg_block->ports, dev->ifindex, dev,
> >GFP_KERNEL);
> > if (err) {
> > netdev_put(dev, &sch->eg_block_tracker);
> > NL_SET_ERR_MSG(extack, "Egress block dev
> >insert failed");
> > xa_erase(&in_block->ports, dev->ifindex);
> > netdev_put(dev, &sch->in_block_tracker);
> > return err;
> > }
> > netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> > }
> > return 0;
> >
> >> >+ netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> >>
> >> Why exactly do you need an extra reference of netdev? Qdisc is already
> >> having one.
> >
> >More fine grained tracking.
>
> Again, good for what exactly?
We do xa_insert(&eg_block->ports, dev->ifindex, dev,...) which calls
for a hold on the dev.
Then we need to track that for debugging; so, instead of incrementing
the same qdisc tracker again for each of those inserts we keep a
separate tracker.
cheers,
jamal
>
> >
> >>
> >> >+ }
> >> >+
> >> >+ if (tca[TCA_EGRESS_BLOCK]) {
> >> >+ eg_block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> >> >+ if (!eg_block) {
> >> >+ NL_SET_ERR_MSG(extack, "Shared egress block missing");
> >> >+ err = -EINVAL;
> >> >+ goto err_out;
> >> >+ }
> >> >+
> >> >+ err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> >> >+ if (err) {
> >> >+ NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> >> >+ goto err_out;
> >> >+ }
> >> >+ netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> >> >+ }
> >> >+
> >> >+ return 0;
> >> >+err_out:
> >> >+ if (in_block) {
> >> >+ xa_erase(&in_block->ports, dev->ifindex);
> >> >+ netdev_put(dev, &sch->in_block_tracker);
> >> >+ }
> >> >+ return err;
> >> >+}
> >> >+
> >> > static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> >> > struct netlink_ext_ack *extack)
> >> > {
> >> >@@ -1350,6 +1401,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >> > qdisc_hash_add(sch, false);
> >> > trace_qdisc_create(ops, dev, parent);
> >> >
> >> >+ err = qdisc_block_add_dev(sch, dev, tca, extack);
> >> >+ if (err)
> >> >+ goto err_out4;
> >> >+
> >> > return sch;
> >> >
> >> > err_out4:
> >> >diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> >> >index 8dd0e5925342..32bed60dea9f 100644
> >> >--- a/net/sched/sch_generic.c
> >> >+++ b/net/sched/sch_generic.c
> >> >@@ -1050,7 +1050,11 @@ static void qdisc_free_cb(struct rcu_head *head)
> >> >
> >> > static void __qdisc_destroy(struct Qdisc *qdisc)
> >> > {
> >> >- const struct Qdisc_ops *ops = qdisc->ops;
> >> >+ struct net_device *dev = qdisc_dev(qdisc);
> >> >+ const struct Qdisc_ops *ops = qdisc->ops;
> >> >+ const struct Qdisc_class_ops *cops;
> >> >+ struct tcf_block *block;
> >> >+ u32 block_index;
> >> >
> >> > #ifdef CONFIG_NET_SCHED
> >> > qdisc_hash_del(qdisc);
> >> >@@ -1061,11 +1065,34 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >> >
> >> > qdisc_reset(qdisc);
> >> >
> >> >+ cops = ops->cl_ops;
> >> >+ if (ops->ingress_block_get) {
> >> >+ block_index = ops->ingress_block_get(qdisc);
> >> >+ if (block_index) {
> >>
> >> I don't follow. What you need block_index for? Why can't you just call:
> >> block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
> >> right away?
> >
> >Good point.
> >
> >cheers,
> >jamal
> >
> >>
> >> >+ block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
> >> >+ if (block) {
> >> >+ if (xa_erase(&block->ports, dev->ifindex))
> >> >+ netdev_put(dev, &qdisc->in_block_tracker);
> >> >+ }
> >> >+ }
> >> >+ }
> >> >+
> >> >+ if (ops->egress_block_get) {
> >> >+ block_index = ops->egress_block_get(qdisc);
> >> >+ if (block_index) {
> >> >+ block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL);
> >> >+ if (block) {
> >> >+ if (xa_erase(&block->ports, dev->ifindex))
> >> >+ netdev_put(dev, &qdisc->eg_block_tracker);
> >> >+ }
> >> >+ }
> >> >+ }
> >> >+
> >> > if (ops->destroy)
> >> > ops->destroy(qdisc);
> >> >
> >> > module_put(ops->owner);
> >> >- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> >> >+ netdev_put(dev, &qdisc->dev_tracker);
> >> >
> >> > trace_qdisc_destroy(qdisc);
> >> >
> >> >--
> >> >2.25.1
> >> >
Powered by blists - more mailing lists