lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAM0EoM=WKsJva-Z27GiXKZedeZU7C8Mmqvq6eKHxZRE-op87jA@mail.gmail.com>
Date: Thu, 24 Aug 2023 10:05:43 -0400
From: Jamal Hadi Salim <jhs@...atatu.com>
To: Vlad Buslov <vladbu@...dia.com>
Cc: Victor Nogueira <victor@...atatu.com>, xiyou.wangcong@...il.com, jiri@...nulli.us, 
	davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com, 
	netdev@...r.kernel.org, mleitner@...hat.com, horms@...nel.org, 
	pctammela@...atatu.com, kernel@...atatu.com
Subject: Re: [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev
 tracking infra

On Mon, Aug 21, 2023 at 3:18 PM Vlad Buslov <vladbu@...dia.com> wrote:
>
>
> On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@...atatu.com> wrote:
> > The tc block is a collection of netdevs/ports which allow qdiscs to share
> > filter block instances (as opposed to the traditional tc filter per port).
> > Example:
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> >
> > Now we can add a filter using the block index:
> > $ tc filter add block 22 protocol ip pref 25 \
> >   flower dst_ip 192.168.0.0/16 action drop
> >
> > Up to this point, the block is unaware of its ports. This patch fixes that
> > and makes the tc block ports available to the datapath as well as control
> > path on offloading.
> >
> > Suggested-by: Jiri Pirko <jiri@...dia.com>
> > Co-developed-by: Jamal Hadi Salim <jhs@...atatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
> > Co-developed-by: Pedro Tammela <pctammela@...atatu.com>
> > Signed-off-by: Pedro Tammela <pctammela@...atatu.com>
> > Signed-off-by: Victor Nogueira <victor@...atatu.com>
> > ---
> >  include/net/sch_generic.h |  4 ++
> >  net/sched/cls_api.c       |  1 +
> >  net/sched/sch_api.c       | 79 +++++++++++++++++++++++++++++++++++++--
> >  net/sched/sch_generic.c   | 34 ++++++++++++++++-
> >  4 files changed, 112 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index e92f73bb3198..824a0ecb5afc 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -19,6 +19,7 @@
> >  #include <net/gen_stats.h>
> >  #include <net/rtnetlink.h>
> >  #include <net/flow_offload.h>
> > +#include <linux/xarray.h>
> >
> >  struct Qdisc_ops;
> >  struct qdisc_walker;
> > @@ -126,6 +127,8 @@ struct Qdisc {
> >
> >       struct rcu_head         rcu;
> >       netdevice_tracker       dev_tracker;
> > +     netdevice_tracker       in_block_tracker;
> > +     netdevice_tracker       eg_block_tracker;
> >       /* private data */
> >       long privdata[] ____cacheline_aligned;
> >  };
> > @@ -458,6 +461,7 @@ struct tcf_chain {
> >  };
> >
> >  struct tcf_block {
> > +     struct xarray ports; /* datapath accessible */
> >       /* Lock protects tcf_block and lifetime-management data of chains
> >        * attached to the block (refcnt, action_refcnt, explicitly_created).
> >        */
> > diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> > index a193cc7b3241..a976792ef02f 100644
> > --- a/net/sched/cls_api.c
> > +++ b/net/sched/cls_api.c
> > @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> >       refcount_set(&block->refcnt, 1);
> >       block->net = net;
> >       block->index = block_index;
> > +     xa_init(&block->ports);
>
> Missing dual call to xa_destroy() for this.
>

Good catch - that should go in block destroy. I am not sure why the
kmemleak test didn't catch this.

> >
> >       /* Don't store q pointer for blocks which are shared */
> >       if (!tcf_block_shared(block))
> > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > index aa6b1fe65151..6c0c220cdb21 100644
> > --- a/net/sched/sch_api.c
> > +++ b/net/sched/sch_api.c
> > @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >       return 0;
> >  }
> >
> > +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> > +{
> > +     if (tca[TCA_INGRESS_BLOCK])
> > +             sch->ops->ingress_block_set(sch, 0);
> > +
> > +     if (tca[TCA_EGRESS_BLOCK])
> > +             sch->ops->egress_block_set(sch, 0);
> > +}
> > +
> > +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> > +                            struct nlattr **tca,
> > +                            struct netlink_ext_ack *extack)
> > +{
> > +     const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> > +     struct tcf_block *in_block = NULL;
> > +     struct tcf_block *eg_block = NULL;
> > +     unsigned long cl = 0;
> > +     int err;
> > +
> > +     if (tca[TCA_INGRESS_BLOCK]) {
> > +             /* works for both ingress and clsact */
> > +             cl = TC_H_MIN_INGRESS;
> > +             in_block = cl_ops->tcf_block(sch, cl, NULL);
> > +             if (!in_block) {
> > +                     NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > +             if (err) {
> > +                     NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > +                     return err;
> > +             }
> > +
> > +             netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> > +     }
> > +
> > +     if (tca[TCA_EGRESS_BLOCK]) {
> > +             cl = TC_H_MIN_EGRESS;
> > +             eg_block = cl_ops->tcf_block(sch, cl, NULL);
> > +             if (!eg_block) {
> > +                     NL_SET_ERR_MSG(extack, "Shared egress block missing");
> > +                     err = -EINVAL;
> > +                     goto err_out;
> > +             }
> > +
> > +             err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > +             if (err) {
> > +                     netdev_put(dev, &sch->eg_block_tracker);
> > +                     NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> > +                     goto err_out;
> > +             }
> > +             netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> > +     }
> > +
> > +     return 0;
> > +err_out:
> > +     if (in_block) {
> > +             xa_erase(&in_block->ports, dev->ifindex);
> > +             netdev_put(dev, &sch->in_block_tracker);
> > +             NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > +     }
> > +     return err;
> > +}
> > +
> >  static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> >                                  struct netlink_ext_ack *extack)
> >  {
> > @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >       sch = qdisc_alloc(dev_queue, ops, extack);
> >       if (IS_ERR(sch)) {
> >               err = PTR_ERR(sch);
> > -             goto err_out2;
> > +             goto err_out1;
> >       }
> >
> >       sch->parent = parent;
> > @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >                       if (handle == 0) {
> >                               NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
> >                               err = -ENOSPC;
> > -                             goto err_out3;
> > +                             goto err_out2;
> >                       }
> >               }
> >               if (!netif_is_multiqueue(dev))
> > @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >
> >       err = qdisc_block_indexes_set(sch, tca, extack);
> >       if (err)
> > -             goto err_out3;
> > +             goto err_out2;
> >
> >       if (tca[TCA_STAB]) {
> >               stab = qdisc_get_stab(tca[TCA_STAB], extack);
> > @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >       qdisc_hash_add(sch, false);
> >       trace_qdisc_create(ops, dev, parent);
> >
> > +     err = qdisc_block_add_dev(sch, dev, tca, extack);
> > +     if (err)
> > +             goto err_out4;
> > +
> >       return sch;
> >
> >  err_out4:
> > @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >               ops->destroy(sch);
> >       qdisc_put_stab(rtnl_dereference(sch->stab));
> >  err_out3:
> > +     qdisc_block_undo_set(sch, tca);
>
> Is this a bugfix? This new call is for all sites that jump to
> err_out{3|4} even though you only added new code to the end of the
> function.

I guess it could be labelled as a "bug fix" - the existing code did
not "rewind" the block ID setting when the TCA_EGRESS_BLOCK or
TCA_INGRESS_BLOCK attribute is present, the block ID gets set, and then
something fails later on in the codepath.
Maybe we need to separate this out into a different patch, or even send
it on its own as a bug fix.

cheers,
jamal
> > +err_out2:
> >       netdev_put(dev, &sch->dev_tracker);
> >       qdisc_free(sch);
> > -err_out2:
> > +err_out1:
> >       module_put(ops->owner);
> >  err_out:
> >       *errp = err;
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index 5d7e23f4cc0e..0fb51fd6f01e 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
> >
> >  static void __qdisc_destroy(struct Qdisc *qdisc)
> >  {
> > -     const struct Qdisc_ops  *ops = qdisc->ops;
> > +     struct net_device *dev = qdisc_dev(qdisc);
> > +     const struct Qdisc_ops *ops = qdisc->ops;
> > +     const struct Qdisc_class_ops *cops;
> > +     struct tcf_block *block;
> > +     unsigned long cl;
> > +     u32 block_index;
> >
> >  #ifdef CONFIG_NET_SCHED
> >       qdisc_hash_del(qdisc);
> > @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >
> >       qdisc_reset(qdisc);
> >
> > +     cops = ops->cl_ops;
> > +     if (ops->ingress_block_get) {
> > +             block_index = ops->ingress_block_get(qdisc);
> > +             if (block_index) {
> > +                     cl = TC_H_MIN_INGRESS;
> > +                     block = cops->tcf_block(qdisc, cl, NULL);
> > +                     if (block) {
> > +                             if (xa_erase(&block->ports, dev->ifindex))
> > +                                     netdev_put(dev, &qdisc->in_block_tracker);
> > +                     }
> > +             }
> > +     }
> > +
> > +     if (ops->egress_block_get) {
> > +             block_index = ops->egress_block_get(qdisc);
> > +             if (block_index) {
> > +                     cl = TC_H_MIN_EGRESS;
> > +                     block = cops->tcf_block(qdisc, cl, NULL);
> > +                     if (block) {
> > +                             if (xa_erase(&block->ports, dev->ifindex))
> > +                                     netdev_put(dev, &qdisc->eg_block_tracker);
> > +                     }
> > +             }
> > +     }
> > +
> >       if (ops->destroy)
> >               ops->destroy(qdisc);
> >
> >       module_put(ops->owner);
> > -     netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> > +     netdev_put(dev, &qdisc->dev_tracker);
> >
> >       trace_qdisc_destroy(qdisc);
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ