[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170908134332.GD9064@sisyphus.home.austad.us>
Date: Fri, 8 Sep 2017 15:43:32 +0200
From: Henrik Austad <henrik@...tad.us>
To: Vinicius Costa Gomes <vinicius.gomes@...el.com>
Cc: netdev@...r.kernel.org, jhs@...atatu.com, xiyou.wangcong@...il.com,
jiri@...nulli.us, intel-wired-lan@...ts.osuosl.org,
andre.guedes@...el.com, ivan.briano@...el.com,
jesus.sanchez-palencia@...el.com, boon.leong.ong@...el.com,
richardcochran@...il.com
Subject: Re: [RFC net-next 2/5] net/sched: Introduce Credit Based Shaper
(CBS) qdisc
On Thu, Aug 31, 2017 at 06:26:22PM -0700, Vinicius Costa Gomes wrote:
> This queueing discipline implements the shaper algorithm defined by
> the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
>
> It's primary usage is to apply some bandwidth reservation to user
> defined traffic classes, which are mapped to different queues via the
> mqprio qdisc.
>
> Initially, it only supports offloading the traffic shaping work to
> supporting controllers.
>
> Later, when a software implementation is added, the current dependency
> on being installed "under" mqprio can be lifted.
>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@...el.com>
> Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@...el.com>
So, I started testing this in my VM to make sure I didn't do anything silly
and it exploded spectacularly as it used the underlying e1000 driver which
does not have a ndo_setup_tc present.
I commented inline below, but as a tl;dr
The cbs_init() would call into cbs_change() that would correctly detect
that ndo_setup_tc is missing and abort. However, at this stage it would try
to desroy the qdisc and in cbs_destroy() there's no such guard leading to a
BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
Fixing that, another NULL would be found when the code walks into
qdisc_destroy(q->qdisc).
Apart from this, it loaded fine after some wrestling with tc :)
Awesome! :D
> ---
> include/linux/netdevice.h | 1 +
> net/sched/Kconfig | 11 ++
> net/sched/Makefile | 1 +
> net/sched/sch_cbs.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 299 insertions(+)
> create mode 100644 net/sched/sch_cbs.c
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 35de8312e0b5..dd9a2ecd0c03 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -775,6 +775,7 @@ enum tc_setup_type {
> TC_SETUP_CLSFLOWER,
> TC_SETUP_CLSMATCHALL,
> TC_SETUP_CLSBPF,
> + TC_SETUP_CBS,
> };
>
> /* These structures hold the attributes of xdp state that are being passed
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index e70ed26485a2..c03d86a7775e 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -172,6 +172,17 @@ config NET_SCH_TBF
> To compile this code as a module, choose M here: the
> module will be called sch_tbf.
>
> +config NET_SCH_CBS
Shouldn't this depend on NET_SCH_MQPRIO as it is supposed to hook into
that?
@@ -173,6 +173,7 @@ config NET_SCH_TBF
module will be called sch_tbf.
config NET_SCH_CBS
+ depends on NET_SCH_MQPRIO
tristate "Credit Based Shaper (CBS)"
---help---
Say Y here if you want to use the Credit Based Shaper (CBS) packet
> + tristate "Credit Based Shaper (CBS)"
> + ---help---
> + Say Y here if you want to use the Credit Based Shaper (CBS) packet
> + scheduling algorithm.
> +
> + See the top of <file:net/sched/sch_cbs.c> for more details.
> +
> + To compile this code as a module, choose M here: the
> + module will be called sch_cbs.
> +
> config NET_SCH_GRED
> tristate "Generic Random Early Detection (GRED)"
> ---help---
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index 7b915d226de7..80c8f92d162d 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
> obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
> obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
> obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
> +obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
>
> obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
> obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
> diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
> new file mode 100644
> index 000000000000..1c86a9e14150
> --- /dev/null
> +++ b/net/sched/sch_cbs.c
> @@ -0,0 +1,286 @@
> +/*
> + * net/sched/sch_cbs.c Credit Based Shaper
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + *
> + * Authors: Vininicius Costa Gomes <vinicius.gomes@...el.com>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <net/netlink.h>
> +#include <net/sch_generic.h>
> +#include <net/pkt_sched.h>
> +
> +struct cbs_sched_data {
> + struct Qdisc *qdisc; /* Inner qdisc, default - pfifo queue */
> + s32 queue;
> + s32 locredit;
> + s32 hicredit;
> + s32 sendslope;
> + s32 idleslope;
> +};
> +
> +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> + struct sk_buff **to_free)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> + int ret;
> +
> + ret = qdisc_enqueue(skb, q->qdisc, to_free);
> + if (ret != NET_XMIT_SUCCESS) {
> + if (net_xmit_drop_count(ret))
> + qdisc_qstats_drop(sch);
> + return ret;
> + }
> +
> + qdisc_qstats_backlog_inc(sch, skb);
> + sch->q.qlen++;
> + return NET_XMIT_SUCCESS;
> +}
> +
> +static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> + struct sk_buff *skb;
> +
> + skb = q->qdisc->ops->peek(q->qdisc);
> + if (skb) {
> + skb = qdisc_dequeue_peeked(q->qdisc);
> + if (unlikely(!skb))
> + return NULL;
> +
> + qdisc_qstats_backlog_dec(sch, skb);
> + sch->q.qlen--;
> + qdisc_bstats_update(sch, skb);
> +
> + return skb;
> + }
> + return NULL;
> +}
> +
> +static void cbs_reset(struct Qdisc *sch)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> +
> + qdisc_reset(q->qdisc);
> +}
> +
> +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
> + [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
> +};
> +
> +static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> + struct tc_cbs_qopt_offload cbs = { };
> + struct nlattr *tb[TCA_CBS_MAX + 1];
> + const struct net_device_ops *ops;
> + struct tc_cbs_qopt *qopt;
> + struct net_device *dev;
> + int err;
> +
> + err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
> + if (err < 0)
> + return err;
> +
> + err = -EINVAL;
> + if (!tb[TCA_CBS_PARMS])
> + goto done;
> +
> + qopt = nla_data(tb[TCA_CBS_PARMS]);
> +
> + dev = qdisc_dev(sch);
> + ops = dev->netdev_ops;
> +
> + /* FIXME: this means that we can only install this qdisc
> + * "under" mqprio. Do we need a more generic way to retrieve
> + * the queue, or do we pass the netdev_queue to the driver?
> + */
> + cbs.queue = TC_H_MIN(sch->parent) - 1 - netdev_get_num_tc(dev);
> +
> + cbs.enable = 1;
> + cbs.hicredit = qopt->hicredit;
> + cbs.locredit = qopt->locredit;
> + cbs.idleslope = qopt->idleslope;
> + cbs.sendslope = qopt->sendslope;
> +
> + err = -ENOTSUPP;
> + if (!ops->ndo_setup_tc)
> + goto done;
This confuses tc a bit, and looking at other qdisc schedulers, it seems
like EOPNOTSUPP is an alternative, this changes the output from
RTNETLINK answers: Unknown error 524
- to
RTNETLINK answers: Operation not supported
which perhaps is more informative.
> +
> + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
> + if (err < 0)
> + goto done;
> +
> + q->queue = cbs.queue;
> + q->hicredit = cbs.hicredit;
> + q->locredit = cbs.locredit;
> + q->idleslope = cbs.idleslope;
> + q->sendslope = cbs.sendslope;
> +
> +done:
> + return err;
> +}
> +
> +static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> +
> + if (!opt)
> + return -EINVAL;
> +
> + q->qdisc = fifo_create_dflt(sch, &pfifo_qdisc_ops, 1024);
> + qdisc_hash_add(q->qdisc, true);
> +
> + return cbs_change(sch, opt);
> +}
> +
> +static void cbs_destroy(struct Qdisc *sch)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> + struct tc_cbs_qopt_offload cbs = { };
> + struct net_device *dev;
> + int err;
> +
> + q->hicredit = 0;
> + q->locredit = 0;
> + q->idleslope = 0;
> + q->sendslope = 0;
> +
> + dev = qdisc_dev(sch);
> +
> + cbs.queue = q->queue;
> + cbs.enable = 0;
> +
> + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
This can lead to NULL pointer deref if it is not set (tested for in
cbs_change and error there leads us here, which then explodes).
> + if (err < 0)
> + pr_warn("Couldn't reset queue %d to default values\n",
> + cbs.queue);
> +
> + qdisc_destroy(q->qdisc);
Same, q->qdisc was NULL when cbs_init() failed.
> +}
> +
> +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> + struct nlattr *nest;
> + struct tc_cbs_qopt opt;
> +
> + sch->qstats.backlog = q->qdisc->qstats.backlog;
> + nest = nla_nest_start(skb, TCA_OPTIONS);
> + if (!nest)
> + goto nla_put_failure;
> +
> + opt.hicredit = q->hicredit;
> + opt.locredit = q->locredit;
> + opt.sendslope = q->sendslope;
> + opt.idleslope = q->idleslope;
> +
> + if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
> + goto nla_put_failure;
> +
> + return nla_nest_end(skb, nest);
> +
> +nla_put_failure:
> + nla_nest_cancel(skb, nest);
> + return -1;
> +}
> +
> +static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
> + struct sk_buff *skb, struct tcmsg *tcm)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> +
> + tcm->tcm_handle |= TC_H_MIN(1);
> + tcm->tcm_info = q->qdisc->handle;
> +
> + return 0;
> +}
> +
> +static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
> + struct Qdisc **old)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> +
> + if (!new)
> + new = &noop_qdisc;
> +
> + *old = qdisc_replace(sch, new, &q->qdisc);
> + return 0;
> +}
> +
> +static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
> +{
> + struct cbs_sched_data *q = qdisc_priv(sch);
> +
> + return q->qdisc;
> +}
> +
> +static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
> +{
> + return 1;
> +}
> +
> +static int cbs_delete(struct Qdisc *sch, unsigned long arg)
> +{
> + return 0;
> +}
> +
> +static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
> +{
> + if (!walker->stop) {
> + if (walker->count >= walker->skip)
> + if (walker->fn(sch, 1, walker) < 0) {
> + walker->stop = 1;
> + return;
> + }
> + walker->count++;
> + }
> +}
> +
> +static const struct Qdisc_class_ops cbs_class_ops = {
> + .graft = cbs_graft,
> + .leaf = cbs_leaf,
> + .find = cbs_find,
> + .delete = cbs_delete,
> + .walk = cbs_walk,
> + .dump = cbs_dump_class,
> +};
> +
> +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
> + .next = NULL,
> + .cl_ops = &cbs_class_ops,
> + .id = "cbs",
> + .priv_size = sizeof(struct cbs_sched_data),
> + .enqueue = cbs_enqueue,
> + .dequeue = cbs_dequeue,
> + .peek = qdisc_peek_dequeued,
> + .init = cbs_init,
> + .reset = cbs_reset,
> + .destroy = cbs_destroy,
> + .change = cbs_change,
> + .dump = cbs_dump,
> + .owner = THIS_MODULE,
> +};
> +
> +static int __init cbs_module_init(void)
> +{
> + return register_qdisc(&cbs_qdisc_ops);
> +}
> +
> +static void __exit cbs_module_exit(void)
> +{
> + unregister_qdisc(&cbs_qdisc_ops);
> +}
> +module_init(cbs_module_init)
> +module_exit(cbs_module_exit)
> +MODULE_LICENSE("GPL");
> --
> 2.14.1
>
--
Henrik Austad
Download attachment "signature.asc" of type "application/pgp-signature" (182 bytes)
Powered by blists - more mailing lists