lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170908134332.GD9064@sisyphus.home.austad.us>
Date:   Fri, 8 Sep 2017 15:43:32 +0200
From:   Henrik Austad <henrik@...tad.us>
To:     Vinicius Costa Gomes <vinicius.gomes@...el.com>
Cc:     netdev@...r.kernel.org, jhs@...atatu.com, xiyou.wangcong@...il.com,
        jiri@...nulli.us, intel-wired-lan@...ts.osuosl.org,
        andre.guedes@...el.com, ivan.briano@...el.com,
        jesus.sanchez-palencia@...el.com, boon.leong.ong@...el.com,
        richardcochran@...il.com
Subject: Re: [RFC net-next 2/5] net/sched: Introduce Credit Based Shaper
 (CBS) qdisc

On Thu, Aug 31, 2017 at 06:26:22PM -0700, Vinicius Costa Gomes wrote:
> This queueing discipline implements the shaper algorithm defined by
> the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
> 
> It's primary usage is to apply some bandwidth reservation to user
> defined traffic classes, which are mapped to different queues via the
> mqprio qdisc.
> 
> Initially, it only supports offloading the traffic shaping work to
> supporting controllers.
> 
> Later, when a software implementation is added, the current dependency
> on being installed "under" mqprio can be lifted.
> 
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@...el.com>
> Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@...el.com>

So, I started testing this in my VM to make sure I didn't do anything silly 
and it exploded spectacularly as it used the underlying e1000 driver which 
does not have a ndo_setup_tc present.

I commented inline below, but as a tl;dr

The cbs_init() would call into cbs_change() that would correctly detect 
that ndo_setup_tc is missing and abort. However, at this stage it would try 
to desroy the qdisc and in cbs_destroy() there's no such guard leading to a

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010

Fixing that, another NULL would be found when the code walks into 
qdisc_destroy(q->qdisc).

Apart from this, it loaded fine after some wrestling with tc :)

Awesome! :D

> ---
>  include/linux/netdevice.h |   1 +
>  net/sched/Kconfig         |  11 ++
>  net/sched/Makefile        |   1 +
>  net/sched/sch_cbs.c       | 286 ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 299 insertions(+)
>  create mode 100644 net/sched/sch_cbs.c
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 35de8312e0b5..dd9a2ecd0c03 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -775,6 +775,7 @@ enum tc_setup_type {
>  	TC_SETUP_CLSFLOWER,
>  	TC_SETUP_CLSMATCHALL,
>  	TC_SETUP_CLSBPF,
> +	TC_SETUP_CBS,
>  };
>  
>  /* These structures hold the attributes of xdp state that are being passed
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index e70ed26485a2..c03d86a7775e 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -172,6 +172,17 @@ config NET_SCH_TBF
>  	  To compile this code as a module, choose M here: the
>  	  module will be called sch_tbf.
>  
> +config NET_SCH_CBS

Shouldn't this depend on NET_SCH_MQPRIO as it is supposed to hook into 
that?

@@ -173,6 +173,7 @@ config NET_SCH_TBF
          module will be called sch_tbf.
 
 config NET_SCH_CBS
+       depends on NET_SCH_MQPRIO
        tristate "Credit Based Shaper (CBS)"
        ---help---
          Say Y here if you want to use the Credit Based Shaper (CBS) packet

> +	tristate "Credit Based Shaper (CBS)"
> +	---help---
> +	  Say Y here if you want to use the Credit Based Shaper (CBS) packet
> +	  scheduling algorithm.
> +
> +	  See the top of <file:net/sched/sch_cbs.c> for more details.
> +
> +	  To compile this code as a module, choose M here: the
> +	  module will be called sch_cbs.
> +
>  config NET_SCH_GRED
>  	tristate "Generic Random Early Detection (GRED)"
>  	---help---
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index 7b915d226de7..80c8f92d162d 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
>  obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
>  obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
>  obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
> +obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
>  
>  obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
>  obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
> diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
> new file mode 100644
> index 000000000000..1c86a9e14150
> --- /dev/null
> +++ b/net/sched/sch_cbs.c
> @@ -0,0 +1,286 @@
> +/*
> + * net/sched/sch_cbs.c	Credit Based Shaper
> + *
> + *		This program is free software; you can redistribute it and/or
> + *		modify it under the terms of the GNU General Public License
> + *		as published by the Free Software Foundation; either version
> + *		2 of the License, or (at your option) any later version.
> + *
> + * Authors:	Vininicius Costa Gomes <vinicius.gomes@...el.com>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <net/netlink.h>
> +#include <net/sch_generic.h>
> +#include <net/pkt_sched.h>
> +
> +struct cbs_sched_data {
> +	struct Qdisc *qdisc; /* Inner qdisc, default - pfifo queue */
> +	s32 queue;
> +	s32 locredit;
> +	s32 hicredit;
> +	s32 sendslope;
> +	s32 idleslope;
> +};
> +
> +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +		       struct sk_buff **to_free)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +	int ret;
> +
> +	ret = qdisc_enqueue(skb, q->qdisc, to_free);
> +	if (ret != NET_XMIT_SUCCESS) {
> +		if (net_xmit_drop_count(ret))
> +			qdisc_qstats_drop(sch);
> +		return ret;
> +	}
> +
> +	qdisc_qstats_backlog_inc(sch, skb);
> +	sch->q.qlen++;
> +	return NET_XMIT_SUCCESS;
> +}
> +
> +static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +	struct sk_buff *skb;
> +
> +	skb = q->qdisc->ops->peek(q->qdisc);
> +	if (skb) {
> +		skb = qdisc_dequeue_peeked(q->qdisc);
> +		if (unlikely(!skb))
> +			return NULL;
> +
> +		qdisc_qstats_backlog_dec(sch, skb);
> +		sch->q.qlen--;
> +		qdisc_bstats_update(sch, skb);
> +
> +		return skb;
> +	}
> +	return NULL;
> +}
> +
> +static void cbs_reset(struct Qdisc *sch)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +	qdisc_reset(q->qdisc);
> +}
> +
> +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
> +	[TCA_CBS_PARMS]	= { .len = sizeof(struct tc_cbs_qopt) },
> +};
> +
> +static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +	struct tc_cbs_qopt_offload cbs = { };
> +	struct nlattr *tb[TCA_CBS_MAX + 1];
> +	const struct net_device_ops *ops;
> +	struct tc_cbs_qopt *qopt;
> +	struct net_device *dev;
> +	int err;
> +
> +	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
> +	if (err < 0)
> +		return err;
> +
> +	err = -EINVAL;
> +	if (!tb[TCA_CBS_PARMS])
> +		goto done;
> +
> +	qopt = nla_data(tb[TCA_CBS_PARMS]);
> +
> +	dev = qdisc_dev(sch);
> +	ops = dev->netdev_ops;
> +
> +	/* FIXME: this means that we can only install this qdisc
> +	 * "under" mqprio. Do we need a more generic way to retrieve
> +	 * the queue, or do we pass the netdev_queue to the driver?
> +	 */
> +	cbs.queue = TC_H_MIN(sch->parent) - 1 - netdev_get_num_tc(dev);
> +
> +	cbs.enable = 1;
> +	cbs.hicredit = qopt->hicredit;
> +	cbs.locredit = qopt->locredit;
> +	cbs.idleslope = qopt->idleslope;
> +	cbs.sendslope = qopt->sendslope;
> +
> +	err = -ENOTSUPP;
> +	if (!ops->ndo_setup_tc)
> +		goto done;

This confuses tc a bit, and looking at other qdisc schedulers, it seems 
like EOPNOTSUPP is an alternative, this changes the output from

RTNETLINK answers: Unknown error 524
 - to
RTNETLINK answers: Operation not supported

which perhaps is more informative.

> +
> +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
> +	if (err < 0)
> +		goto done;
> +
> +	q->queue = cbs.queue;
> +	q->hicredit = cbs.hicredit;
> +	q->locredit = cbs.locredit;
> +	q->idleslope = cbs.idleslope;
> +	q->sendslope = cbs.sendslope;
> +
> +done:
> +	return err;
> +}
> +
> +static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +	if (!opt)
> +		return -EINVAL;
> +
> +	q->qdisc = fifo_create_dflt(sch, &pfifo_qdisc_ops, 1024);
> +	qdisc_hash_add(q->qdisc, true);
> +
> +	return cbs_change(sch, opt);
> +}
> +
> +static void cbs_destroy(struct Qdisc *sch)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +	struct tc_cbs_qopt_offload cbs = { };
> +	struct net_device *dev;
> +	int err;
> +
> +	q->hicredit = 0;
> +	q->locredit = 0;
> +	q->idleslope = 0;
> +	q->sendslope = 0;
> +
> +	dev = qdisc_dev(sch);
> +
> +	cbs.queue = q->queue;
> +	cbs.enable = 0;
> +
> +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);

This can lead to NULL pointer deref if it is not set (tested for in 
cbs_change and error there leads us here, which then explodes).

> +	if (err < 0)
> +		pr_warn("Couldn't reset queue %d to default values\n",
> +			cbs.queue);
> +
> +	qdisc_destroy(q->qdisc);

Same, q->qdisc was NULL when cbs_init() failed.

> +}
> +
> +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +	struct nlattr *nest;
> +	struct tc_cbs_qopt opt;
> +
> +	sch->qstats.backlog = q->qdisc->qstats.backlog;
> +	nest = nla_nest_start(skb, TCA_OPTIONS);
> +	if (!nest)
> +		goto nla_put_failure;
> +
> +	opt.hicredit = q->hicredit;
> +	opt.locredit = q->locredit;
> +	opt.sendslope = q->sendslope;
> +	opt.idleslope = q->idleslope;
> +
> +	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
> +		goto nla_put_failure;
> +
> +	return nla_nest_end(skb, nest);
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nest);
> +	return -1;
> +}
> +
> +static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
> +			  struct sk_buff *skb, struct tcmsg *tcm)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +	tcm->tcm_handle |= TC_H_MIN(1);
> +	tcm->tcm_info = q->qdisc->handle;
> +
> +	return 0;
> +}
> +
> +static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
> +		     struct Qdisc **old)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +	if (!new)
> +		new = &noop_qdisc;
> +
> +	*old = qdisc_replace(sch, new, &q->qdisc);
> +	return 0;
> +}
> +
> +static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
> +{
> +	struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +	return q->qdisc;
> +}
> +
> +static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
> +{
> +	return 1;
> +}
> +
> +static int cbs_delete(struct Qdisc *sch, unsigned long arg)
> +{
> +	return 0;
> +}
> +
> +static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
> +{
> +	if (!walker->stop) {
> +		if (walker->count >= walker->skip)
> +			if (walker->fn(sch, 1, walker) < 0) {
> +				walker->stop = 1;
> +				return;
> +			}
> +		walker->count++;
> +	}
> +}
> +
> +static const struct Qdisc_class_ops cbs_class_ops = {
> +	.graft		=	cbs_graft,
> +	.leaf		=	cbs_leaf,
> +	.find		=	cbs_find,
> +	.delete		=	cbs_delete,
> +	.walk		=	cbs_walk,
> +	.dump		=	cbs_dump_class,
> +};
> +
> +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
> +	.next		=	NULL,
> +	.cl_ops		=	&cbs_class_ops,
> +	.id		=	"cbs",
> +	.priv_size	=	sizeof(struct cbs_sched_data),
> +	.enqueue	=	cbs_enqueue,
> +	.dequeue	=	cbs_dequeue,
> +	.peek		=	qdisc_peek_dequeued,
> +	.init		=	cbs_init,
> +	.reset		=	cbs_reset,
> +	.destroy	=	cbs_destroy,
> +	.change		=	cbs_change,
> +	.dump		=	cbs_dump,
> +	.owner		=	THIS_MODULE,
> +};
> +
> +static int __init cbs_module_init(void)
> +{
> +	return register_qdisc(&cbs_qdisc_ops);
> +}
> +
> +static void __exit cbs_module_exit(void)
> +{
> +	unregister_qdisc(&cbs_qdisc_ops);
> +}
> +module_init(cbs_module_init)
> +module_exit(cbs_module_exit)
> +MODULE_LICENSE("GPL");
> -- 
> 2.14.1
> 

-- 
Henrik Austad

Download attachment "signature.asc" of type "application/pgp-signature" (182 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ