[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180626001116.GA4274@gmail.com>
Date: Tue, 26 Jun 2018 05:41:18 +0530
From: Nishanth Devarajan <ndev2021@...il.com>
To: Alexander Duyck <alexander.duyck@...il.com>
Cc: Cong Wang <xiyou.wangcong@...il.com>,
Jamal Hadi Salim <jhs@...atatu.com>,
Jiri Pirko <jiri@...nulli.us>,
David Miller <davem@...emloft.net>, netdev@...r.kernel.org,
doucette@...edu, michel@...irati.com.br
Subject: Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
On Sat, Jun 23, 2018 at 03:10:32PM -0700, Alexander Duyck wrote:
> On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan <ndev2021@...il.com> wrote:
> > net/sched: add skbprio scheduler
> >
> > Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
> > according to their skb->priority field. Although Skbprio can be employed in any
> > scenario in which a higher skb->priority field means a higher priority packet,
> > Skbprio was conceived as a solution for denial-of-service defenses that need to
> > route packets with different priorities.
>
> Really this description is not very good. Reading it I was thinking to
> myself "why do we need this, prio already does this". It wasn't until
> I read through the code that I figured out that you are basically
> adding dropping of lower priority frames.
>
OK, I'll take Jamal's suggestion on this and write up a new description.
> >
> > v2
> > *Use skb->priority field rather than DS field. Rename queueing discipline as
> > SKB Priority Queue (previously Gatekeeper Priority Queue).
> >
> > *Queueing discipline is made classful to expose Skbprio's internal priority
> > queues.
> >
> > Signed-off-by: Nishanth Devarajan <ndev2021@...il.com>
> > Reviewed-by: Sachin Paryani <sachin.paryani@...il.com>
> > Reviewed-by: Cody Doucette <doucette@...edu>
> > Reviewed-by: Michel Machado <michel@...irati.com.br>
> > ---
> > include/uapi/linux/pkt_sched.h | 15 ++
> > net/sched/Kconfig | 13 ++
> > net/sched/Makefile | 1 +
> > net/sched/sch_skbprio.c | 347 +++++++++++++++++++++++++++++++++++++++++
> > 4 files changed, 376 insertions(+)
> > create mode 100644 net/sched/sch_skbprio.c
> >
> > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> > index 37b5096..6fd07e8 100644
> > --- a/include/uapi/linux/pkt_sched.h
> > +++ b/include/uapi/linux/pkt_sched.h
> > @@ -124,6 +124,21 @@ struct tc_fifo_qopt {
> > __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
> > };
> >
> > +/* SKBPRIO section */
> > +
> > +/*
> > + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
> > + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
> > + * to map one to one the DS field of IPV4 and IPV6 headers.
> > + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
> > + */
> > +
> > +#define SKBPRIO_MAX_PRIORITY 64
> > +
> > +struct tc_skbprio_qopt {
> > + __u32 limit; /* Queue length in packets. */
> > +};
> > +
> > /* PRIO section */
> >
> > #define TCQ_PRIO_BANDS 16
> > diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> > index a01169f..9ac4b53 100644
> > --- a/net/sched/Kconfig
> > +++ b/net/sched/Kconfig
> > @@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
> >
> > If unsure, say N.
> >
> > +config NET_SCH_SKBPRIO
> > + tristate "SKB priority queue scheduler (SKBPRIO)"
> > + help
> > + Say Y here if you want to use the SKB priority queue
> > + scheduler. This schedules packets according to skb->priority,
> > + which is useful for request packets in DoS mitigation systems such
> > + as Gatekeeper.
> > +
> > + To compile this driver as a module, choose M here: the module will
> > + be called sch_skbprio.
> > +
> > + If unsure, say N.
> > +
> > config NET_SCH_CHOKE
> > tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
> > help
> > diff --git a/net/sched/Makefile b/net/sched/Makefile
> > index 8811d38..a4d8893 100644
> > --- a/net/sched/Makefile
> > +++ b/net/sched/Makefile
> > @@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
> > obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
> > obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
> > obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
> > +obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
> > obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
> > obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
> > obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
> > diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
> > new file mode 100644
> > index 0000000..5e89446
> > --- /dev/null
> > +++ b/net/sched/sch_skbprio.c
> > @@ -0,0 +1,347 @@
> > +/*
> > + * net/sched/sch_skbprio.c SKB Priority Queue.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version
> > + * 2 of the License, or (at your option) any later version.
> > + *
> > + * Authors: Nishanth Devarajan, <ndev2021@...il.com>
> > + * Cody Doucette, <doucette@...edu>
> > + * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
> > + */
> > +
> > +#include <linux/string.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +#include <linux/kernel.h>
> > +#include <linux/errno.h>
> > +#include <linux/skbuff.h>
> > +#include <net/pkt_sched.h>
> > +#include <net/sch_generic.h>
> > +#include <net/inet_ecn.h>
> > +
> > +
> > +/* SKB Priority Queue
> > + * =================================
> > + *
> > + * This qdisc schedules a packet according to skb->priority, where a higher
> > + * value places the packet closer to the exit of the queue. When the queue is
> > + * full, the lowest priority packet in the queue is dropped to make room for
> > + * the packet to be added if it has higher priority. If the packet to be added
> > + * has lower priority than all packets in the queue, it is dropped.
> > + *
> > + * Without the SKB priority queue, queue length limits must be imposed
> > + * for individual queues, and there is no easy way to enforce a global queue
> > + * length limit across all priorities. With the SKBprio queue, a global
> > + * queue length limit can be enforced while not restricting the queue lengths
> > + * of individual priorities.
> > + *
> > + * This is especially useful for a denial-of-service defense system like
> > + * Gatekeeper, which prioritizes packets in flows that demonstrate expected
> > + * behavior of legitimate users. The queue is flexible to allow any number
> > + * of packets of any priority up to the global limit of the scheduler
> > + * without risking resource overconsumption by a flood of low priority packets.
> > + *
> > + * The Gatekeeper codebase is found here:
> > + *
> > + * https://github.com/AltraMayor/gatekeeper
>
> Do we really need to be linking to third party projects in the code? I
> am not even really sure it is related here since I cannot find
> anything that references the queuing discipline.
>
> Also this seems like it should be in the Documentation/networking
> folder rather than being stored in the code itself.
>
The Skbprio qdisc was designed from Gatekeeper's functionality, and we intended
for it to represent Gatekeeper's deployment in production. But I understand
that this belongs in Documentation; I will change this in v3.
> > + */
> > +
> > +struct skbprio_sched_data {
> > + /* Parameters. */
> > + u32 max_limit;
> > +
> > + /* Queue state. */
> > + struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
> > + struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
> > + u16 highest_prio;
> > + u16 lowest_prio;
> > +};
> > +
> > +static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
> > +{
> > + int prio;
> > +
> > + for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
> > + if (!skb_queue_empty(&q->qdiscs[prio]))
> > + return prio;
> > + }
> > +
> > + /* SKB queue is empty, return 0 (default highest priority setting). */
> > + return 0;
> > +}
> > +
> > +static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
> > +{
> > + int prio;
> > +
> > + for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
> > + if (!skb_queue_empty(&q->qdiscs[prio]))
> > + return prio;
> > + }
> > +
> > + /* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
> > + * (default lowest priority setting).
> > + */
> > + return SKBPRIO_MAX_PRIORITY - 1;
> > +}
> > +
> > +static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> > + struct sk_buff **to_free)
> > +{
> > + const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + struct sk_buff_head *qdisc;
> > + struct sk_buff_head *lp_qdisc;
> > + struct sk_buff *to_drop;
> > + u16 prio, lp;
> > +
> > + /* Obtain the priority of @skb. */
> > + prio = min(skb->priority, max_priority);
> > +
> > + qdisc = &q->qdiscs[prio];
> > + if (sch->q.qlen < q->max_limit) {
> > + __skb_queue_tail(qdisc, skb);
> > + qdisc_qstats_backlog_inc(sch, skb);
> > + q->qstats[prio].backlog += qdisc_pkt_len(skb);
> > +
> > + /* Check to update highest and lowest priorities. */
> > + if (prio > q->highest_prio)
> > + q->highest_prio = prio;
> > +
> > + if (prio < q->lowest_prio)
> > + q->lowest_prio = prio;
> > +
> > + sch->q.qlen++;
> > + return NET_XMIT_SUCCESS;
> > + }
> > +
> > + /* If this packet has the lowest priority, drop it. */
> > + lp = q->lowest_prio;
> > + if (prio <= lp) {
> > + q->qstats[prio].drops++;
> > + return qdisc_drop(skb, sch, to_free);
> > + }
> > +
> > + /* Drop the packet at the tail of the lowest priority qdisc. */
> > + lp_qdisc = &q->qdiscs[lp];
> > + to_drop = __skb_dequeue_tail(lp_qdisc);
>
> Is there a specific reason for dropping tail instead of head? I would
> think you might see better latency numbers with this if you were to
> focus on dropping older packets from the lowest priority instead of
> the newest ones. Then things like TCP would likely be able to respond
> more quickly to the congestion effects and would be able to more
> accurately determine actual round trip time versus buffer delay.
>
You make a good point that TCP would respond better to congestion if we dropped
at the head. But the idea of dropping at the tail was to relieve immediate
congestion (from a denial-of-service standpoint) by prioritizing older packets
in the queue over ingress attack packets. And since Skbprio is designed as a
denial-of-service defense, wouldn't this be a better solution?
> > + BUG_ON(!to_drop);
> > + qdisc_qstats_backlog_dec(sch, to_drop);
> > + qdisc_drop(to_drop, sch, to_free);
> > +
> > + q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
> > + q->qstats[lp].drops++;
> > +
> > + __skb_queue_tail(qdisc, skb);
> > + qdisc_qstats_backlog_inc(sch, skb);
> > + q->qstats[prio].backlog += qdisc_pkt_len(skb);
> > +
>
> You could probably just take care of enqueueing first, and then take
> care of the dequeue. That way you can drop the references to skb
> earlier from the stack or free up whatever register is storing it.
>
OK, will change this in v3.
> > + /* Check to update highest and lowest priorities. */
> > + if (skb_queue_empty(lp_qdisc)) {
> > + if (q->lowest_prio == q->highest_prio) {
> > + BUG_ON(sch->q.qlen);
> > + q->lowest_prio = prio;
> > + q->highest_prio = prio;
> > + } else {
> > + q->lowest_prio = calc_new_low_prio(q);
> > + }
> > + }
> > +
> > + if (prio > q->highest_prio)
> > + q->highest_prio = prio;
> > +
> > + return NET_XMIT_CN;
> > +}
> > +
> > +static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
> > + struct sk_buff *skb = __skb_dequeue(hpq);
> > +
> > + if (unlikely(!skb))
> > + return NULL;
> > +
> > + sch->q.qlen--;
> > + qdisc_qstats_backlog_dec(sch, skb);
> > + qdisc_bstats_update(sch, skb);
> > +
> > + q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
> > +
> > + /* Update highest priority field. */
> > + if (skb_queue_empty(hpq)) {
> > + if (q->lowest_prio == q->highest_prio) {
> > + BUG_ON(sch->q.qlen);
> > + q->highest_prio = 0;
> > + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
> > + } else {
> > + q->highest_prio = calc_new_high_prio(q);
> > + }
> > + }
> > + return skb;
> > +}
> > +
> > +static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
> > + struct netlink_ext_ack *extack)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + struct tc_skbprio_qopt *ctl = nla_data(opt);
> > + const unsigned int min_limit = 1;
> > +
> > + if (ctl->limit == (typeof(ctl->limit))-1)
> > + q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
> > + else if (ctl->limit < min_limit ||
> > + ctl->limit > qdisc_dev(sch)->tx_queue_len)
> > + return -EINVAL;
> > + else
> > + q->max_limit = ctl->limit;
> > +
> > + return 0;
> > +}
> > +
> > +static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
> > + struct netlink_ext_ack *extack)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + const unsigned int min_limit = 1;
> > + int prio;
> > +
> > + /* Initialise all queues, one for each possible priority. */
> > + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
> > + __skb_queue_head_init(&q->qdiscs[prio]);
> > +
> > + memset(&q->qstats, 0, sizeof(q->qstats));
> > + q->highest_prio = 0;
> > + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
> > + if (!opt) {
> > + q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
> > + return 0;
> > + }
> > + return skbprio_change(sch, opt, extack);
> > +}
> > +
> > +static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + struct tc_skbprio_qopt opt;
> > +
> > + opt.limit = q->max_limit;
> > +
> > + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
> > + return -1;
> > +
> > + return skb->len;
> > +}
> > +
> > +static void skbprio_reset(struct Qdisc *sch)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + int prio;
> > +
> > + sch->qstats.backlog = 0;
> > + sch->q.qlen = 0;
> > +
> > + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
> > + __skb_queue_purge(&q->qdiscs[prio]);
> > +
> > + memset(&q->qstats, 0, sizeof(q->qstats));
> > + q->highest_prio = 0;
> > + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
> > +}
> > +
> > +static void skbprio_destroy(struct Qdisc *sch)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + int prio;
> > +
> > + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
> > + __skb_queue_purge(&q->qdiscs[prio]);
> > +}
> > +
> > +static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
> > +{
> > + return NULL;
> > +}
> > +
> > +static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
> > +{
> > + return 0;
> > +}
> > +
> > +static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
> > + struct sk_buff *skb, struct tcmsg *tcm)
> > +{
> > + tcm->tcm_handle |= TC_H_MIN(cl);
> > + return 0;
> > +}
> > +
> > +static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
> > + struct gnet_dump *d)
> > +{
> > + struct skbprio_sched_data *q = qdisc_priv(sch);
> > + if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
> > + q->qstats[cl - 1].qlen) < 0)
> > + return -1;
> > + return 0;
> > +}
> > +
> > +static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
> > +{
> > + unsigned int i;
> > +
> > + if (arg->stop)
> > + return;
> > +
> > + for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
> > + if (arg->count < arg->skip) {
> > + arg->count++;
> > + continue;
> > + }
> > + if (arg->fn(sch, i + 1, arg) < 0) {
> > + arg->stop = 1;
> > + break;
> > + }
> > + arg->count++;
> > + }
> > +}
> > +
> > +static const struct Qdisc_class_ops skbprio_class_ops = {
> > + .leaf = skbprio_leaf,
> > + .find = skbprio_find,
> > + .dump = skbprio_dump_class,
> > + .dump_stats = skbprio_dump_class_stats,
> > + .walk = skbprio_walk,
> > +};
> > +
> > +static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
> > + .cl_ops = &skbprio_class_ops,
> > + .id = "skbprio",
> > + .priv_size = sizeof(struct skbprio_sched_data),
> > + .enqueue = skbprio_enqueue,
> > + .dequeue = skbprio_dequeue,
> > + .peek = qdisc_peek_dequeued,
> > + .init = skbprio_init,
> > + .reset = skbprio_reset,
> > + .change = skbprio_change,
> > + .dump = skbprio_dump,
> > + .destroy = skbprio_destroy,
> > + .owner = THIS_MODULE,
> > +};
> > +
> > +static int __init skbprio_module_init(void)
> > +{
> > + return register_qdisc(&skbprio_qdisc_ops);
> > +}
> > +
> > +static void __exit skbprio_module_exit(void)
> > +{
> > + unregister_qdisc(&skbprio_qdisc_ops);
> > +}
> > +
> > +module_init(skbprio_module_init)
> > +module_exit(skbprio_module_exit)
> > +
> > +MODULE_LICENSE("GPL");
> > --
> > 1.9.1
> >
Powered by blists - more mailing lists