lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87bm1optqu.fsf@toke.dk>
Date:   Tue, 02 Apr 2019 12:49:13 +0200
From:   Toke Høiland-Jørgensen <toke@...hat.com>
To:     Gautam Ramakrishnan <gautamramk@...il.com>, jhs@...atatu.com
Cc:     davem@...emloft.net, netdev@...r.kernel.org,
        Gautam Ramakrishnan <gautamramk@...il.com>,
        "Mohit P . Tahiliani" <tahiliani@...k.edu.in>,
        "Sachin D . Patil" <sdp.sachin@...il.com>,
        Mohit Bhasi <mohitbhasi1998@...il.com>,
        "V . Saicharan" <vsaicharan1998@...il.com>,
        Leslie Monis <lesliemonis@...il.com>,
        Dave Taht <dave.taht@...il.com>
Subject: Re: [RFC net-next 2/2] net: sched: fq_pie: Flow Queue PIE AQM

Some suggestions below to make fq_pie and fq_codel more similar (ref. my
previous email).

Also, a few unrelated nits.

> From: Mohit P. Tahiliani <tahiliani@...k.edu.in>
>
> FQ-PIE incorporates fair/flow queuing in which every queue
> is managed by an instance of PIE queueing discipline.
> The algorithm provides good control over the queueing delay
> while at the same time, ensures fairness across various
> flows sharing the same link.
>
> Principles:
>   - Packets are classified stochastically on flows.
>   - Each flow has a PIE managed queue.
>   - Flows are linked onto two (Round Robin) lists,
>     so that new flows have priority on old ones.
>   - For a given flow, packets are dropped on arrival according
>     to a drop probability.
>   - Tail drops only.

Why tail drops only?

> Usage:
> tc qdisc ... fq_pie [ limit PACKETS ] [ flows NUMBER ]
>                     [ alpha NUMBER ] [ beta NUMBER ]
>                     [ target TIME us ] [ tupdate TIME us ]
> 		    [ bytemode ] [ quantum BYTES ]
> 		    [ ecn | ecn_prob PERCENTAGE ]
>
> defaults: 1024 flows, 10240 packets limit, quantum : device MTU
>            target: 15ms
>            tupdate: 15ms
>            alpha: 2 (on a scale of 0 to 16)
>            beta: 20 (on a scale of 0 to 16)
>            ecn: false
>            ecn_prob: 10%
>
> Signed-off-by: Mohit P. Tahiliani <tahiliani@...k.edu.in>
> Signed-off-by: Sachin D. Patil <sdp.sachin@...il.com>
> Signed-off-by: Mohit Bhasi <mohitbhasi1998@...il.com>
> Signed-off-by: V. Saicharan <vsaicharan1998@...il.com>
> Signed-off-by: Leslie Monis <lesliemonis@...il.com>
> Signed-off-by: Gautam Ramakrishnan <gautamramk@...il.com>
> Cc: Dave Taht <dave.taht@...il.com>
> ---
>  include/uapi/linux/pkt_sched.h |  28 ++
>  net/sched/Kconfig              |  14 +-
>  net/sched/Makefile             |   1 +
>  net/sched/sch_fq_pie.c         | 485 +++++++++++++++++++++++++++++++++
>  4 files changed, 527 insertions(+), 1 deletion(-)
>  create mode 100644 net/sched/sch_fq_pie.c
>
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 7ee74c3474bf..005413bd09ee 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -964,6 +964,34 @@ struct tc_pie_xstats {
>  	__u32 ecn_mark;         /* packets marked with ecn*/
>  };
>  
> +/* FQ PIE */
> +enum {
> +	TCA_FQ_PIE_UNSPEC,
> +	TCA_FQ_PIE_TARGET,
> +	TCA_FQ_PIE_LIMIT,
> +	TCA_FQ_PIE_TUPDATE,
> +	TCA_FQ_PIE_ALPHA,
> +	TCA_FQ_PIE_BETA,
> +	TCA_FQ_PIE_ECN,
> +	TCA_FQ_PIE_QUANTUM,
> +	TCA_FQ_PIE_BYTEMODE,
> +	TCA_FQ_PIE_FLOWS,
> +	TCA_FQ_PIE_ECN_PROB,
> +	__TCA_FQ_PIE_MAX
> +};
> +#define TCA_FQ_PIE_MAX   (__TCA_FQ_PIE_MAX - 1)
> +
> +struct tc_fq_pie_xstats {
> +	__u32 packets_in;       /* total number of packets enqueued */
> +	__u32 dropped;          /* packets dropped due to fq_pie_action */
> +	__u32 overlimit;        /* dropped due to lack of space in queue */
> +	__u32 ecn_mark;         /* packets marked with ecn*/
> +	__u32 new_flow_count;   /* number of time packets
> +				   created a 'new flow' */
> +	__u32 new_flows_len;	/* count of flows in new list */
> +	__u32 old_flows_len;	/* count of flows in old list */
> +};
> +
>  /* CBS */
>  struct tc_cbs_qopt {
>  	__u8 offload;
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index 5c02ad97ef23..49f4dd9894a0 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -358,13 +358,25 @@ config NET_SCH_PIE
>  	help
>  	  Say Y here if you want to use the Proportional Integral controller
>  	  Enhanced scheduler packet scheduling algorithm.
> -	  For more information, please see https://tools.ietf.org/html/rfc8033
> +	  For more information, please see
> +	  http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
>  
>  	  To compile this driver as a module, choose M here: the module
>  	  will be called sch_pie.
>  
>  	  If unsure, say N.
>  
> +config NET_SCH_FQ_PIE
> +	tristate "Flow Queue Proportional Integral controller Enhanced (FQ-PIE) scheduler"
> +	help
> +	  Say Y here if you want to use the Flow Queue Proportional Integral controller
> +	  Enhanced scheduler packet scheduling algorithm.
> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called sch_fq_pie.
> +
> +	  If unsure, say N.
> +
>  config NET_SCH_INGRESS
>  	tristate "Ingress/classifier-action Qdisc"
>  	depends on NET_CLS_ACT
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index 8a40431d7b5c..fdcd3f7b2fb2 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -55,6 +55,7 @@ obj-$(CONFIG_NET_SCH_CAKE)	+= sch_cake.o
>  obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
>  obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
>  obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
> +obj-$(CONFIG_NET_SCH_FQ_PIE)    += sch_fq_pie.o
>  obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
>  obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
>  obj-$(CONFIG_NET_SCH_TAPRIO)	+= sch_taprio.o
> diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
> new file mode 100644
> index 000000000000..4ccefa0bc7f0
> --- /dev/null
> +++ b/net/sched/sch_fq_pie.c
> @@ -0,0 +1,485 @@
> +/*
> + * net/sched/sch_fq_pie.c
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.

Lose the license boilerplate and replace it with a SPDX header line.

> + * Author: Mohit P. Tahiliani <tahiliani@...k.edu.in>
> + * Author: Sachin D. Patil <sdp.sachin@...il.com>
> + * Author: Mohit Bhasi <mohitbhasi1998@...il.com>
> + * Author: V Saicharan <vsaicharan1998@...il.com>
> + * Author: Leslie Monis <lesliemonis@...il.com>
> + * Author: Gautam Ramakrishnan <gautamramk@...il.com>
> + *
> + * References:
> + * RFC 8033: https://tools.ietf.org/html/rfc8033
> + */
> +
> +#include <linux/jhash.h>
> +#include <linux/vmalloc.h>
> +#include <net/pie.h>
> +
> +struct fq_pie_params {
> +	struct pie_params p_params;
> +	u32 ecn_prob;
> +	u32 flows_cnt;
> +};
> +
> +struct fq_pie_stats {
> +	u32 packets_in;		/* total number of packets enqueued */
> +	u32 dropped;		/* packets dropped due to fq_pie action */
> +	u32 overlimit;		/* dropped due to lack of space in queue */
> +	u32 ecn_mark;		/* packets marked with ECN */
> +	u32 new_flow_count;	/* number of time packets created a new flow */
> +};
> +
> +struct fq_pie_flow {
> +	s32 deficit;		/* number of credits remaining for the flow */
> +	u32 backlog;		/* size of data in the flow */
> +	u32 qlen;		/* number of packets in the flow */
> +	struct sk_buff *head;
> +	struct sk_buff *tail;
> +	struct list_head flowchain;
> +	struct pie_vars vars;	/* pie vars for the flow */
> +	struct pie_stats stats;	/* pie stats for the flow */
> +};
> +
> +struct fq_pie_sched_data {
> +	u32 quantum;		/* number of credits in deficit round robin */
> +	struct fq_pie_flow *flows;
> +	struct Qdisc *sch;
> +	struct fq_pie_params params;
> +	struct fq_pie_stats stats;
> +	struct list_head old_flows;
> +	struct list_head new_flows;
> +	struct timer_list adapt_timer;
> +};

The flow and sched_data structs have quite a bit in common with those in
fq_codel; but the members are in a different order.

> +static void fq_pie_params_init(struct fq_pie_params *params)
> +{
> +	pie_params_init(&params->p_params);
> +	params->ecn_prob = 10;
> +	params->flows_cnt = 1024;
> +}
> +
> +static inline void flow_queue_add(struct fq_pie_flow *flow,
> +				  struct sk_buff *skb)
> +{
> +	if (!flow->head)
> +		flow->head = skb;
> +	else
> +		flow->tail->next = skb;
> +	flow->tail = skb;
> +	skb->next = NULL;
> +}
> +
> +static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +				struct sk_buff **to_free)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	struct fq_pie_flow *sel_flow;
> +	u32 pkt_len;
> +	u32 idx;
> +	u8 enqueue = false;
> +
> +	/* Classifies packet into corresponding flow */
> +	idx = reciprocal_scale(skb_get_hash(skb), q->params.flows_cnt);
> +	sel_flow = &q->flows[idx];

This is missing the ability to override the classification from tc
filters. See fq_codel_classify().

> +
> +	/* Checks if the qdisc is full */
> +	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
> +		q->stats.overlimit++;
> +		sel_flow->stats.overlimit++;
> +		goto out;
> +	}

The memory_limit checks in fq_codel have turned out to be quite useful
on constrained systems. I'd suggest adding them here as well.

> +
> +	if (!drop_early(sch, sel_flow->backlog, skb->len, &sel_flow->vars,
> +			&q->params.p_params)) {
> +		enqueue = true;
> +	} else if (q->params.p_params.ecn &&
> +		   sel_flow->vars.prob <=
> +		   (MAX_PROB / 100) * q->params.ecn_prob &&
> +		   INET_ECN_set_ce(skb)) {
> +		/* If packet is ecn capable, mark it if drop probability
> +		 * is lower than the parameter ecn_prob, else drop it.
> +		 */
> +		q->stats.ecn_mark++;
> +		sel_flow->stats.ecn_mark++;
> +		enqueue = true;
> +	}
> +	if (enqueue) {
> +		pkt_len = qdisc_pkt_len(skb);
> +		q->stats.packets_in++;
> +		sch->qstats.backlog += pkt_len;
> +		sch->q.qlen++;
> +		flow_queue_add(sel_flow, skb);
> +		if (list_empty(&sel_flow->flowchain)) {
> +			list_add_tail(&sel_flow->flowchain, &q->new_flows);
> +			q->stats.new_flow_count++;
> +			sel_flow->deficit = q->quantum;
> +			sel_flow->stats.dropped = 0;
> +			sel_flow->qlen = 0;
> +			sel_flow->backlog = 0;
> +		}
> +		sel_flow->qlen++;
> +		sel_flow->stats.packets_in++;
> +		sel_flow->backlog += pkt_len;
> +		return NET_XMIT_SUCCESS;
> +	}
> +out:
> +	q->stats.dropped++;
> +	sel_flow->stats.dropped++;
> +	return qdisc_drop(skb, sch, to_free);

You probably want to return NET_XMIT_CN instead of NET_XMIT_DROP here.

> +}
> +
> +static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
> +	[TCA_FQ_PIE_TARGET] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_LIMIT] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_ALPHA] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_BETA] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_ECN] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_BYTEMODE] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_FLOWS] = {.type = NLA_U32},
> +	[TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}
> +};
> +
> +static inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow)
> +{
> +	struct sk_buff *skb = flow->head;
> +
> +	flow->head = skb->next;
> +	skb->next = NULL;
> +	return skb;
> +}
> +
> +static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	struct sk_buff *skb = NULL;
> +	struct fq_pie_flow *flow;
> +	struct list_head *head;
> +	u32 pkt_len;
> +
> +begin:
> +	head = &q->new_flows;
> +	if (list_empty(head)) {
> +		head = &q->old_flows;
> +		if (list_empty(head))
> +			return NULL;
> +	}
> +
> +	flow = list_first_entry(head, struct fq_pie_flow, flowchain);
> +	/* Flow has exhausted all its credits */
> +	if (flow->deficit <= 0) {
> +		flow->deficit += q->quantum;
> +		list_move_tail(&flow->flowchain, &q->old_flows);
> +		goto begin;
> +	}
> +
> +	if (flow->head) {
> +		skb = dequeue_head(flow);
> +		pkt_len = qdisc_pkt_len(skb);
> +		sch->qstats.backlog -= pkt_len;
> +		sch->q.qlen--;
> +		qdisc_bstats_update(sch, skb);
> +	}

If you factor this out into a dequeue_func(), this dequeue() function is
very close to identical to the one in fq_codel().

> +	if (!skb) {
> +		if (head == &q->new_flows && !list_empty(&q->old_flows))
> +			list_move_tail(&flow->flowchain, &q->old_flows);
> +		else
> +			list_del_init(&flow->flowchain);
> +		goto begin;
> +	}
> +
> +	flow->qlen--;
> +	flow->deficit -= pkt_len;
> +	flow->backlog -= pkt_len;
> +	pie_process_dequeue(flow->backlog, skb, &flow->vars);
> +	return skb;
> +}
> +
> +static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
> +			 struct netlink_ext_ack *extack)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
> +	unsigned int len_dropped = 0;
> +	unsigned int num_dropped = 0;
> +	unsigned int qlen;
> +	int err;
> +
> +	if (!opt)
> +		return -EINVAL;
> +
> +	err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, NULL);
> +	if (err < 0)
> +		return err;
> +
> +	if (tb[TCA_FQ_PIE_FLOWS]) {
> +		if (q->flows)
> +			return -EINVAL;
> +		q->params.flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
> +		if (!q->params.flows_cnt ||
> +		    q->params.flows_cnt > 65536)
> +			return -EINVAL;
> +	}
> +
> +	sch_tree_lock(sch);
> +
> +	/* convert from microseconds to pschedtime */
> +	if (tb[TCA_FQ_PIE_TARGET]) {
> +		/* target is in us */
> +		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
> +
> +		/* convert to pschedtime */
> +		q->params.p_params.target =
> +		PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
> +	}
> +
> +	/* tupdate is in jiffies */
> +	if (tb[TCA_FQ_PIE_TUPDATE])
> +		q->params.p_params.tupdate =
> +		usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
> +
> +	if (tb[TCA_FQ_PIE_LIMIT]) {
> +		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
> +
> +		q->params.p_params.limit = limit;
> +		sch->limit = limit;
> +	}
> +
> +	if (tb[TCA_FQ_PIE_ECN_PROB])
> +		q->params.ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
> +
> +	if (tb[TCA_FQ_PIE_ALPHA])
> +		q->params.p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
> +
> +	if (tb[TCA_FQ_PIE_BETA])
> +		q->params.p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
> +
> +	if (tb[TCA_FQ_PIE_ECN])
> +		q->params.p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
> +
> +	if (tb[TCA_FQ_PIE_QUANTUM])
> +		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
> +
> +	if (tb[TCA_FQ_PIE_BYTEMODE])
> +		q->params.p_params.bytemode =
> +		nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
> +
> +	/* Drop excess packets if new limit is lower */
> +	qlen = sch->q.qlen;
> +	while (sch->q.qlen > sch->limit) {
> +		struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
> +
> +		kfree_skb(skb);
> +		len_dropped += qdisc_pkt_len(skb);
> +		num_dropped += 1;
> +	}
> +	qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);
> +
> +	sch_tree_unlock(sch);
> +	return 0;
> +}
> +
> +static void fq_pie_timer(struct timer_list *t)
> +{
> +	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
> +	struct Qdisc *sch = q->sch;
> +	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
> +	u16 idx;
> +
> +	spin_lock(root_lock);
> +
> +	for (idx = 0; idx < q->params.flows_cnt; idx++)
> +		calculate_probability(q->flows[idx].backlog,
> +				      &q->flows[idx].vars,
> +				      &q->params.p_params);
> +
> +	// reset the timer to fire after 'tupdate'. tupdate is in jiffies.
> +	if (q->params.p_params.tupdate)
> +		mod_timer(&q->adapt_timer,
> +			  jiffies + q->params.p_params.tupdate);
> +	spin_unlock(root_lock);
> +}
> +
> +static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
> +		       struct netlink_ext_ack *extack)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	int err;
> +	int i;
> +
> +	fq_pie_params_init(&q->params);
> +	sch->limit = 10 * 1024;
> +	q->params.p_params.limit = sch->limit;
> +	q->quantum = psched_mtu(qdisc_dev(sch));
> +	q->sch = sch;
> +
> +	INIT_LIST_HEAD(&q->new_flows);
> +	INIT_LIST_HEAD(&q->old_flows);
> +
> +	timer_setup(&q->adapt_timer, fq_pie_timer, 0);
> +	mod_timer(&q->adapt_timer, jiffies + HZ / 2);
> +
> +	if (opt) {
> +		int err = fq_pie_change(sch, opt, extack);
> +
> +		if (err)
> +			return err;
> +	}
> +
> +	if (!q->flows) {
> +		q->flows = kvcalloc(q->params.flows_cnt,
> +				    sizeof(struct fq_pie_flow),
> +				    GFP_KERNEL);
> +		if (!q->flows) {
> +			err = -ENOMEM;
> +			goto init_failure;
> +		}
> +		for (i = 0; i < q->params.flows_cnt; i++) {
> +			struct fq_pie_flow *flow = q->flows + i;
> +
> +			INIT_LIST_HEAD(&flow->flowchain);
> +			pie_vars_init(&flow->vars);
> +		}
> +	}
> +	return 0;
> +
> +init_failure:
> +	q->params.flows_cnt = 0;
> +
> +	return err;
> +}
> +
> +static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	struct nlattr *opts;
> +
> +	opts = nla_nest_start(skb, TCA_OPTIONS);
> +	if (!opts)
> +		goto nla_put_failure;
> +
> +	/* convert target from pschedtime to us */
> +	if (nla_put_u32(skb, TCA_FQ_PIE_TARGET,
> +			((u32)PSCHED_TICKS2NS(q->params.p_params.target)) /
> +			NSEC_PER_USEC) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
> +			jiffies_to_usecs(q->params.p_params.tupdate)) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->params.p_params.alpha) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->params.p_params.beta) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->params.p_params.ecn) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE,
> +			q->params.p_params.bytemode) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->params.flows_cnt) ||
> +	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->params.ecn_prob))
> +		goto nla_put_failure;
> +
> +	return nla_nest_end(skb, opts);
> +
> +nla_put_failure:
> +	return -1;
> +}
> +
> +static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	struct tc_fq_pie_xstats st = {
> +		.packets_in	= q->stats.packets_in,
> +		.overlimit	= q->stats.overlimit,
> +		.dropped	= q->stats.dropped,
> +		.ecn_mark	= q->stats.ecn_mark,
> +		.new_flow_count = q->stats.new_flow_count,
> +	};
> +	struct list_head *pos;
> +
> +	sch_tree_lock(sch);
> +	list_for_each(pos, &q->new_flows)
> +		st.new_flows_len++;
> +
> +	list_for_each(pos, &q->old_flows)
> +		st.old_flows_len++;
> +	sch_tree_unlock(sch);
> +
> +	return gnet_stats_copy_app(d, &st, sizeof(st));
> +}
> +
> +static void fq_pie_reset(struct Qdisc *sch)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +	u16 idx;
> +
> +	INIT_LIST_HEAD(&q->new_flows);
> +	INIT_LIST_HEAD(&q->old_flows);
> +	for (idx = 0; idx < q->params.flows_cnt; idx++) {
> +		struct fq_pie_flow *flow = q->flows + idx;
> +
> +		/* Removes all packets from flow */
> +		rtnl_kfree_skbs(flow->head, flow->tail);
> +		flow->head = NULL;
> +
> +		INIT_LIST_HEAD(&flow->flowchain);
> +		pie_vars_init(&flow->vars);
> +	}
> +
> +	sch->q.qlen = 0;
> +	sch->qstats.backlog = 0;
> +}
> +
> +static void fq_pie_destroy(struct Qdisc *sch)
> +{
> +	struct fq_pie_sched_data *q = qdisc_priv(sch);
> +
> +	kvfree(q->flows);
> +	del_timer_sync(&q->adapt_timer);
> +}
> +
> +static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
> +	.id = "fq_pie",
> +	.priv_size	= sizeof(struct fq_pie_sched_data),
> +	.enqueue	= fq_pie_qdisc_enqueue,
> +	.dequeue	= fq_pie_qdisc_dequeue,
> +	.peek		= qdisc_peek_dequeued,
> +	.init		= fq_pie_init,
> +	.destroy	= fq_pie_destroy,
> +	.reset		= fq_pie_reset,
> +	.change		= fq_pie_change,
> +	.dump		= fq_pie_dump,
> +	.dump_stats	= fq_pie_dump_stats,
> +	.owner		= THIS_MODULE,
> +};
> +
> +static int __init fq_pie_module_init(void)
> +{
> +	return register_qdisc(&fq_pie_qdisc_ops);
> +}
> +
> +static void __exit fq_pie_module_exit(void)
> +{
> +	unregister_qdisc(&fq_pie_qdisc_ops);
> +}
> +
> +module_init(fq_pie_module_init);
> +module_exit(fq_pie_module_exit);
> +
> +MODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE) scheduler");
> +MODULE_AUTHOR("Mohit P. Tahiliani");
> +MODULE_AUTHOR("Sachin D. Patil");
> +MODULE_AUTHOR("Mohit Bhasi");
> +MODULE_AUTHOR("V Saicharan");
> +MODULE_AUTHOR("Leslie Monis");
> +MODULE_AUTHOR("Gautam Ramakrishnan");
> +MODULE_LICENSE("GPL");
> -- 
> 2.17.1

Powered by blists - more mailing lists