Message-ID: <9ea3761d048bf6b5f36c3d57e3eecbce2780b6c3.1453308862.git.shli@fb.com>
Date:	Wed, 20 Jan 2016 09:49:18 -0800
From:	Shaohua Li <shli@...com>
To:	<linux-kernel@...r.kernel.org>
CC:	<axboe@...nel.dk>, <tj@...nel.org>, <vgoyal@...hat.com>,
	<jmoyer@...hat.com>, <Kernel-team@...com>
Subject: [RFC 2/3] blk-throttling: weight based throttling

We know the total bandwidth of a disk, so we can calculate a cgroup's share
of that bandwidth from its weight relative to its siblings' weights. From
that share it is easy to derive the cgroup's bps limit for the existing
throttling machinery.
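
For reference, a minimal userspace sketch (not part of the patch) of the
fixed-point arithmetic tg_update_bps() below performs: a child's ratio is
parent_ratio * weight / children_weight, and its bps limit is
(queue bandwidth * ratio) >> WEIGHT_RATIO_SHIFT. The 200 MiB/s queue
bandwidth and the two weights are made-up numbers for illustration.

#include <stdint.h>
#include <stdio.h>

#define WEIGHT_RATIO_SHIFT 12
#define WEIGHT_RATIO (1 << WEIGHT_RATIO_SHIFT)

/*
 * child's bps = (queue bandwidth * ratio) >> WEIGHT_RATIO_SHIFT, where the
 * ratio is the parent's ratio scaled by weight / children_weight
 */
static uint64_t child_bps(uint64_t queue_bw, unsigned int parent_ratio,
			  unsigned int weight, unsigned int children_weight)
{
	unsigned int ratio = parent_ratio * weight / children_weight;

	if (!ratio)
		ratio = 1;
	return (queue_bw * ratio) >> WEIGHT_RATIO_SHIFT;
}

int main(void)
{
	uint64_t queue_bw = 200ULL << 20;	/* assume a measured 200 MiB/s */

	/* two siblings under the root (ratio WEIGHT_RATIO), weights 500 and 250 */
	printf("A: %llu bytes/s\n",
	       (unsigned long long)child_bps(queue_bw, WEIGHT_RATIO, 500, 750));
	printf("B: %llu bytes/s\n",
	       (unsigned long long)child_bps(queue_bw, WEIGHT_RATIO, 250, 750));
	return 0;
}

This splits the ~200 MiB/s roughly 2:1 (about 133 MiB/s vs 67 MiB/s),
matching the 500:250 weights.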

Signed-off-by: Shaohua Li <shli@...com>
---
 block/blk-throttle.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 134 insertions(+), 1 deletion(-)
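
Usage note: a hedged sketch of configuring the new per-device weight from
userspace on the legacy (cgroup v1) hierarchy. The cgroup path and the 8:16
device numbers are made up, and it assumes the file appears as
blkio.throttle.weight; the "MAJ:MIN <weight>" format matches what
blkg_conf_prep() and tg_set_weight() parse.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical cgroup and device; adjust the path and MAJ:MIN */
	int fd = open("/sys/fs/cgroup/blkio/grp_a/blkio.throttle.weight",
		      O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* weight is clamped to the 1..MAX_WEIGHT (1000) range by tg_set_weight() */
	if (dprintf(fd, "8:16 500\n") < 0)
		perror("write");
	close(fd);
	return 0;
}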

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 2149a1d..b3f847d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -12,6 +12,9 @@
 #include <linux/blk-cgroup.h>
 #include "blk.h"
 
+#define MAX_WEIGHT (1000)
+#define WEIGHT_RATIO_SHIFT (12)
+#define WEIGHT_RATIO (1 << WEIGHT_RATIO_SHIFT)
 /* Max dispatch from a group in 1 round */
 static int throtl_grp_quantum = 8;
 
@@ -74,6 +77,10 @@ struct throtl_service_queue {
 	unsigned int		nr_pending;	/* # queued in the tree */
 	unsigned long		first_pending_disptime;	/* disptime of the first tg */
 	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
+
+	unsigned int		weight;
+	unsigned int		children_weight;
+	unsigned int		ratio;
 };
 
 enum tg_state_flags {
@@ -152,6 +159,9 @@ struct throtl_data
 
 	/* Work for dispatching throttled bios */
 	struct work_struct dispatch_work;
+
+	bool bw_based;
+	bool weight_based;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -203,6 +213,15 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
 		return container_of(sq, struct throtl_data, service_queue);
 }
 
+static inline uint64_t queue_bandwidth(struct throtl_data *td, int rw)
+{
+	uint64_t bw = td->queue->avg_bw[rw] * 512;
+
+	/* give extra bw, so cgroup can dispatch enough IO */
+	bw += bw >> 3;
+	return bw;
+}
+
 /**
  * throtl_log - log debug message via blktrace
  * @sq: the service_queue being reported
@@ -371,6 +390,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	sq->parent_sq = &td->service_queue;
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
+	sq->parent_sq->children_weight += sq->weight;
 	tg->td = td;
 }
 
@@ -386,7 +406,8 @@ static void tg_update_has_rules(struct throtl_grp *tg)
 
 	for (rw = READ; rw <= WRITE; rw++)
 		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
-				    (tg->bps[rw] != -1 || tg->iops[rw] != -1);
+				    (tg->bps[rw] != -1 || tg->iops[rw] != -1 ||
+				     tg->service_queue.weight);
 }
 
 static void throtl_pd_online(struct blkg_policy_data *pd)
@@ -401,6 +422,10 @@ static void throtl_pd_online(struct blkg_policy_data *pd)
 static void throtl_pd_free(struct blkg_policy_data *pd)
 {
 	struct throtl_grp *tg = pd_to_tg(pd);
+	struct throtl_service_queue *sq = &tg->service_queue;
+
+	if (sq->parent_sq)
+		sq->parent_sq->children_weight -= sq->weight;
 
 	del_timer_sync(&tg->service_queue.pending_timer);
 	kfree(tg);
@@ -898,6 +923,48 @@ static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
 
 }
 
+static void tg_update_bps(struct throtl_grp *tg)
+{
+	struct throtl_service_queue *sq, *parent_sq;
+
+	sq = &tg->service_queue;
+	parent_sq = sq->parent_sq;
+
+	if (!tg->td->weight_based || !parent_sq)
+		return;
+	sq->ratio = max_t(unsigned int,
+		parent_sq->ratio * sq->weight / parent_sq->children_weight,
+		1);
+
+	tg->bps[READ] = max_t(uint64_t,
+		(queue_bandwidth(tg->td, READ) * sq->ratio) >>
+			WEIGHT_RATIO_SHIFT,
+		1024);
+	tg->bps[WRITE] = max_t(uint64_t,
+		(queue_bandwidth(tg->td, WRITE) * sq->ratio) >>
+			WEIGHT_RATIO_SHIFT,
+		1024);
+}
+
+static void tg_update_ratio(struct throtl_grp *tg)
+{
+	struct throtl_data *td = tg->td;
+	struct cgroup_subsys_state *pos_css;
+	struct blkcg_gq *blkg;
+
+	blkg_for_each_descendant_pre(blkg, pos_css, td->queue->root_blkg) {
+		struct throtl_service_queue *sq;
+
+		tg = blkg_to_tg(blkg);
+		sq = &tg->service_queue;
+
+		if (!sq->parent_sq)
+			continue;
+
+		tg_update_bps(tg);
+	}
+}
+
 static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
@@ -1202,12 +1269,65 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 		v = -1;
 
 	tg = blkg_to_tg(ctx.blkg);
+	if (tg->td->weight_based) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
 
 	if (is_u64)
 		*(u64 *)((void *)tg + of_cft(of)->private) = v;
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
+	tg->td->bw_based = true;
+
+	tg_conf_updated(tg);
+	ret = 0;
+out_finish:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+
+static ssize_t tg_set_weight(struct kernfs_open_file *of,
+			   char *buf, size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkg_conf_ctx ctx;
+	struct throtl_grp *tg;
+	int ret;
+	u64 v;
+	int old_weight;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+	if (ret)
+		return ret;
 
+	ret = -EINVAL;
+	if (sscanf(ctx.body, "%llu", &v) != 1)
+		goto out_finish;
+	if (v > MAX_WEIGHT)
+		v = MAX_WEIGHT;
+	if (v == 0)
+		v = 1;
+
+	tg = blkg_to_tg(ctx.blkg);
+	if (tg->td->bw_based) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
+	tg->td->weight_based = true;
+
+	old_weight = tg->service_queue.weight;
+
+	tg->service_queue.weight = v;
+	if (tg->service_queue.parent_sq) {
+		struct throtl_service_queue *psq = tg->service_queue.parent_sq;
+		if (v > old_weight)
+			psq->children_weight += v - old_weight;
+		else if (v < old_weight)
+			psq->children_weight -= old_weight - v;
+	}
+
+	tg_update_ratio(tg);
 	tg_conf_updated(tg);
 	ret = 0;
 out_finish:
@@ -1229,6 +1349,12 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
 
 static struct cftype throtl_legacy_files[] = {
 	{
+		.name = "throttle.weight",
+		.private = offsetof(struct throtl_grp, service_queue.weight),
+		.seq_show = tg_print_conf_uint,
+		.write = tg_set_weight,
+	},
+	{
 		.name = "throttle.read_bps_device",
 		.private = offsetof(struct throtl_grp, bps[READ]),
 		.seq_show = tg_print_conf_u64,
@@ -1313,6 +1439,10 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
 		return ret;
 
 	tg = blkg_to_tg(ctx.blkg);
+	if (tg->td->weight_based) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
 
 	v[0] = tg->bps[READ];
 	v[1] = tg->bps[WRITE];
@@ -1358,6 +1488,7 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
 	tg->bps[WRITE] = v[1];
 	tg->iops[READ] = v[2];
 	tg->iops[WRITE] = v[3];
+	tg->td->bw_based = true;
 
 	tg_conf_updated(tg);
 	ret = 0;
@@ -1415,6 +1546,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	sq = &tg->service_queue;
 
+	tg_update_bps(tg);
 	while (true) {
 		/* throtl is FIFO - if bios are already queued, should queue */
 		if (sq->nr_queued[rw])
@@ -1563,6 +1695,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
 	throtl_service_queue_init(&td->service_queue);
+	td->service_queue.ratio = WEIGHT_RATIO;
 
 	q->td = td;
 	td->queue = q;
-- 
2.4.6
