lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Wed, 28 Aug 2013 20:20:13 -0700 (PDT)
From:	joeytao <husttsq@...il.com>
To:	linux-kernel@...r.kernel.org
Subject: Re: performance drop after using blkcg

Hello, 

I also ran these tests and found the same results. IMO, on faster storage with
a deep queue depth, if the device is asking for more requests but our workload
can't send enough requests, we have to idle to provide service
differentiation. We'll see a performance drop if applications can't drive
enough IO to keep the disk busy. Especially for writes, with the effect of the
disk cache and deep queue depth, we'll often see a performance drop.

So I came up with an approach called self-adapting blkcg: if the
average total service time for a request is much lower, we don't choose to
idle. Otherwise, we choose to idle to wait for the request. The patch is
below. After extensive testing, the new scheduler can provide service
differentiation in most cases. When the application can't drive enough
requests and the mean total service time is very small, we don't choose to
idle. In most cases, the performance doesn't drop after using blkcg and the
service differentiation is good.

>From 50705c8d4e456d3286e76bed7281796b1e915e0e Mon Sep 17 00:00:00 2001
From: Joeytao <husttsq@...il.com>
Date: Mon, 26 Aug 2013 15:40:39 +0800
Subject: [PATCH] Self-adaption blkcg

---
 block/cfq-iosched.c       |   41 ++++++++++++++++++++++++++++++++++++++---
 include/linux/iocontext.h |    5 +++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 23500ac..79296de 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -288,6 +288,8 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
+    unsigned int cfq_target_latency; 
+    unsigned int cfq_write_isolation; 
 	unsigned int cic_index;
 	struct list_head cic_list;
 
@@ -589,7 +591,7 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group
*cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	return cfq_target_latency * cfqg->weight / st->total_weight;
+	return cfqd->cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
 static inline unsigned
@@ -2028,6 +2031,14 @@ static void cfq_arm_slice_timer(struct cfq_data
*cfqd)
 			     cic->ttime_mean);
 		return;
 	}
+	
+	/* 
+	 * added by joeytao,   
+	 * If our average await_time is 0, then don't idle. This is for requests
of 
+	 * write,because if the cache of disk is on, it's no need to wait.
+	 */
+	if(!cfqd->cfq_write_isolation && sample_valid(cic->awtime_samples) &&
(cic->awtime_mean==0))
+		return;
 
 	/* There are other queues in the group, don't do group idle */
 	if (group_idle && cfqq->cfqg->nr_cfqq > 1)
@@ -2243,7 +2254,7 @@ new_workload:
 		 * to have higher weight. A more accurate thing would be to
 		 * calculate system wide asnc/sync ratio.
 		 */
-		tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
+		tmp = cfqd->cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
 		tmp = tmp/cfqd->busy_queues;
 		slice = min_t(unsigned, slice, tmp);
 
@@ -3228,10 +3239,21 @@ err:
 }
 
 static void
+cfq_update_io_awaittime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+{
+	unsigned long elapsed = jiffies - cic->last_end_request;
+	unsigned long awtime = min(elapsed, 2UL * 16);
+
+	cic->awtime_samples = (7*cic->awtime_samples + 256) / 8;
+	cic->awtime_total = (7*cic->awtime_total + 256*awtime) / 8;
+	cic->awtime_mean = (cic->awtime_total + 128) / cic->awtime_samples;
+}   
+
+static void
 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 {
 	unsigned long elapsed = jiffies - cic->last_end_request;
-	unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+	unsigned long ttime = min(elapsed, 2UL * 8);
 
 	cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
 	cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
@@ -3573,6 +3595,7 @@ static void cfq_completed_request(struct request_queue
*q, struct request *rq)
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
 	if (sync) {
+		cfq_update_io_awaittime(cfqd,RQ_CIC(rq)); /* added by joeytao,
2013.8.27*/
 		RQ_CIC(rq)->last_end_request = now;
 		if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
 			cfqd->last_delayed_sync = now;
@@ -4075,6 +4098,12 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_back_penalty = cfq_back_penalty;
 	cfqd->cfq_slice[0] = cfq_slice_async;
 	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_target_latency = cfq_target_latency; /* added by joeytao,
2013.8.5 */
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	cfqd->cfq_write_isolation = 0; /* added by joeytao, 2013.8.16 */
+#else
+	cfqd->cfq_write_isolation = 1; /* added by joeytao, 2013.8.21 */
+#endif
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
@@ -4154,6 +4183,8 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1],
1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
+SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
+SHOW_FUNCTION(cfq_write_isolation_show, cfqd->cfq_write_isolation, 0); 
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -4187,6 +4218,8 @@ STORE_FUNCTION(cfq_slice_async_store,
&cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
+STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1,
UINT_MAX, 1);
+STORE_FUNCTION(cfq_write_isolation_store, &cfqd->cfq_write_isolation, 0,
UINT_MAX, 0); 
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -4204,6 +4237,8 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
+	CFQ_ATTR(target_latency),
+	CFQ_ATTR(write_isolation),
 	__ATTR_NULL
 };
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2eee89..0c45b09 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -18,6 +18,11 @@ struct cfq_io_context {
 	unsigned long ttime_samples;
 	unsigned long ttime_mean;
 
+	/* added by joeytao */
+	unsigned long awtime_total;
+	unsigned long awtime_samples;
+	unsigned long awtime_mean;
+
 	struct list_head queue_list;
 	struct hlist_node cic_list;
 
-- 
1.7.1




--
View this message in context: http://linux-kernel.2935.n7.nabble.com/performance-drop-after-using-blkcg-tp567957p710886.html
Sent from the Linux Kernel mailing list archive at Nabble.com.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ