lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 24 Oct 2016 06:27:11 -0800
From:   Kent Overstreet <kent.overstreet@...il.com>
To:     Davidlohr Bueso <dave@...olabs.net>,
        Eric Wheeler <git@...ux.ewheeler.net>
Cc:     Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        linux-bcache@...r.kernel.org
Subject: Re: ciao set_task_state() (was Re: [PATCH -v4 6/8] locking/mutex:
 Restructure wait loop)

On Sun, Oct 23, 2016 at 06:57:26PM -0700, Davidlohr Bueso wrote:
> On Wed, 19 Oct 2016, Peter Zijlstra wrote:
> 
> > Subject: sched: Better explain sleep/wakeup
> > From: Peter Zijlstra <peterz@...radead.org>
> > Date: Wed Oct 19 15:45:27 CEST 2016
> > 
> > There were a few questions wrt how sleep-wakeup works. Try and explain
> > it more.
> > 
> > Requested-by: Will Deacon <will.deacon@....com>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> > ---
> > include/linux/sched.h |   52 ++++++++++++++++++++++++++++++++++----------------
> > kernel/sched/core.c   |   15 +++++++-------
> > 2 files changed, 44 insertions(+), 23 deletions(-)
> > 
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!
> > #define set_task_state(tsk, state_value)			\
> > 	do {							\
> > 		(tsk)->task_state_change = _THIS_IP_;		\
> > -		smp_store_mb((tsk)->state, (state_value));		\
> > +		smp_store_mb((tsk)->state, (state_value));	\
> > 	} while (0)
> > 
> > -/*
> > - * set_current_state() includes a barrier so that the write of current->state
> > - * is correctly serialised wrt the caller's subsequent test of whether to
> > - * actually sleep:
> > - *
> > - *	set_current_state(TASK_UNINTERRUPTIBLE);
> > - *	if (do_i_need_to_sleep())
> > - *		schedule();
> > - *
> > - * If the caller does not need such serialisation then use __set_current_state()
> > - */
> > #define __set_current_state(state_value)			\
> > 	do {							\
> > 		current->task_state_change = _THIS_IP_;		\
> > @@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!
> > #define set_current_state(state_value)				\
> > 	do {							\
> > 		current->task_state_change = _THIS_IP_;		\
> > -		smp_store_mb(current->state, (state_value));		\
> > +		smp_store_mb(current->state, (state_value));	\
> > 	} while (0)
> > 
> > #else
> > 
> > +/*
> > + * @tsk had better be current, or you get to keep the pieces.
> 
> That reminds me we were getting rid of the set_task_state() calls. Bcache was
> pending, being only user in the kernel that doesn't actually use current; but
> instead breaks newly (yet blocked/uninterruptible) created garbage collection
> kthread. I cannot figure out why this is done (ie purposely accounting the
> load avg. Furthermore gc kicks in in very specific scenarios obviously, such
> as as by the allocator task, so I don't see why bcache gc should want to be
> interruptible.
> 
> Kent, Jens, can we get rid of this?

Here's a patch that just fixes the way gc gets woken up. Eric, you want to take
this?

-- >8 --
Subject: [PATCH] bcache: Make gc wakeup saner

This lets us ditch a set_task_state() call.

Signed-off-by: Kent Overstreet <kent.overstreet@...il.com>
---
 drivers/md/bcache/bcache.h  |  4 ++--
 drivers/md/bcache/btree.c   | 39 ++++++++++++++++++++-------------------
 drivers/md/bcache/btree.h   |  3 +--
 drivers/md/bcache/request.c |  4 +---
 drivers/md/bcache/super.c   |  2 ++
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6b420a55c7..c3ea03c9a1 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -425,7 +425,7 @@ struct cache {
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
 	 * cpu
 	 */
-	unsigned		invalidate_needs_gc:1;
+	unsigned		invalidate_needs_gc;
 
 	bool			discard; /* Get rid of? */
 
@@ -593,8 +593,8 @@ struct cache_set {
 
 	/* Counts how many sectors bio_insert has added to the cache */
 	atomic_t		sectors_to_gc;
+	wait_queue_head_t	gc_wait;
 
-	wait_queue_head_t	moving_gc_wait;
 	struct keybuf		moving_gc_keys;
 	/* Number of moving GC bios in flight */
 	struct semaphore	moving_in_flight;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 81d3db40cd..2efdce0724 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1757,32 +1757,34 @@ static void bch_btree_gc(struct cache_set *c)
 	bch_moving_gc(c);
 }
 
-static int bch_gc_thread(void *arg)
+static bool gc_should_run(struct cache_set *c)
 {
-	struct cache_set *c = arg;
 	struct cache *ca;
 	unsigned i;
 
-	while (1) {
-again:
-		bch_btree_gc(c);
+	for_each_cache(ca, c, i)
+		if (ca->invalidate_needs_gc)
+			return true;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread_should_stop())
-			break;
+	if (atomic_read(&c->sectors_to_gc) < 0)
+		return true;
 
-		mutex_lock(&c->bucket_lock);
+	return false;
+}
 
-		for_each_cache(ca, c, i)
-			if (ca->invalidate_needs_gc) {
-				mutex_unlock(&c->bucket_lock);
-				set_current_state(TASK_RUNNING);
-				goto again;
-			}
+static int bch_gc_thread(void *arg)
+{
+	struct cache_set *c = arg;
 
-		mutex_unlock(&c->bucket_lock);
+	while (1) {
+		wait_event_interruptible(c->gc_wait,
+			   kthread_should_stop() || gc_should_run(c));
 
-		schedule();
+		if (kthread_should_stop())
+			break;
+
+		set_gc_sectors(c);
+		bch_btree_gc(c);
 	}
 
 	return 0;
@@ -1790,11 +1792,10 @@ static int bch_gc_thread(void *arg)
 
 int bch_gc_thread_start(struct cache_set *c)
 {
-	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
+	c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
 	if (IS_ERR(c->gc_thread))
 		return PTR_ERR(c->gc_thread);
 
-	set_task_state(c->gc_thread, TASK_INTERRUPTIBLE);
 	return 0;
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 5c391fa01b..9b80417cd5 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -260,8 +260,7 @@ void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
 
 static inline void wake_up_gc(struct cache_set *c)
 {
-	if (c->gc_thread)
-		wake_up_process(c->gc_thread);
+	wake_up(&c->gc_wait);
 }
 
 #define MAP_DONE	0
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 40ffe5e424..a37c1776f2 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -196,10 +196,8 @@ static void bch_data_insert_start(struct closure *cl)
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
 
-	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
-		set_gc_sectors(op->c);
+	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
 		wake_up_gc(op->c);
-	}
 
 	if (op->bypass)
 		return bch_data_invalidate(cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 849ad441cd..66669c8f41 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1491,6 +1491,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	mutex_init(&c->bucket_lock);
 	init_waitqueue_head(&c->btree_cache_wait);
 	init_waitqueue_head(&c->bucket_wait);
+	init_waitqueue_head(&c->gc_wait);
 	sema_init(&c->uuid_write_mutex, 1);
 
 	spin_lock_init(&c->btree_gc_time.lock);
@@ -1550,6 +1551,7 @@ static void run_cache_set(struct cache_set *c)
 
 	for_each_cache(ca, c, i)
 		c->nbuckets += ca->sb.nbuckets;
+	set_gc_sectors(c);
 
 	if (CACHE_SYNC(&c->sb)) {
 		LIST_HEAD(journal);
-- 
2.9.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ