lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20201024091838.gzhn2dlx2j7xnixg@linutronix.de>
Date:   Sat, 24 Oct 2020 11:18:38 +0200
From:   Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        linux-rt-users <linux-rt-users@...r.kernel.org>,
        Steven Rostedt <rostedt@...dmis.org>
Subject: [ANNOUNCE] v5.9.1-rt19

Dear RT folks!

I'm pleased to announce the v5.9.1-rt19 patch set. 

Changes since v5.9.1-rt18:

  - Mike Galbraith reported a possible circular locking dependency with
    a seqcount. Backported a patch from upstream solving the issue.

  - David Runge reported a crash in the block layer.

  - The migrate-disable series by Peter Zijlstra has been update to v4.

  - Mike Galbraith reported a possible hang due to the new printk code.
    Dropped a patch from the printk code which was causing the problem
    as suggested by John Ogness.

  - The rtmutex clean up in v5.9-rc8-rt12 restructured the code path and
    removed the blk_schedule_flush_plug() invocation from the locking
    path. It turns out that it is still required and has been added
    back.

  - A small rtmutex related clean up:

    - Remove rt_mutex_lock_killable(), it has no users.

    - Use _mutex_lock_io_nested() for _mutex_lock_io() to avoid
      duplicated code.

Known issues
     - It has been pointed out that due to changes to the printk code the
       internal buffer representation changed. This is only an issue if tools
       like `crash' are used to extract the printk buffer from a kernel memory
       image.

The delta patch against v5.9.1-rt18 is appended below and can be found here:
 
     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/incr/patch-5.9.1-rt18-rt19.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.9.1-rt19

The RT patch against v5.9.1 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patch-5.9.1-rt19.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patches-5.9.1-rt19.tar.xz

Sebastian

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e37aa31332b70..99d2fb51e0e84 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -647,7 +647,7 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (!IS_ENABLED(CONFIG_SMP) ||
+	if (!IS_ENABLED(CONFIG_SMP) || IS_ENABLED(CONFIG_PREEMPT_RT) ||
 	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
 		return false;
 
diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
index 7179367bfb5e2..f0b2e07cd5c57 100644
--- a/include/linux/mutex_rt.h
+++ b/include/linux/mutex_rt.h
@@ -29,7 +29,6 @@ struct mutex {
 
 extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
 extern void __lockfunc _mutex_lock(struct mutex *lock);
-extern void __lockfunc _mutex_lock_io(struct mutex *lock);
 extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass);
 extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
 extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
@@ -46,7 +45,7 @@ extern void __lockfunc _mutex_unlock(struct mutex *lock);
 #define mutex_lock_killable(l)		_mutex_lock_killable(l)
 #define mutex_trylock(l)		_mutex_trylock(l)
 #define mutex_unlock(l)			_mutex_unlock(l)
-#define mutex_lock_io(l)		_mutex_lock_io(l);
+#define mutex_lock_io(l)		_mutex_lock_io_nested(l, 0);
 
 #define __mutex_owner(l)		((l)->lock.owner)
 
@@ -77,7 +76,7 @@ do {									\
 # define mutex_lock_killable_nested(l, s) \
 					_mutex_lock_killable(l)
 # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
-# define mutex_lock_io_nested(l, s)	_mutex_lock_io(l)
+# define mutex_lock_io_nested(l, s)	_mutex_lock_io_nested(l, s)
 #endif
 
 # define mutex_init(mutex)				\
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 5308cd7ddddf0..b02009f530263 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -112,7 +112,6 @@ extern void rt_mutex_lock(struct rt_mutex *lock);
 #endif
 
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
-extern int rt_mutex_lock_killable(struct rt_mutex *lock);
 extern int rt_mutex_trylock(struct rt_mutex *lock);
 
 extern void rt_mutex_unlock(struct rt_mutex *lock);
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f73c7eb68f27c..76e44e6c01004 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -173,6 +173,19 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * @lock:	Pointer to the associated lock
  */
 
+#define seqcount_LOCKNAME_init(s, _lock, lockname)			\
+	do {								\
+		seqcount_##lockname##_t *____s = (s);			\
+		seqcount_init(&____s->seqcount);			\
+		__SEQ_LOCK(____s->lock = (_lock));			\
+	} while (0)
+
+#define seqcount_raw_spinlock_init(s, lock)	seqcount_LOCKNAME_init(s, lock, raw_spinlock)
+#define seqcount_spinlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, spinlock)
+#define seqcount_rwlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, rwlock);
+#define seqcount_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, mutex);
+#define seqcount_ww_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, ww_mutex);
+
 /*
  * SEQCOUNT_LOCKNAME()	- Instantiate seqcount_LOCKNAME_t and helpers
  * seqprop_LOCKNAME_*()	- Property accessors for seqcount_LOCKNAME_t
@@ -190,13 +203,6 @@ typedef struct seqcount_##lockname {					\
 	__SEQ_LOCK(locktype	*lock);					\
 } seqcount_##lockname##_t;						\
 									\
-static __always_inline void						\
-seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
-{									\
-	seqcount_init(&s->seqcount);					\
-	__SEQ_LOCK(s->lock = lock);					\
-}									\
-									\
 static __always_inline seqcount_t *					\
 __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 {									\
@@ -284,8 +290,8 @@ SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mu
 	__SEQ_LOCK(.lock	= (assoc_lock))				\
 }
 
-#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 #define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
 #define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKNAME_ZERO(name, lock)
diff --git a/kernel/locking/mutex-rt.c b/kernel/locking/mutex-rt.c
index 35b06711997dd..2b849e6b9b4ae 100644
--- a/kernel/locking/mutex-rt.c
+++ b/kernel/locking/mutex-rt.c
@@ -65,6 +65,7 @@
 #include <linux/fs.h>
 #include <linux/futex.h>
 #include <linux/hrtimer.h>
+#include <linux/blkdev.h>
 
 #include "rtmutex_common.h"
 
@@ -85,55 +86,24 @@ void __mutex_do_init(struct mutex *mutex, const char *name,
 }
 EXPORT_SYMBOL(__mutex_do_init);
 
+static int _mutex_lock_blk_flush(struct mutex *lock, int state)
+{
+	/*
+	 * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
+	 * late if one of the callbacks needs to acquire a sleeping lock.
+	 */
+	if (blk_needs_flush_plug(current))
+		blk_schedule_flush_plug(current);
+	return __rt_mutex_lock_state(&lock->lock, state);
+}
+
 void __lockfunc _mutex_lock(struct mutex *lock)
 {
 	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	__rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
+	_mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(_mutex_lock);
 
-void __lockfunc _mutex_lock_io(struct mutex *lock)
-{
-	int token;
-
-	token = io_schedule_prepare();
-	_mutex_lock(lock);
-	io_schedule_finish(token);
-}
-EXPORT_SYMBOL_GPL(_mutex_lock_io);
-
-int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
-{
-	int ret;
-
-	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE);
-	if (ret)
-		mutex_release(&lock->dep_map, _RET_IP_);
-	return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_interruptible);
-
-int __lockfunc _mutex_lock_killable(struct mutex *lock)
-{
-	int ret;
-
-	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE);
-	if (ret)
-		mutex_release(&lock->dep_map, _RET_IP_);
-	return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_killable);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
-{
-	mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
-	__rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(_mutex_lock_nested);
-
 void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass)
 {
 	int token;
@@ -147,10 +117,42 @@ void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass)
 }
 EXPORT_SYMBOL_GPL(_mutex_lock_io_nested);
 
+int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
+{
+	int ret;
+
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
+	if (ret)
+		mutex_release(&lock->dep_map, _RET_IP_);
+	return ret;
+}
+EXPORT_SYMBOL(_mutex_lock_interruptible);
+
+int __lockfunc _mutex_lock_killable(struct mutex *lock)
+{
+	int ret;
+
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
+	if (ret)
+		mutex_release(&lock->dep_map, _RET_IP_);
+	return ret;
+}
+EXPORT_SYMBOL(_mutex_lock_killable);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
+{
+	mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
+	_mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(_mutex_lock_nested);
+
 void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
 {
 	mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
-	__rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
+	_mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(_mutex_lock_nest_lock);
 
@@ -159,7 +161,7 @@ int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass
 	int ret;
 
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
-	ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE);
+	ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
 	if (ret)
 		mutex_release(&lock->dep_map, _RET_IP_);
 	return ret;
@@ -171,7 +173,7 @@ int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
 	int ret;
 
 	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-	ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE);
+	ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
 	if (ret)
 		mutex_release(&lock->dep_map, _RET_IP_);
 	return ret;
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index ef22e1b52f8c8..cc0d7e99be00a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -2034,21 +2034,6 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
 	return __rt_mutex_slowtrylock(lock);
 }
 
-/**
- * rt_mutex_lock_killable - lock a rt_mutex killable
- *
- * @lock:		the rt_mutex to be locked
- *
- * Returns:
- *  0		on success
- * -EINTR	when interrupted by a signal
- */
-int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
-{
-	return rt_mutex_lock_state(lock, 0, TASK_KILLABLE);
-}
-EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
-
 int __sched __rt_mutex_trylock(struct rt_mutex *lock)
 {
 #ifdef CONFIG_PREEMPT_RT
diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c
index bca7a448206d6..ab05ce0903537 100644
--- a/kernel/locking/rwsem-rt.c
+++ b/kernel/locking/rwsem-rt.c
@@ -3,6 +3,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/signal.h>
 #include <linux/export.h>
+#include <linux/blkdev.h>
 
 #include "rtmutex_common.h"
 
@@ -87,6 +88,13 @@ static int __sched __down_read_common(struct rw_semaphore *sem, int state)
 	if (__down_read_trylock(sem))
 		return 0;
 
+	/*
+	 * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
+	 * late if one of the callbacks needs to acquire a sleeping lock.
+	 */
+	if (blk_needs_flush_plug(current))
+		blk_schedule_flush_plug(current);
+
 	might_sleep();
 	raw_spin_lock_irq(&m->wait_lock);
 	/*
@@ -209,6 +217,13 @@ static int __sched __down_write_common(struct rw_semaphore *sem, int state)
 	struct rt_mutex *m = &sem->rtmutex;
 	unsigned long flags;
 
+	/*
+	 * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
+	 * late if one of the callbacks needs to acquire a sleeping lock.
+	 */
+	if (blk_needs_flush_plug(current))
+		blk_schedule_flush_plug(current);
+
 	/* Take the rtmutex as a first step */
 	if (__rt_mutex_lock_state(m, state))
 		return -EINTR;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 78a277ea5c351..0c56873396a99 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3140,66 +3140,6 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
 }
 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
 
-/**
- * pr_flush() - Wait for printing threads to catch up.
- *
- * @timeout_ms:        The maximum time (in ms) to wait.
- * @reset_on_progress: Reset the timeout if forward progress is seen.
- *
- * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
- * represents infinite waiting.
- *
- * If @reset_on_progress is true, the timeout will be reset whenever any
- * printer has been seen to make some forward progress.
- *
- * Context: Any context if @timeout_ms is 0. Otherwise process context and
- * may sleep if a printer is not caught up.
- * Return: true if all enabled printers are caught up.
- */
-static bool pr_flush(int timeout_ms, bool reset_on_progress)
-{
-	int remaining = timeout_ms;
-	struct console *con;
-	u64 last_diff = 0;
-	u64 printk_seq;
-	u64 diff;
-	u64 seq;
-
-	seq = prb_next_seq(prb);
-
-	for (;;) {
-		diff = 0;
-
-		for_each_console(con) {
-			if (!(con->flags & CON_ENABLED))
-				continue;
-			printk_seq = atomic64_read(&con->printk_seq);
-			if (printk_seq < seq)
-				diff += seq - printk_seq;
-		}
-
-		if (diff != last_diff && reset_on_progress)
-			remaining = timeout_ms;
-
-		if (!diff || remaining == 0)
-			break;
-
-		if (remaining < 0) {
-			msleep(100);
-		} else if (remaining < 100) {
-			msleep(remaining);
-			remaining = 0;
-		} else {
-			msleep(100);
-			remaining -= 100;
-		}
-
-		last_diff = diff;
-	}
-
-	return (diff == 0);
-}
-
 /**
  * kmsg_dump - dump kernel log to kernel message dumpers.
  * @reason: the reason (oops, panic etc) for dumping
@@ -3222,12 +3162,6 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 			sync_mode = true;
 			pr_info("enabled sync mode\n");
 		}
-
-		/*
-		 * Give the printing threads time to flush, allowing up to 1
-		 * second of no printing forward progress before giving up.
-		 */
-		pr_flush(1000, true);
 	}
 
 	rcu_read_lock();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d54f1e7ef867..34158d8ecf194 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1903,9 +1903,16 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 }
 
 struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
-	struct completion *done;
+	struct task_struct		*task;
+	int				dest_cpu;
+	struct set_affinity_pending	*pending;
+};
+
+struct set_affinity_pending {
+	refcount_t		refs;
+	struct completion	done;
+	struct cpu_stop_work	stop_work;
+	struct migration_arg	arg;
 };
 
 /*
@@ -1937,8 +1944,10 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
+	struct set_affinity_pending *pending;
 	struct migration_arg *arg = data;
 	struct task_struct *p = arg->task;
+	int dest_cpu = arg->dest_cpu;
 	struct rq *rq = this_rq();
 	bool complete = false;
 	struct rq_flags rf;
@@ -1947,7 +1956,7 @@ static int migration_cpu_stop(void *data)
 	 * The original target CPU might have gone down and we might
 	 * be on another CPU but it doesn't matter.
 	 */
-	local_irq_disable();
+	local_irq_save(rf.flags);
 	/*
 	 * We need to explicitly wake pending tasks before running
 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
@@ -1957,6 +1966,8 @@ static int migration_cpu_stop(void *data)
 
 	raw_spin_lock(&p->pi_lock);
 	rq_lock(rq, &rf);
+
+	pending = p->migration_pending;
 	/*
 	 * If task_rq(p) != rq, it cannot be migrated here, because we're
 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1966,23 +1977,71 @@ static int migration_cpu_stop(void *data)
 		if (is_migration_disabled(p))
 			goto out;
 
-		if (task_on_rq_queued(p))
-			rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
-		else
-			p->wake_cpu = arg->dest_cpu;
-
-		if (arg->done) {
+		if (pending) {
 			p->migration_pending = NULL;
 			complete = true;
 		}
+
+		/* migrate_enable() --  we must not race against SCA */
+		if (dest_cpu < 0) {
+			/*
+			 * When this was migrate_enable() but we no longer
+			 * have a @pending, a concurrent SCA 'fixed' things
+			 * and we should be valid again. Nothing to do.
+			 */
+			if (!pending) {
+				WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+				goto out;
+			}
+
+			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+		}
+
+		if (task_on_rq_queued(p))
+			rq = __migrate_task(rq, &rf, p, dest_cpu);
+		else
+			p->wake_cpu = dest_cpu;
+
+	} else if (dest_cpu < 0) {
+		/*
+		 * This happens when we get migrated between migrate_enable()'s
+		 * preempt_enable() and scheduling the stopper task. At that
+		 * point we're a regular task again and not current anymore.
+		 *
+		 * A !PREEMPT kernel has a giant hole here, which makes it far
+		 * more likely.
+		 */
+
+		/*
+		 * When this was migrate_enable() but we no longer have an
+		 * @pending, a concurrent SCA 'fixed' things and we should be
+		 * valid again. Nothing to do.
+		 */
+		if (!pending) {
+			WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+			goto out;
+		}
+
+		/*
+		 * When migrate_enable() hits a rq mis-match we can't reliably
+		 * determine is_migration_disabled() and so have to chase after
+		 * it.
+		 */
+		task_rq_unlock(rq, p, &rf);
+		stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
+				    &pending->arg, &pending->stop_work);
+		return 0;
 	}
 out:
-	rq_unlock(rq, &rf);
-	raw_spin_unlock(&p->pi_lock);
-	local_irq_enable();
+	task_rq_unlock(rq, p, &rf);
 
 	if (complete)
-		complete_all(arg->done);
+		complete_all(&pending->done);
+
+	/* For pending->{arg,stop_work} */
+	pending = arg->pending;
+	if (pending && refcount_dec_and_test(&pending->refs))
+		wake_up_var(&pending->refs);
 
 	return 0;
 }
@@ -2095,13 +2154,6 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 	__do_set_cpus_allowed(p, new_mask, 0);
 }
 
-struct set_affinity_pending {
-	refcount_t		refs;
-	struct completion	done;
-	struct cpu_stop_work	stop_work;
-	struct migration_arg	arg;
-};
-
 /*
  * This function is wildly self concurrent; here be dragons.
  *
@@ -2173,8 +2225,8 @@ struct set_affinity_pending {
  * pending affinity completion is preceded an uninstallion of
  * p->migration_pending done with p->pi_lock held.
  */
-static int affine_move_task(struct rq *rq, struct rq_flags *rf,
-			    struct task_struct *p, int dest_cpu, unsigned int flags)
+static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
+			    int dest_cpu, unsigned int flags)
 {
 	struct set_affinity_pending my_pending = { }, *pending = NULL;
 	struct migration_arg arg = {
@@ -2240,13 +2292,18 @@ static int affine_move_task(struct rq *rq, struct rq_flags *rf,
 	if (WARN_ON_ONCE(!pending))
 		return -EINVAL;
 
-	arg.done = &pending->done;
-
 	if (flags & SCA_MIGRATE_ENABLE) {
 
+		refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
 		p->migration_flags &= ~MDF_PUSH;
 		task_rq_unlock(rq, p, rf);
-		pending->arg = arg;
+
+		pending->arg = (struct migration_arg) {
+			.task = p,
+			.dest_cpu = -1,
+			.pending = pending,
+		};
+
 		stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
 				    &pending->arg, &pending->stop_work);
 
@@ -2370,7 +2427,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 			p->nr_cpus_allowed != 1);
 	}
 
-	return affine_move_task(rq, &rf, p, dest_cpu, flags);
+	return affine_move_task(rq, p, &rf, dest_cpu, flags);
 
 out:
 	task_rq_unlock(rq, p, &rf);
@@ -3609,6 +3666,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	init_numa_balancing(clone_flags, p);
 #ifdef CONFIG_SMP
 	p->wake_entry.u_flags = CSD_TYPE_TTWU;
+	p->migration_pending = NULL;
 #endif
 }
 
diff --git a/localversion-rt b/localversion-rt
index 9e7cd66d9f44f..483ad771f201a 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt18
+-rt19

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ