lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160901113919.GI10138@twins.programming.kicks-ass.net>
Date:   Thu, 1 Sep 2016 13:39:19 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Oleg Nesterov <oleg@...hat.com>
Cc:     Ingo Molnar <mingo@...nel.org>, Al Viro <viro@...IV.linux.org.uk>,
        Bart Van Assche <bvanassche@....org>,
        Johannes Weiner <hannes@...xchg.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Neil Brown <neilb@...e.de>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] sched/wait: abort_exclusive_wait() should pass
 TASK_NORMAL to wake_up()

On Fri, Aug 26, 2016 at 02:45:28PM +0200, Oleg Nesterov wrote:
> Otherwise this logic only works if mode is "compatible" with another
> exclusive waiter.
> 
> If some wq has both TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE waiters,
> abort_exclusive_wait() won't wake an uninterruptible waiter.
> 
> The main user is __wait_on_bit_lock() and currently it is fine but only
> because TASK_KILLABLE includes TASK_UNINTERRUPTIBLE and we do not have
> lock_page_interruptible() yet.

So mixing INTERRUPTIBLE and UNINTERRUPTIBLE and then not using
TASK_NORMAL for wakeups is a mis-feature/abuse of waitqueues IMO.

That said, people do 'creative' things, so maybe we should add some
debug infrastructure to detect this mismatch.

Something like the below perhaps? It will miss people using the (old)
add_wait_queue() (which are plenty :/) but there's nothing quick we can
do about those.

Completely untested..

---
 include/linux/wait.h | 13 ++++++++++++-
 kernel/sched/wait.c  | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3ff74d764fa..e99ea720c5f9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -39,6 +39,9 @@ struct wait_bit_queue {
 struct __wait_queue_head {
 	spinlock_t		lock;
 	struct list_head	task_list;
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	unsigned int		state;
+#endif
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -48,6 +51,13 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)				\
+	.state = -1,
+#else
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)
+#endif
+
 #define __WAITQUEUE_INITIALIZER(name, tsk) {				\
 	.private	= tsk,						\
 	.func		= default_wake_function,			\
@@ -58,7 +68,8 @@ struct task_struct;
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+	.task_list	= { &(name).task_list, &(name).task_list },	\
+	__DEBUG_WAIT_QUEUE_HEAD_INIT(name) }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6a538a..cb71c56c5e76 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -16,6 +16,9 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c
 	spin_lock_init(&q->lock);
 	lockdep_set_class_and_name(&q->lock, key, name);
 	INIT_LIST_HEAD(&q->task_list);
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	q->state = -1;
+#endif
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
@@ -67,6 +70,16 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 {
 	wait_queue_t *curr, *next;
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	if (q->state != -1) {
+		/*
+		 * WARN if we have INTERRUPTIBLE and UNINTERRUPTIBLE
+		 * waiters and do not use TASK_NORMAL to wake.
+		 */
+		WARN_ON_ONCE(q->state != (mode & TASK_NORMAL));
+	}
+#endif
+
 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 		unsigned flags = curr->flags;
 
@@ -156,6 +169,17 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
+/* Debug aid: accumulate the TASK_NORMAL bits of each waiter's state in q. */
+static inline void prepare_debug(wait_queue_head_t *q, int state)
+{
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	/* -1 is the value set at init, i.e. nothing recorded yet; start from 0. */
+	if (q->state == -1)
+		q->state = 0;
+	q->state |= state & TASK_NORMAL;
+#endif
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
@@ -178,6 +202,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue(q, wait);
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -192,6 +217,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue_tail(q, wait);
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
@@ -214,6 +240,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 			__add_wait_queue(q, wait);
 	}
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 
 	return 0;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ