Message-ID: <20070829204048.GQ14130@parisc-linux.org>
Date:	Wed, 29 Aug 2007 14:40:48 -0600
From:	Matthew Wilcox <matthew@....cx>
To:	Linus Torvalds <torvalds@...l.org>
Cc:	linux-kernel@...r.kernel.org
Subject: [RFC] TASK_KILLED


I've long hated the non-killability of tasks accessing a dead
NFS server.  Linus had an idea for fixing this way back in 2002
(http://www.ussg.iu.edu/hypermail/linux/kernel/0208.0/0167.html),
which I've prototyped in this patch.

Splitting up TASK_* into separate bits is going to need a lot more
auditing, I think.  It was easier back in 2002, but since then we've
added TASK_STOPPED and TASK_TRACED, which also need to be checked
carefully.
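
To make the semantics concrete, here's a throwaway userspace
demonstration -- not part of the patch -- with the constants copied
from the sched.h hunk below and try_to_wake_up()'s test reduced to
the bare mask check.  It shows why a fatal signal (which, per the
kernel/signal.c hunk, wakes with TASK_INTERRUPTIBLE | TASK_WAKEKILL)
can now rouse a killable sleeper while a plain D-state sleep stays
asleep:

#include <stdio.h>

#define __TASK_INTERRUPTIBLE	1
#define __TASK_UNINTERRUPTIBLE	2
#define TASK_WAKEKILL		256
#define TASK_WAKESIGNAL		512
#define TASK_LOADAVG		1024

#define TASK_INTERRUPTIBLE	(TASK_WAKESIGNAL | __TASK_INTERRUPTIBLE)
#define TASK_UNINTERRUPTIBLE	(TASK_LOADAVG | __TASK_UNINTERRUPTIBLE)
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)

/* try_to_wake_up(), reduced to its state test: wake on any shared bit */
static int would_wake(unsigned int state, unsigned int mask)
{
	return (state & mask) != 0;
}

int main(void)
{
	/* signal_wake_up(t, 0): ordinary signal with a handler */
	unsigned int sig_mask  = TASK_INTERRUPTIBLE;
	/* signal_wake_up(t, 1): SIGKILL / unhandled fatal signal */
	unsigned int kill_mask = TASK_INTERRUPTIBLE | TASK_WAKEKILL;

	printf("fatal signal wakes D-state sleep?   %d\n",
	       would_wake(TASK_UNINTERRUPTIBLE, kill_mask));	/* 0 */
	printf("fatal signal wakes killable sleep?  %d\n",
	       would_wake(TASK_KILLABLE, kill_mask));		/* 1 */
	printf("caught signal wakes killable sleep? %d\n",
	       would_wake(TASK_KILLABLE, sig_mask));		/* 0 */
	return 0;
}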

There's some debug code left in here to discourage Linus from just
applying it.

I've only added one real user of the killable concept to this patch --
try_lock_page().  However, this is enough for 'cat */*/*' to be killable
with a ^C when I unplug the ethernet cable between the client and the
NFS server.
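
The caller-side pattern looks like this -- read_one_page() is a
made-up example, mirroring what the mm/filemap.c hunks below do,
and -EIO is simply the error this patch chose for the killed case:

static int read_one_page(struct page *page)
{
	/* blocks in TASK_KILLABLE rather than TASK_UNINTERRUPTIBLE */
	if (!try_lock_page(page))
		return -EIO;	/* killed while waiting for PG_locked */

	/* ... page is locked; do the I/O ... */

	unlock_page(page);
	return 0;
}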

I have another version of this patch which makes TASK_KILLABLE a separate
state on par with TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE, but I
don't like it as much as this one.  I'll post it if there's demand.

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ee4814d..6a3c876 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -140,13 +140,8 @@ static const char *task_state_array[] = {
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-	unsigned int state = (tsk->state & (TASK_RUNNING |
-					    TASK_INTERRUPTIBLE |
-					    TASK_UNINTERRUPTIBLE |
-					    TASK_STOPPED |
-					    TASK_TRACED)) |
-			(tsk->exit_state & (EXIT_ZOMBIE |
-					    EXIT_DEAD));
+	unsigned int state = (tsk->state & TASK_REPORT) |
+			(tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD));
 	const char **p = &task_state_array[0];
 
 	while (state) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 507ddff..29e3f21 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -220,7 +220,7 @@ Einval:
 
 static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 {
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_KILLABLE);
 	if (!kiocbIsKicked(iocb))
 		schedule();
 	else
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8a83537..56675e4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -154,6 +154,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 }
 
 extern void FASTCALL(__lock_page(struct page *page));
+extern int FASTCALL(__try_lock_page(struct page *page));
 extern void FASTCALL(__lock_page_nosync(struct page *page));
 extern void FASTCALL(unlock_page(struct page *page));
 
@@ -168,6 +169,18 @@ static inline void lock_page(struct page *page)
 }
 
 /*
+ * try_lock_page is like lock_page but sleeps killably.  It returns
+ * 1 if it locked the page and 0 if it was killed while waiting.
+ */
+static inline int try_lock_page(struct page *page)
+{
+	might_sleep();
+	if (TestSetPageLocked(page))
+		return __try_lock_page(page);
+	return 1;
+}
+
+/*
  * lock_page_nosync should only be used if we can't pin the page's inode.
  * Doesn't play quite so well with block device plugging.
  */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bd6a032..c0c7d7c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -165,16 +165,35 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
  * mistake.
  */
 #define TASK_RUNNING		0
-#define TASK_INTERRUPTIBLE	1
-#define TASK_UNINTERRUPTIBLE	2
-#define TASK_STOPPED		4
-#define TASK_TRACED		8
+#define __TASK_INTERRUPTIBLE	1
+#define __TASK_UNINTERRUPTIBLE	2
+#define __TASK_STOPPED		4
+#define __TASK_TRACED		8
 /* in tsk->exit_state */
 #define EXIT_ZOMBIE		16
 #define EXIT_DEAD		32
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
 #define TASK_DEAD		128
+#define TASK_WAKEKILL		256
+#define TASK_WAKESIGNAL		512
+#define TASK_LOADAVG		1024
+
+/* Convenience macros for the sake of set_task_state */
+#define TASK_INTERRUPTIBLE	(TASK_WAKESIGNAL | __TASK_INTERRUPTIBLE)
+#define TASK_UNINTERRUPTIBLE	(TASK_LOADAVG | __TASK_UNINTERRUPTIBLE)
+#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
+
+/* Convenience macros for the sake of wake_up */
+#define TASK_NORMAL		(__TASK_INTERRUPTIBLE | __TASK_UNINTERRUPTIBLE)
+#define TASK_ALL		(TASK_WAKESIGNAL | TASK_WAKEKILL | TASK_LOADAVG)
+
+/* get_task_state() */
+#define TASK_REPORT		(TASK_RUNNING | __TASK_INTERRUPTIBLE | \
+				__TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+				__TASK_TRACED)
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0e68628..282efc3 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -155,11 +155,17 @@ wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
 #define wake_up(x)			__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
 #define wake_up_all(x)			__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL)
-#define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
-#define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
-#define wake_up_interruptible_all(x)	__wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
-#define	wake_up_locked(x)		__wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
-#define wake_up_interruptible_sync(x)   __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_locked(x)		__wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
+
+#define wake_up_interruptible(x)	__wake_up(x, TASK_WAKESIGNAL, 1, NULL)
+#define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_WAKESIGNAL, nr, NULL)
+#define wake_up_interruptible_all(x)	__wake_up(x, TASK_WAKESIGNAL, 0, NULL)
+#define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_WAKESIGNAL, 1)
+
+#define wake_up_killable(x)		__wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, 1, NULL)
+#define wake_up_killable_nr(x, nr)	__wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, nr, NULL)
+#define wake_up_killable_all(x)		__wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, 0, NULL)
+#define wake_up_killable_sync(x)	__wake_up_sync((x), TASK_WAKESIGNAL | TASK_WAKEKILL, 1)
 
 #define __wait_event(wq, condition) 					\
 do {									\
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fe473a..6901784 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -944,7 +944,7 @@ static int effective_prio(struct task_struct *p)
  */
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
-	if (p->state == TASK_UNINTERRUPTIBLE)
+	if (p->state & TASK_LOADAVG)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, wakeup);
@@ -958,7 +958,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
 {
 	update_rq_clock(rq);
 
-	if (p->state == TASK_UNINTERRUPTIBLE)
+	if (p->state & TASK_LOADAVG)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, 0);
@@ -970,7 +970,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
  */
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (p->state == TASK_UNINTERRUPTIBLE)
+	if (p->state & TASK_LOADAVG)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, sleep);
@@ -1566,8 +1566,7 @@ out:
 
 int fastcall wake_up_process(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-				 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+	return try_to_wake_up(p, TASK_ALL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
@@ -3489,7 +3488,7 @@ need_resched_nonpreemptible:
 	__update_rq_clock(rq);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
+		if (unlikely((prev->state & __TASK_INTERRUPTIBLE) &&
 				unlikely(signal_pending(prev)))) {
 			prev->state = TASK_RUNNING;
 		} else {
@@ -3707,8 +3706,7 @@ void fastcall complete(struct completion *x)
 
 	spin_lock_irqsave(&x->wait.lock, flags);
 	x->done++;
-	__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
-			 1, 0, NULL);
+	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
 	spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete);
@@ -3719,8 +3717,7 @@ void fastcall complete_all(struct completion *x)
 
 	spin_lock_irqsave(&x->wait.lock, flags);
 	x->done += UINT_MAX/2;
-	__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
-			 0, 0, NULL);
+	__wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
 	spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete_all);
diff --git a/kernel/signal.c b/kernel/signal.c
index ad63109..ce76369 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -459,15 +459,15 @@ void signal_wake_up(struct task_struct *t, int resume)
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
 
 	/*
-	 * For SIGKILL, we want to wake it up in the stopped/traced case.
-	 * We don't check t->state here because there is a race with it
+	 * For SIGKILL, we want to wake it up in the stopped/traced/killable
+	 * case. We don't check t->state here because there is a race with it
 	 * executing another processor and just now entering stopped state.
 	 * By using wake_up_state, we ensure the process will wake up and
 	 * handle its death signal.
 	 */
 	mask = TASK_INTERRUPTIBLE;
 	if (resume)
-		mask |= TASK_STOPPED | TASK_TRACED;
+		mask |= TASK_WAKEKILL;
 	if (!wake_up_state(t, mask))
 		kick_process(t);
 }
@@ -626,7 +626,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
 			state = TASK_STOPPED;
 			if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
 				set_tsk_thread_flag(t, TIF_SIGPENDING);
-				state |= TASK_INTERRUPTIBLE;
+				state |= TASK_WAKESIGNAL;
 			}
 			wake_up_state(t, state);
 
@@ -841,7 +841,7 @@ static inline int wants_signal(int sig, struct task_struct *p)
 		return 0;
 	if (sig == SIGKILL)
 		return 1;
-	if (p->state & (TASK_STOPPED | TASK_TRACED))
+	if (p->state & (__TASK_STOPPED | __TASK_TRACED))
 		return 0;
 	return task_curr(p) || !signal_pending(p);
 }
@@ -1446,7 +1446,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 	BUG_ON(sig == -1);
 
  	/* do_notify_parent_cldstop should have been called instead.  */
- 	BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
+ 	BUG_ON(tsk->state & (__TASK_STOPPED|__TASK_TRACED));
 
 	BUG_ON(!tsk->ptrace &&
 	       (tsk->group_leader != tsk || !thread_group_empty(tsk)));
@@ -1713,7 +1713,7 @@ static int do_signal_stop(int signr)
 			 * so this check has no races.
 			 */
 			if (!t->exit_state &&
-			    !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+			    !(t->state & (__TASK_STOPPED|__TASK_TRACED))) {
 				stop_count++;
 				signal_wake_up(t, 0);
 			}
diff --git a/mm/filemap.c b/mm/filemap.c
index 90b657b..3cb88ff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -170,6 +170,12 @@ static int sync_page(void *word)
 	return 0;
 }
 
+static int sync_page_killable(void *word)
+{
+	sync_page(word);
+	return signal_pending(current);
+}
+
 /**
  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping:	address space structure to write
@@ -574,6 +580,14 @@ void fastcall __lock_page(struct page *page)
 }
 EXPORT_SYMBOL(__lock_page);
 
+int fastcall __try_lock_page(struct page *page)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+	return !__wait_on_bit_lock(page_waitqueue(page), &wait,
+					sync_page_killable, TASK_KILLABLE);
+}
+
 /*
  * Variant of lock_page that does not require the caller to hold a reference
  * on the page's mapping.
@@ -975,7 +989,10 @@ page_ok:
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		lock_page(page);
+		if (!try_lock_page(page)) {
+			printk(KERN_EMERG "Taking first path to killed\n");
+			goto readpage_eio;
+		}
 
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
@@ -1003,7 +1020,10 @@ readpage:
 		}
 
 		if (!PageUptodate(page)) {
-			lock_page(page);
+			if (!try_lock_page(page)) {
+				printk(KERN_EMERG "Taking second path to killed\n");
+				goto readpage_eio;
+			}
 			if (!PageUptodate(page)) {
 				if (page->mapping == NULL) {
 					/*
@@ -1014,15 +1034,16 @@ readpage:
 					goto find_page;
 				}
 				unlock_page(page);
-				error = -EIO;
 				shrink_readahead_size_eio(filp, &ra);
-				goto readpage_error;
+				goto readpage_eio;
 			}
 			unlock_page(page);
 		}
 
 		goto page_ok;
 
+readpage_eio:
+		error = -EIO;
 readpage_error:
 		/* UHHUH! A synchronous read error occurred. Report it */
 		desc->error = error;

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."