[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1188479497.6755.12.camel@heimdal.trondhjem.org>
Date: Thu, 30 Aug 2007 09:11:37 -0400
From: Trond Myklebust <trond.myklebust@....uio.no>
To: Matthew Wilcox <matthew@....cx>
Cc: Linus Torvalds <torvalds@...l.org>, linux-kernel@...r.kernel.org
Subject: Re: [RFC] TASK_KILLED
On Wed, 2007-08-29 at 14:40 -0600, Matthew Wilcox wrote:
> I've long hated the non-killability of tasks accessing a dead
> NFS server. Linus had an idea for fixing this way back in 2002:
> http://www.ussg.iu.edu/hypermail/linux/kernel/0208.0/0167.html which
> I've prototyped in this patch.
>
> Splitting up TASK_* into separate bits is going to need a lot more
> auditing, I think. It was easier back in 2002, but since then we've added
> TASK_STOPPED and TASK_TRACED which also need to be gingerly checked for.
>
> There's some debug code left in here to discourage Linus from just
> applying it.
>
> I've only added one real user of the killable concept to this patch --
> try_lock_page(). However, this is enough for 'cat */*/*' to be killable
> with a ^C when I unplug the ethernet cord between it and the nfs server.
>
> I have another version of this patch which makes TASK_KILLABLE a separate
> state on par with TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE, but I
> don't like it as much as this one. I'll post it if there's demand.
>
> diff --git a/fs/proc/array.c b/fs/proc/array.c
> index ee4814d..6a3c876 100644
> --- a/fs/proc/array.c
> +++ b/fs/proc/array.c
> @@ -140,13 +140,8 @@ static const char *task_state_array[] = {
>
> static inline const char *get_task_state(struct task_struct *tsk)
> {
> - unsigned int state = (tsk->state & (TASK_RUNNING |
> - TASK_INTERRUPTIBLE |
> - TASK_UNINTERRUPTIBLE |
> - TASK_STOPPED |
> - TASK_TRACED)) |
> - (tsk->exit_state & (EXIT_ZOMBIE |
> - EXIT_DEAD));
> + unsigned int state = (tsk->state & TASK_REPORT) |
> + (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD));
> const char **p = &task_state_array[0];
>
> while (state) {
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 507ddff..29e3f21 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -220,7 +220,7 @@ Einval:
>
> static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
> {
> - set_current_state(TASK_UNINTERRUPTIBLE);
> + set_current_state(TASK_KILLABLE);
> if (!kiocbIsKicked(iocb))
> schedule();
> else
Won't this change just cause functions like do_sync_read() and
do_sync_write() to loop forever when you kill the process?
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 8a83537..56675e4 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -154,6 +154,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
> }
>
> extern void FASTCALL(__lock_page(struct page *page));
> +extern int FASTCALL(__try_lock_page(struct page *page));
> extern void FASTCALL(__lock_page_nosync(struct page *page));
> extern void FASTCALL(unlock_page(struct page *page));
>
> @@ -168,6 +169,18 @@ static inline void lock_page(struct page *page)
> }
>
> /*
> + * try_lock_page is like lock_page but sleeps killably. It returns
> + * 1 if it locked the page and 0 if it was killed while waiting.
> + */
> +static inline int try_lock_page(struct page *page)
> +{
> + might_sleep();
> + if (TestSetPageLocked(page))
> + return __try_lock_page(page);
> + return 1;
> +}
> +
> +/*
> * lock_page_nosync should only be used if we can't pin the page's inode.
> * Doesn't play quite so well with block device plugging.
> */
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index bd6a032..c0c7d7c 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -165,16 +165,35 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
> * mistake.
> */
> #define TASK_RUNNING 0
> -#define TASK_INTERRUPTIBLE 1
> -#define TASK_UNINTERRUPTIBLE 2
> -#define TASK_STOPPED 4
> -#define TASK_TRACED 8
> +#define __TASK_INTERRUPTIBLE 1
> +#define __TASK_UNINTERRUPTIBLE 2
> +#define __TASK_STOPPED 4
> +#define __TASK_TRACED 8
> /* in tsk->exit_state */
> #define EXIT_ZOMBIE 16
> #define EXIT_DEAD 32
> /* in tsk->state again */
> #define TASK_NONINTERACTIVE 64
> #define TASK_DEAD 128
> +#define TASK_WAKEKILL 256
> +#define TASK_WAKESIGNAL 512
> +#define TASK_LOADAVG 1024
> +
> +/* Convenience macros for the sake of set_task_state */
> +#define TASK_INTERRUPTIBLE (TASK_WAKESIGNAL | __TASK_INTERRUPTIBLE)
> +#define TASK_UNINTERRUPTIBLE (TASK_LOADAVG | __TASK_UNINTERRUPTIBLE)
> +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
> +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
> +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
> +
> +/* Convenience macros for the sake of wake_up */
> +#define TASK_NORMAL (__TASK_INTERRUPTIBLE | __TASK_UNINTERRUPTIBLE)
> +#define TASK_ALL (TASK_WAKESIGNAL | TASK_WAKEKILL | TASK_LOADAVG)
> +
> +/* get_task_state() */
> +#define TASK_REPORT (TASK_RUNNING | __TASK_INTERRUPTIBLE | \
> + __TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
> + __TASK_TRACED)
>
> #define __set_task_state(tsk, state_value) \
> do { (tsk)->state = (state_value); } while (0)
> diff --git a/include/linux/wait.h b/include/linux/wait.h
> index 0e68628..282efc3 100644
> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -155,11 +155,17 @@ wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
> #define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
> #define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
> #define wake_up_all(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL)
> -#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
> -#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
> -#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
> -#define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
> -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
> +#define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
> +
> +#define wake_up_interruptible(x) __wake_up(x, TASK_WAKESIGNAL, 1, NULL)
> +#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_WAKESIGNAL, nr, NULL)
> +#define wake_up_interruptible_all(x) __wake_up(x, TASK_WAKESIGNAL, 0, NULL)
> +#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_WAKESIGNAL, 1)
> +
> +#define wake_up_killable(x) __wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, 1, NULL)
> +#define wake_up_killable_nr(x, nr) __wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, nr, NULL)
> +#define wake_up_killable_all(x) __wake_up(x, TASK_WAKESIGNAL | TASK_WAKEKILL, 0, NULL)
> +#define wake_up_killable_sync(x) __wake_up_sync((x), TASK_WAKESIGNAL | TASK_WAKEKILL, 1)
>
> #define __wait_event(wq, condition) \
> do { \
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 9fe473a..6901784 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -944,7 +944,7 @@ static int effective_prio(struct task_struct *p)
> */
> static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
> {
> - if (p->state == TASK_UNINTERRUPTIBLE)
> + if (p->state & TASK_LOADAVG)
> rq->nr_uninterruptible--;
>
> enqueue_task(rq, p, wakeup);
> @@ -958,7 +958,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
> {
> update_rq_clock(rq);
>
> - if (p->state == TASK_UNINTERRUPTIBLE)
> + if (p->state & TASK_LOADAVG)
> rq->nr_uninterruptible--;
>
> enqueue_task(rq, p, 0);
> @@ -970,7 +970,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
> */
> static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
> {
> - if (p->state == TASK_UNINTERRUPTIBLE)
> + if (p->state & TASK_LOADAVG)
> rq->nr_uninterruptible++;
>
> dequeue_task(rq, p, sleep);
> @@ -1566,8 +1566,7 @@ out:
>
> int fastcall wake_up_process(struct task_struct *p)
> {
> - return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
> - TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
> + return try_to_wake_up(p, TASK_ALL, 0);
> }
> EXPORT_SYMBOL(wake_up_process);
>
> @@ -3489,7 +3488,7 @@ need_resched_nonpreemptible:
> __update_rq_clock(rq);
>
> if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
> - if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
> + if (unlikely((prev->state & __TASK_INTERRUPTIBLE) &&
> unlikely(signal_pending(prev)))) {
> prev->state = TASK_RUNNING;
> } else {
> @@ -3707,8 +3706,7 @@ void fastcall complete(struct completion *x)
>
> spin_lock_irqsave(&x->wait.lock, flags);
> x->done++;
> - __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
> - 1, 0, NULL);
> + __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
> spin_unlock_irqrestore(&x->wait.lock, flags);
> }
> EXPORT_SYMBOL(complete);
> @@ -3719,8 +3717,7 @@ void fastcall complete_all(struct completion *x)
>
> spin_lock_irqsave(&x->wait.lock, flags);
> x->done += UINT_MAX/2;
> - __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
> - 0, 0, NULL);
> + __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
> spin_unlock_irqrestore(&x->wait.lock, flags);
> }
> EXPORT_SYMBOL(complete_all);
> diff --git a/kernel/signal.c b/kernel/signal.c
> index ad63109..ce76369 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -459,15 +459,15 @@ void signal_wake_up(struct task_struct *t, int resume)
> set_tsk_thread_flag(t, TIF_SIGPENDING);
>
> /*
> - * For SIGKILL, we want to wake it up in the stopped/traced case.
> - * We don't check t->state here because there is a race with it
> + * For SIGKILL, we want to wake it up in the stopped/traced/killable
> + * case. We don't check t->state here because there is a race with it
> * executing another processor and just now entering stopped state.
> * By using wake_up_state, we ensure the process will wake up and
> * handle its death signal.
> */
> mask = TASK_INTERRUPTIBLE;
> if (resume)
> - mask |= TASK_STOPPED | TASK_TRACED;
> + mask |= TASK_WAKEKILL;
> if (!wake_up_state(t, mask))
> kick_process(t);
> }
> @@ -626,7 +626,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
> state = TASK_STOPPED;
> if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
> set_tsk_thread_flag(t, TIF_SIGPENDING);
> - state |= TASK_INTERRUPTIBLE;
> + state |= TASK_WAKESIGNAL;
> }
> wake_up_state(t, state);
>
> @@ -841,7 +841,7 @@ static inline int wants_signal(int sig, struct task_struct *p)
> return 0;
> if (sig == SIGKILL)
> return 1;
> - if (p->state & (TASK_STOPPED | TASK_TRACED))
> + if (p->state & (__TASK_STOPPED | __TASK_TRACED))
> return 0;
> return task_curr(p) || !signal_pending(p);
> }
> @@ -1446,7 +1446,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
> BUG_ON(sig == -1);
>
> /* do_notify_parent_cldstop should have been called instead. */
> - BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
> + BUG_ON(tsk->state & (__TASK_STOPPED|__TASK_TRACED));
>
> BUG_ON(!tsk->ptrace &&
> (tsk->group_leader != tsk || !thread_group_empty(tsk)));
> @@ -1713,7 +1713,7 @@ static int do_signal_stop(int signr)
> * so this check has no races.
> */
> if (!t->exit_state &&
> - !(t->state & (TASK_STOPPED|TASK_TRACED))) {
> + !(t->state & (__TASK_STOPPED|__TASK_TRACED))) {
> stop_count++;
> signal_wake_up(t, 0);
> }
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 90b657b..3cb88ff 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -170,6 +170,12 @@ static int sync_page(void *word)
> return 0;
> }
>
> +static int sync_page_killable(void *word)
> +{
> + sync_page(word);
> + return signal_pending(current);
> +}
> +
> /**
> * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
> * @mapping: address space structure to write
> @@ -574,6 +580,14 @@ void fastcall __lock_page(struct page *page)
> }
> EXPORT_SYMBOL(__lock_page);
>
> +int fastcall __try_lock_page(struct page *page)
> +{
> + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
> +
> + return !__wait_on_bit_lock(page_waitqueue(page), &wait,
> + sync_page_killable, TASK_KILLABLE);
> +}
> +
> /*
> * Variant of lock_page that does not require the caller to hold a reference
> * on the page's mapping.
> @@ -975,7 +989,10 @@ page_ok:
>
> page_not_up_to_date:
> /* Get exclusive access to the page ... */
> - lock_page(page);
> + if (!try_lock_page(page)) {
> + printk(KERN_EMERG "Taking first path to killed\n");
> + goto readpage_eio;
> + }
>
> /* Did it get truncated before we got the lock? */
> if (!page->mapping) {
> @@ -1003,7 +1020,10 @@ readpage:
> }
>
> if (!PageUptodate(page)) {
> - lock_page(page);
> + if (!try_lock_page(page)) {
> + printk(KERN_EMERG "Taking second path to killed\n");
> + goto readpage_eio;
> + }
> if (!PageUptodate(page)) {
> if (page->mapping == NULL) {
> /*
> @@ -1014,15 +1034,16 @@ readpage:
> goto find_page;
> }
> unlock_page(page);
> - error = -EIO;
> shrink_readahead_size_eio(filp, &ra);
> - goto readpage_error;
> + goto readpage_eio;
> }
> unlock_page(page);
> }
>
> goto page_ok;
>
> +readpage_eio:
> + error = -EIO;
> readpage_error:
> /* UHHUH! A synchronous read error occurred. Report it */
> desc->error = error;
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists