[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ffd90972-d664-b024-f537-8d02f91d12d7@kernel.dk>
Date: Sat, 17 Oct 2020 14:32:30 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] task_work: cleanup notification modes
On 10/17/20 2:18 PM, Thomas Gleixner wrote:
> On Sat, Oct 17 2020 at 09:36, Jens Axboe wrote:
>> diff --git a/kernel/task_work.c b/kernel/task_work.c
>> index 613b2d634af8..d82c224ab5d5 100644
>> --- a/kernel/task_work.c
>> +++ b/kernel/task_work.c
>> @@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
>> * task_work_add - ask the @task to execute @work->func()
>> * @task: the task which should run the callback
>> * @work: the callback to run
>> - * @notify: send the notification if true
>> + * @notify: how to notify the targeted task
>> *
>> - * Queue @work for task_work_run() below and notify the @task if @notify.
>> + * Queue @work for task_work_run() below and notify the @task if @notify
>> + * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the
>
> s/work/works/
>
>> + * it will interrupt the targeted task and run the task_work. @TWA_RESUME
>> + * work is run only when the task exits the kernel and returns to user mode.
>
> It's also run before entering a guest. At least on x86, but all other
> architectures should do the same.
>
>> * Fails if the @task is exiting/exited and thus it can't process this @work.
>> * Otherwise @work->func() will be called when the @task returns from kernel
>> * mode or exits.
>> *
>> - * This is like the signal handler which runs in kernel mode, but it doesn't
>> - * try to wake up the @task.
>> + * If the targeted task is exiting, then an error is returned and the work item
>> + * is not queued. It's up to the caller to arrange for an alternative mechanism
>> + * in tht case.
>
> s/tht/that/
>
> Looks good otherwise.
Thanks! Fixed the two typos, and also included the guest mode. If you're fine
with it now, would be great to have your reviewed-by or similar.
commit 4799fc5b3bc3520a61d3afd714d11672e45c81cb
Author: Jens Axboe <axboe@...nel.dk>
Date: Fri Oct 16 09:02:26 2020 -0600
task_work: cleanup notification modes
A previous commit changed the notification mode from true/false to an
int, allowing notify-no, notify-yes, or signal-notify. This was
backwards compatible in the sense that any existing true/false user
would translate to either 0 (on notification sent) or 1, the latter
which mapped to TWA_RESUME. TWA_SIGNAL was assigned a value of 2.
Clean this up properly, and define a proper enum for the notification
mode. Now we have:
- TWA_NONE. This is 0, same as before the original change, meaning no
notification requested.
- TWA_RESUME. This is 1, same as before the original change, meaning
that we use TIF_NOTIFY_RESUME.
- TWA_SIGNAL. This uses TIF_SIGPENDING/JOBCTL_TASK_WORK for the
notification.
Clean up all the callers, switching their 0/1/false/true to using the
appropriate TWA_* mode for notifications.
Fixes: e91b48162332 ("task_work: teach task_work_add() to do signal_wake_up()")
Signed-off-by: Jens Axboe <axboe@...nel.dk>
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 1c08cb9eb9f6..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1277,7 +1277,7 @@ static void queue_task_work(struct mce *m, int kill_it)
else
current->mce_kill_me.func = kill_me_maybe;
- task_work_add(current, ¤t->mce_kill_me, true);
+ task_work_add(current, ¤t->mce_kill_me, TWA_RESUME);
}
/*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index b494187632b2..af323e2e3100 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -561,7 +561,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
* callback has been invoked.
*/
atomic_inc(&rdtgrp->waitcount);
- ret = task_work_add(tsk, &callback->work, true);
+ ret = task_work_add(tsk, &callback->work, TWA_RESUME);
if (ret) {
/*
* Task is exiting. Drop the refcount and free the callback.
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..8360f8d6be65 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -879,7 +879,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
estatus_node->task_work.func = ghes_kick_task_work;
estatus_node->task_work_cpu = smp_processor_id();
ret = task_work_add(current, &estatus_node->task_work,
- true);
+ TWA_RESUME);
if (ret)
estatus_node->task_work.func = NULL;
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4b9476521da6..b5117576792b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2229,7 +2229,7 @@ static void binder_deferred_fd_close(int fd)
__close_fd_get_file(fd, &twcb->file);
if (twcb->file) {
filp_close(twcb->file, current->files);
- task_work_add(current, &twcb->twork, true);
+ task_work_add(current, &twcb->twork, TWA_RESUME);
} else {
kfree(twcb);
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..709ada3151da 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,7 +339,7 @@ void fput_many(struct file *file, unsigned int refs)
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
init_task_work(&file->f_u.fu_rcuhead, ____fput);
- if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
+ if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
return;
/*
* After this task has run exit_task_work(),
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2e1dc354cd08..16e1d0e0d9b9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1756,7 +1756,7 @@ static void __io_free_req(struct io_kiocb *req)
struct task_struct *tsk;
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
}
}
}
@@ -1905,7 +1905,8 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
{
struct task_struct *tsk = req->task;
struct io_ring_ctx *ctx = req->ctx;
- int ret, notify;
+ enum task_work_notify_mode notify;
+ int ret;
if (tsk->flags & PF_EXITING)
return -ESRCH;
@@ -1916,7 +1917,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
* processing task_work. There's no reliable way to tell if TWA_RESUME
* will do the job.
*/
- notify = 0;
+ notify = TWA_NONE;
if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
notify = TWA_SIGNAL;
@@ -1985,7 +1986,7 @@ static void io_req_task_queue(struct io_kiocb *req)
init_task_work(&req->task_work, io_req_task_cancel);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
}
@@ -3199,7 +3200,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
/* queue just for cancelation */
init_task_work(&req->task_work, io_req_task_cancel);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
return 1;
@@ -4765,7 +4766,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
return 1;
diff --git a/fs/namespace.c b/fs/namespace.c
index 294e05a13d17..1a75336668a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1191,7 +1191,7 @@ static void mntput_no_expire(struct mount *mnt)
struct task_struct *task = current;
if (likely(!(task->flags & PF_KTHREAD))) {
init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
- if (!task_work_add(task, &mnt->mnt_rcu, true))
+ if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
return;
}
if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0fb93aafa478..0d848a1e9e62 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,9 +13,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
twork->func = func;
}
-#define TWA_RESUME 1
-#define TWA_SIGNAL 2
-int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+enum task_work_notify_mode {
+ TWA_NONE,
+ TWA_RESUME,
+ TWA_SIGNAL,
+};
+
+int task_work_add(struct task_struct *task, struct callback_head *twork,
+ enum task_work_notify_mode mode);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0e18aaf23a7b..00b0358739ab 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1823,7 +1823,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
t->utask->dup_xol_addr = area->vaddr;
init_task_work(&t->utask->dup_xol_work, dup_xol_work);
- task_work_add(t, &t->utask->dup_xol_work, true);
+ task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
}
/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5df903fccb60..c460e0496006 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1162,7 +1162,7 @@ static int irq_thread(void *data)
handler_fn = irq_thread_fn;
init_task_work(&on_exit_work, irq_thread_dtor);
- task_work_add(current, &on_exit_work, false);
+ task_work_add(current, &on_exit_work, TWA_NONE);
irq_thread_check_affinity(desc, action);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa4c6227cd6d..e17012be4d14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2928,7 +2928,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
curr->node_stamp += period;
if (!time_before(jiffies, curr->mm->numa_next_scan))
- task_work_add(curr, work, true);
+ task_work_add(curr, work, TWA_RESUME);
}
}
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 613b2d634af8..8d6e1217c451 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* task_work_add - ask the @task to execute @work->func()
* @task: the task which should run the callback
* @work: the callback to run
- * @notify: send the notification if true
+ * @notify: how to notify the targeted task
*
- * Queue @work for task_work_run() below and notify the @task if @notify.
- * Fails if the @task is exiting/exited and thus it can't process this @work.
- * Otherwise @work->func() will be called when the @task returns from kernel
- * mode or exits.
+ * Queue @work for task_work_run() below and notify the @task if @notify
+ * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
+ * it will interrupt the targeted task and run the task_work. @TWA_RESUME
+ * work is run only when the task exits the kernel and returns to user mode,
+ * or before entering guest mode. Fails if the @task is exiting/exited and thus
+ * it can't process this @work. Otherwise @work->func() will be called when the
+ * @task goes through one of the aforementioned transitions, or exits.
*
- * This is like the signal handler which runs in kernel mode, but it doesn't
- * try to wake up the @task.
+ * If the targeted task is exiting, then an error is returned and the work item
+ * is not queued. It's up to the caller to arrange for an alternative mechanism
+ * in that case.
*
- * Note: there is no ordering guarantee on works queued here.
+ * Note: there is no ordering guarantee on works queued here. The task_work
+ * list is LIFO.
*
* RETURNS:
* 0 if succeeds or -ESRCH.
*/
-int
-task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+int task_work_add(struct task_struct *task, struct callback_head *work,
+ enum task_work_notify_mode notify)
{
struct callback_head *head;
unsigned long flags;
@@ -38,6 +43,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
} while (cmpxchg(&task->task_works, head, work) != head);
switch (notify) {
+ case TWA_NONE:
+ break;
case TWA_RESUME:
set_notify_resume(task);
break;
@@ -54,6 +61,9 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
unlock_task_sighand(task, &flags);
}
break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
return 0;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e26bbccda7cc..61a614c21b9b 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1693,7 +1693,7 @@ long keyctl_session_to_parent(void)
/* the replacement session keyring is applied just prior to userspace
* restarting */
- ret = task_work_add(parent, newwork, true);
+ ret = task_work_add(parent, newwork, TWA_RESUME);
if (!ret)
newwork = NULL;
unlock:
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 536c99646f6a..06e226166aab 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -99,7 +99,7 @@ static void report_access(const char *access, struct task_struct *target,
info->access = access;
info->target = target;
info->agent = agent;
- if (task_work_add(current, &info->work, true) == 0)
+ if (task_work_add(current, &info->work, TWA_RESUME) == 0)
return; /* success */
WARN(1, "report_access called from exiting task");
--
Jens Axboe
Powered by blists - more mailing lists