[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250126142034.GA28135@redhat.com>
Date: Sun, 26 Jan 2025 15:20:34 +0100
From: Oleg Nesterov <oleg@...hat.com>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Paolo Bonzini <pbonzini@...hat.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Christian Brauner <brauner@...nel.org>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [GIT PULL] KVM changes for Linux 6.14
On 01/25, Linus Torvalds wrote:
>
> Keith pinpointed the user space logic to fork_remap():
>
> https://github.com/google/minijail/blob/main/rust/minijail/src/lib.rs#L987
>
> and honestly, I do think it makes sense for user space to ask "am I
> single-threaded" (which is presumably the thing that breaks), and the
> code for that is pretty simple:
>
> fn is_single_threaded() -> io::Result<bool> {
> match count_dir_entries("/proc/self/task") {
> Ok(1) => Ok(true),
> Ok(_) => Ok(false),
> Err(e) => Err(e),
> }
> }
>
> and I really don't think user space is "wrong".
>
> So the fact that a kernel helper thread that runs async in the
> background and does random background infrastructure things that do
> not really affect user space should probably simply not break this
> kind of simple (and admittedly simplistic) user space logic.
>
> Should we just add some flag to say "don't show this thread in this
> context"?
Not sure I understand... Looking at is_single_threaded() above I guess
something like below should work (incomplete, in particular we need to
change first_tid() as well).
But a PF_HIDDEN sub-thread will still be visible via /proc/$pid_of_PF_HIDDEN
> We obviously still want to see it for management purposes,
> so it's not like the thing should be entirely invisible,
Can you explain?
Oleg.
--- x/include/linux/sched.h
+++ x/include/linux/sched.h
@@ -1685,7 +1685,7 @@ extern struct pid *cad_pid;
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
-#define PF__HOLE__00010000 0x00010000
+#define PF_HIDDEN 0x00010000
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
--- x/include/linux/sched/task.h
+++ x/include/linux/sched/task.h
@@ -31,6 +31,7 @@ struct kernel_clone_args {
u32 io_thread:1;
u32 user_worker:1;
u32 no_files:1;
+ u32 hidden:1;
unsigned long stack;
unsigned long stack_size;
unsigned long tls;
--- x/kernel/fork.c
+++ x/kernel/fork.c
@@ -2237,6 +2237,8 @@ __latent_entropy struct task_struct *cop
}
if (args->io_thread)
p->flags |= PF_IO_WORKER;
+ if (args->hidden)
+ p->flags |= PF_HIDDEN;
if (args->name)
strscpy_pad(p->comm, args->name, sizeof(p->comm));
--- x/kernel/vhost_task.c
+++ x/kernel/vhost_task.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop);
*/
struct vhost_task *vhost_task_create(bool (*fn)(void *),
void (*handle_sigkill)(void *), void *arg,
- const char *name)
+ bool hidden, const char *name)
{
struct kernel_clone_args args = {
.flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM |
@@ -125,6 +125,7 @@ struct vhost_task *vhost_task_create(boo
.exit_signal = 0,
.fn = vhost_task_fn,
.name = name,
+ .hidden = hidden,
.user_worker = 1,
.no_files = 1,
};
--- x/fs/proc/base.c
+++ x/fs/proc/base.c
@@ -3906,9 +3906,12 @@ static struct task_struct *next_tid(stru
struct task_struct *pos = NULL;
rcu_read_lock();
if (pid_alive(start)) {
- pos = __next_thread(start);
- if (pos)
- get_task_struct(pos);
+ for (pos = start; (pos = __next_thread(pos)); ) {
+ if (!(pos->flags & PF_HIDDEN)) {
+ get_task_struct(pos);
+ break;
+ }
+ }
}
rcu_read_unlock();
put_task_struct(start);
Powered by blists - more mailing lists