lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <871qvbutex.fsf_-_@email.froward.int.ebiederm.org>
Date:   Sun, 26 Jun 2022 14:16:38 -0500
From:   "Eric W. Biederman" <ebiederm@...ssion.com>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     Christian Brauner <brauner@...nel.org>, Tejun Heo <tj@...nel.org>,
        Petr Mladek <pmladek@...e.com>,
        Lai Jiangshan <jiangshanlai@...il.com>,
        Michal Hocko <mhocko@...e.com>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Oleg Nesterov <oleg@...hat.com>
Subject: [PATCH 3/3] kthread: Stop abusing TASK_UNINTERRUPTIBLE (INCOMPLETE)


Instead leave the task as a new unscheduled task and require the
caller to call wake_up_new_task.

Signed-off-by: "Eric W. Biederman" <ebiederm@...ssion.com>
---
 arch/arm/common/bL_switcher.c             |   2 +-
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |   4 +-
 drivers/block/mtip32xx/mtip32xx.c         |   2 +-
 drivers/firmware/psci/psci_checker.c      |   2 +-
 drivers/firmware/stratix10-svc.c          |   4 +-
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c         |   2 +-
 drivers/scsi/bnx2i/bnx2i_init.c           |   2 +-
 drivers/scsi/qedi/qedi_main.c             |   2 +-
 include/linux/kthread.h                   |   4 +-
 kernel/bpf/cpumap.c                       |   2 +-
 kernel/dma/map_benchmark.c                |   2 +-
 kernel/kthread.c                          | 114 ++++++++++------------
 kernel/smpboot.c                          |   1 +
 kernel/workqueue.c                        |   2 +-
 net/core/pktgen.c                         |   2 +-
 net/sunrpc/svc.c                          |   2 +-
 16 files changed, 72 insertions(+), 77 deletions(-)

diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 9a9aa53547a6..15c9841c3c15 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -311,7 +311,7 @@ static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
 				      cpu_to_node(cpu), "kswitcher_%d", cpu);
 	if (!IS_ERR(task)) {
 		kthread_bind(task, cpu);
-		wake_up_process(task);
+		wake_up_new_task(task);
 	} else
 		pr_err("%s failed for CPU %d\n", __func__, cpu);
 	return task;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index db813f819ad6..ba09f5cf1773 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1206,7 +1206,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
 		goto out;
 	}
 	kthread_bind(thread, cpu);
-	wake_up_process(thread);
+	wake_up_new_task(thread);
 
 	ret = wait_event_interruptible(plr->lock_thread_wq,
 				       plr->thread_done == 1);
@@ -1304,7 +1304,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	}
 
 	kthread_bind(thread, plr->cpu);
-	wake_up_process(thread);
+	wake_up_new_task(thread);
 
 	ret = wait_event_interruptible(plr->lock_thread_wq,
 				       plr->thread_done == 1);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 27386a572ba4..b2f5b30a27aa 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3649,7 +3649,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 		rv = -EFAULT;
 		goto kthread_run_error;
 	}
-	wake_up_process(dd->mtip_svc_handler);
+	wake_up_new_task(dd->mtip_svc_handler);
 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
 		rv = wait_for_rebuild;
 
diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c
index 116eb465cdb4..52fcd122e2b6 100644
--- a/drivers/firmware/psci/psci_checker.c
+++ b/drivers/firmware/psci/psci_checker.c
@@ -418,7 +418,7 @@ static int suspend_tests(void)
 	 * wait for the completion of suspend_threads_started.
 	 */
 	for (i = 0; i < nb_threads; ++i)
-		wake_up_process(threads[i]);
+		wake_up_new_task(threads[i]);
 	complete_all(&suspend_threads_started);
 
 	wait_for_completion(&suspend_threads_done);
diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
index 14663f671323..64d07aaa68bf 100644
--- a/drivers/firmware/stratix10-svc.c
+++ b/drivers/firmware/stratix10-svc.c
@@ -581,7 +581,7 @@ static int svc_get_sh_memory(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
-	wake_up_process(sh_memory_task);
+	wake_up_new_task(sh_memory_task);
 
 	if (!wait_for_completion_timeout(&sh_memory->sync_complete, 10 * HZ)) {
 		dev_err(dev,
@@ -834,7 +834,7 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg)
 				return -EINVAL;
 			}
 		kthread_bind(chan->ctrl->task, cpu);
-		wake_up_process(chan->ctrl->task);
+		wake_up_new_task(chan->ctrl->task);
 	}
 
 	pr_debug("%s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__,
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 05ddbb9bb7d8..1b3af0e01d67 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2622,7 +2622,7 @@ static int bnx2fc_cpu_online(unsigned int cpu)
 	/* bind thread to the cpu */
 	kthread_bind(thread, cpu);
 	p->iothread = thread;
-	wake_up_process(thread);
+	wake_up_new_task(thread);
 	return 0;
 }
 
diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 2b3f0c10478e..cd4976bb86fc 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -424,7 +424,7 @@ static int bnx2i_cpu_online(unsigned int cpu)
 	/* bind thread to the cpu */
 	kthread_bind(thread, cpu);
 	p->iothread = thread;
-	wake_up_process(thread);
+	wake_up_new_task(thread);
 	return 0;
 }
 
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 83ffba7f51da..97ced4f12d6e 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1967,7 +1967,7 @@ static int qedi_cpu_online(unsigned int cpu)
 
 	kthread_bind(thread, cpu);
 	p->iothread = thread;
-	wake_up_process(thread);
+	wake_up_new_task(thread);
 	return 0;
 }
 
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 30e5bec81d2b..fa5e24df84bf 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -53,7 +53,7 @@ bool kthread_is_per_cpu(struct task_struct *k);
 	struct task_struct *__k						   \
 		= kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
 	if (!IS_ERR(__k))						   \
-		wake_up_process(__k);					   \
+		wake_up_new_task(__k);					   \
 	__k;								   \
 })
 
@@ -77,7 +77,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
 
 	p = kthread_create_on_cpu(threadfn, data, cpu, namefmt);
 	if (!IS_ERR(p))
-		wake_up_process(p);
+		wake_up_new_task(p);
 
 	return p;
 }
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index f4860ac756cd..e72bbb766f01 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -475,7 +475,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 
 	/* Make sure kthread runs on a single CPU */
 	kthread_bind(rcpu->kthread, cpu);
-	wake_up_process(rcpu->kthread);
+	wake_up_new_task(rcpu->kthread);
 
 	return rcpu;
 
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 0520a8f4fb1d..b28e33c07c92 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -134,7 +134,7 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 
 	for (i = 0; i < threads; i++) {
 		get_task_struct(tsk[i]);
-		wake_up_process(tsk[i]);
+		wake_up_new_task(tsk[i]);
 	}
 
 	msleep_interruptible(map->bparam.seconds * 1000);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8529f6b1582b..1769b5118694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -329,51 +329,12 @@ EXPORT_SYMBOL(kthread_complete_and_exit);
 
 static int kthread(void *_create)
 {
-	static const struct sched_param param = { .sched_priority = 0 };
-	/* Copy data: it's on kthread's stack */
-	struct kthread_create_info *create = _create;
-	int (*threadfn)(void *data) = create->threadfn;
-	void *data = create->data;
-	struct completion *done;
-	struct kthread *self;
-	int ret;
-
-	self = to_kthread(current);
+	struct kthread *self = to_kthread(current);
+	int ret = -EINTR;
 
-	/* If user was SIGKILLed, I release the structure. */
-	done = xchg(&create->done, NULL);
-	if (!done) {
-		kfree(create);
-		kthread_exit(-EINTR);
-	}
-
-	self->threadfn = threadfn;
-	self->data = data;
-
-	/*
-	 * The new thread inherited kthreadd's priority and CPU mask. Reset
-	 * back to default in case they have been changed.
-	 */
-	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
-	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
-
-	/* OK, tell user we're spawned, wait for stop or wakeup */
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	create->result = current;
-	/*
-	 * Thread is going to call schedule(), do not preempt it,
-	 * or the creator may spend more time in wait_task_inactive().
-	 */
-	preempt_disable();
-	complete(done);
-	schedule_preempt_disabled();
-	preempt_enable();
-
-	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
 		cgroup_kthread_ready();
-		__kthread_parkme(self);
-		ret = threadfn(data);
+		ret = self->threadfn(self->data);
 	}
 	kthread_exit(ret);
 }
@@ -391,25 +352,41 @@ int tsk_fork_get_node(struct task_struct *tsk)
 static void create_kthread(struct kthread_create_info *create)
 {
 	struct task_struct *new;
+	struct completion *done;
 
 #ifdef CONFIG_NUMA
 	current->pref_node_fork = create->node;
 #endif
 	/* We want our own signal handler (we take no signals by default). */
 	new = new_kthread(kthread, create, NUMA_NO_NODE);
+	create->result = new;
+	/* Claim the completion */
+	done = xchg(&create->done, NULL);
 	if (IS_ERR(new)) {
-		/* If user was SIGKILLed, I release the structure. */
-		struct completion *done = xchg(&create->done, NULL);
+		if (done)
+			complete(done);
+	} else if (done) {
+		static const struct sched_param param = { .sched_priority = 0 };
+		struct kthread *kthread = to_kthread(new);
 
-		if (!done) {
-			kfree(create);
-			return;
-		}
-		create->result = ERR_CAST(new);
-		complete(done);
-	} else {
-		wake_up_new_task(new);
+		kthread->threadfn = create->threadfn;
+		kthread->data = create->data;
+
+		/*
+		 * The new thread inherited kthreadd's priority and CPU mask. Reset
+		 * back to default in case they have been changed.
+		 */
+		sched_setscheduler_nocheck(new, SCHED_NORMAL, &param);
+		set_cpus_allowed_ptr(new, housekeeping_cpumask(HK_TYPE_KTHREAD));
+
+		/* OK, tell user we're spawned, wait for stop or wakeup */
+		//wake_up_new_task(new);
 	}
+	/* If user was SIGKILLed, release the structure. */
+	if (!done)
+		kfree(create);
+	else
+		complete(done);
 }
 
 static __printf(4, 0)
@@ -518,11 +495,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state)
+static void kthread_bind_mask_parked(struct task_struct *p, const struct cpumask *mask)
 {
 	unsigned long flags;
 
-	if (!wait_task_inactive(p, state)) {
+	if (!wait_task_inactive(p, TASK_PARKED)) {
 		WARN_ON(1);
 		return;
 	}
@@ -534,14 +511,31 @@ static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mas
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state)
+static void kthread_bind_mask_new(struct task_struct *new, const struct cpumask *mask)
+{
+	unsigned long flags;
+
+	/*
+	 * FIXME: verify that p is a new task that
+	 * has not yet been passed through
+	 * wake_up_new_task
+	 */
+
+	/* It's safe because new has never been scheduled. */
+	raw_spin_lock_irqsave(&new->pi_lock, flags);
+	do_set_cpus_allowed(new, mask);
+	new->flags |= PF_NO_SETAFFINITY;
+	raw_spin_unlock_irqrestore(&new->pi_lock, flags);
+}
+
+static void __kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	__kthread_bind_mask(p, cpumask_of(cpu), state);
+	kthread_bind_mask_new(p, cpumask_of(cpu));
 }
 
 void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
 {
-	__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
+	kthread_bind_mask_new(p, mask);
 }
 
 /**
@@ -555,7 +549,7 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
+	__kthread_bind(p, cpu);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -629,7 +623,7 @@ void kthread_unpark(struct task_struct *k)
 	 * The binding was lost and we need to set it again.
 	 */
 	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-		__kthread_bind(k, kthread->cpu, TASK_PARKED);
+		kthread_bind_mask_parked(k, cpumask_of(kthread->cpu));
 
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	/*
@@ -863,7 +857,7 @@ __kthread_create_worker(int cpu, unsigned int flags,
 
 	worker->flags = flags;
 	worker->task = task;
-	wake_up_process(task);
+	wake_up_new_task(task);
 	return worker;
 
 fail_task:
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index b9f54544e749..79b8d05164d6 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -192,6 +192,7 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 	 * Park the thread so that it could start right on the CPU
 	 * when it is available.
 	 */
+	wake_up_new_task(tsk);
 	kthread_park(tsk);
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..2d16a933f452 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1961,7 +1961,7 @@ static struct worker *create_worker(struct worker_pool *pool)
 	raw_spin_lock_irq(&pool->lock);
 	worker->pool->nr_workers++;
 	worker_enter_idle(worker);
-	wake_up_process(worker->task);
+	wake_up_new_task(worker->task);
 	raw_spin_unlock_irq(&pool->lock);
 
 	return worker;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 84b62cd7bc57..40efd9b678fa 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3864,7 +3864,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
 
 	t->net = pn;
 	get_task_struct(p);
-	wake_up_process(p);
+	wake_up_new_task(p);
 	wait_for_completion(&t->start_done);
 
 	return 0;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7c9a0d0b1230..addbba323b9c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -769,7 +769,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 			svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
 
 		svc_sock_update_bufs(serv);
-		wake_up_process(task);
+		wake_up_new_task(task);
 	} while (nrservs > 0);
 
 	return 0;
-- 
2.35.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ