Message-Id: <20230209102730.974465-1-qiang1.zhang@intel.com>
Date:   Thu,  9 Feb 2023 18:27:30 +0800
From:   Zqiang <qiang1.zhang@...el.com>
To:     paulmck@...nel.org, frederic@...nel.org, quic_neeraju@...cinc.com,
        joel@...lfernandes.org, qiuxu.zhuo@...el.com
Cc:     rcu@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH v2] rcu: Keep RCU-related kthreads running on housekeeping CPUs

For kernels built with CONFIG_NO_HZ_FULL=y and CONFIG_RCU_NOCB_CPU=y,
when the same cpulist is passed to the "isolcpus=", "nohz_full=" and
"rcu_nocbs=" boot parameters, the RCU-related kthreads (rcu_gp, rcuog*,
rcuop* and so on) run on the housekeeping CPUs once the system has
booted. However, if that cpulist contains CPU0, the result is
different: these RCU-related kthreads remain restricted to CPU0.

These RCU-related kthreads are spawned with kthread_create(), and when
they start running, set_cpus_allowed_ptr() is invoked to restrict their
allowed cpumask to housekeeping_cpumask(HK_TYPE_KTHREAD). However,
because these kthreads are created before the other CPUs are brought
online, only CPU0 is online at that point. If CPU0 is also listed in
the "isolcpus=", "nohz_full=" and "rcu_nocbs=" boot parameters, the
housekeeping cpumask contains no online CPU, so set_cpus_allowed_ptr()
returns an error:

set_cpus_allowed_ptr()
 ->__set_cpus_allowed_ptr()
    ->__set_cpus_allowed_ptr_locked()
     {
                bool kthread = p->flags & PF_KTHREAD;
                ....
                if (kthread || is_migration_disabled(p))
                        cpu_valid_mask = cpu_online_mask;
                ....
                dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, ctx->new_mask);
                if (dest_cpu >= nr_cpu_ids) {
                        ret = -EINVAL;
                        goto out;
                }
                ....
     }

At this point only CPU0 is set in cpu_online_mask, while ctx->new_mask
is the housekeeping cpumask, which does not contain CPU0. The
intersection of the two masks is therefore empty, dest_cpu ends up as an
invalid CPU number, set_cpus_allowed_ptr() returns -EINVAL, and the
housekeeping cpumask is never applied.
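
For illustration only, here is a minimal userspace sketch (not kernel
code; the mask values are assumed for this example) of the
empty-intersection failure described above:

  /* Model of the failing check in __set_cpus_allowed_ptr_locked(). */
  #include <stdio.h>

  int main(void)
  {
          unsigned long cpu_online_mask = 0x1;   /* only CPU0 online at early boot */
          unsigned long housekeeping_mask = 0xc; /* CPUs 2-3; CPUs 0-1 isolated */

          /* No online CPU is also a housekeeping CPU, so no destination exists. */
          if (!(cpu_online_mask & housekeeping_mask))
                  printf("no valid destination CPU -> -EINVAL\n");
          return 0;
  }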

This commit therefore adds additional set_cpus_allowed_ptr() calls in
the CPU-hotplug path, resetting the CPU affinity of the rcuboost, rcuog,
and rcuop kthreads after all other CPUs are online.

Signed-off-by: Zqiang <qiang1.zhang@...el.com>
---

 runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4"
 bootparams="console=ttyS0 isolcpus=0,1 nohz_full=0,1 rcu_nocbs=0,1" -d

 Before this patch:

 root@...ux86-64:~# ps -eo pid,psr,comm | grep rcu
    3   0 rcu_gp
    6   0 kworker/0:0-rcu_gp
   12   0 rcu_tasks_kthread
   13   0 rcu_tasks_rude_kthread
   14   0 rcu_tasks_trace_kthread
   16   0 rcu_preempt
   17   0 rcuog/0
   18   0 rcuop/0
   19   2 rcub/0
   20   0 rcu_exp_gp_kthread_worker
   21   0 rcu_exp_par_gp_kthread_worker
   29   0 rcuop/1
   35   0 rcuog/2
   36   0 rcuop/2
   42   2 rcuop/3
 root@...ux86-64:~#
 root@...ux86-64:~# cat /proc/16/status | grep Cpus
 Cpus_allowed:   1
 Cpus_allowed_list:      0

 After applying this patch:
 
 root@...ux86-64:/# ps -eo pid,psr,comm | grep rcu
    3   0 rcu_gp
    6   0 kworker/0:0-rcu_gp
   12   3 rcu_tasks_kthread
   13   2 rcu_tasks_rude_kthread
   14   3 rcu_tasks_trace_kthread
   16   3 rcu_preempt
   17   3 rcuog/0
   18   2 rcuop/0
   19   2 rcub/0
   20   3 rcu_exp_gp_kthread_worker
   21   0 rcu_exp_par_gp_kthread_worker
   29   3 rcuop/1
   35   0 rcuog/2
   36   0 rcuop/2
   42   2 rcuop/3
 root@...ux86-64:/# cat /proc/16/status | grep Cpus
 Cpus_allowed:   c
 Cpus_allowed_list:      2-3

 kernel/rcu/rcu.h         |  4 +++
 kernel/rcu/tasks.h       | 26 +++++++++++++----
 kernel/rcu/tree.c        | 61 +++++++++++++++++++++++++++++++++++++---
 kernel/rcu/tree.h        |  1 -
 kernel/rcu/tree_nocb.h   | 18 +++++++++++-
 kernel/rcu/tree_plugin.h |  9 ------
 6 files changed, 99 insertions(+), 20 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index a3adcf9a9919..1cad82e93304 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -503,8 +503,10 @@ void rcu_async_relax(void);
 void rcupdate_announce_bootup_oddness(void);
 #ifdef CONFIG_TASKS_RCU_GENERIC
 void show_rcu_tasks_gp_kthreads(void);
+void rcu_tasks_generic_setaffinity(int cpu);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void show_rcu_tasks_gp_kthreads(void) {}
+static inline void rcu_tasks_generic_setaffinity(int cpu) {}
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 void rcu_request_urgent_qs_task(struct task_struct *t);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
@@ -597,6 +599,7 @@ static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
 static inline void rcu_fwd_progress_check(unsigned long j) { }
 static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
 static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
+static inline void rcu_kthread_setaffinity(struct task_struct *tsk, int outgoing) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
 unsigned long rcu_get_gp_seq(void);
@@ -607,6 +610,7 @@ void show_rcu_gp_kthreads(void);
 int rcu_get_gp_kthreads_prio(void);
 void rcu_fwd_progress_check(unsigned long j);
 void rcu_force_quiescent_state(void);
+void rcu_kthread_setaffinity(struct task_struct *tsk, int outgoing);
 extern struct workqueue_struct *rcu_gp_wq;
 #ifdef CONFIG_RCU_EXP_KTHREAD
 extern struct kthread_worker *rcu_exp_gp_kworker;
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index baf7ec178155..cebc02198ef7 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -545,10 +545,6 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 {
 	struct rcu_tasks *rtp = arg;
 
-	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
-	housekeeping_affine(current, HK_TYPE_RCU);
-	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
-
 	/*
 	 * Each pass through the following loop makes one check for
 	 * newly arrived callbacks, and, if there are some, waits for
@@ -586,9 +582,11 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 {
 	struct task_struct *t;
 
-	t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
+	t = kthread_create(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
 		return;
+	WRITE_ONCE(rtp->kthread_ptr, t);
+	wake_up_process(t);
 	smp_mb(); /* Ensure others see full kthread. */
 }
 
@@ -1969,6 +1967,24 @@ void __init rcu_init_tasks_generic(void)
 	rcu_tasks_initiate_self_tests();
 }
 
+void rcu_tasks_generic_setaffinity(int cpu)
+{
+#ifdef CONFIG_TASKS_RCU
+	if (rcu_tasks.kthread_ptr)
+		rcu_kthread_setaffinity(rcu_tasks.kthread_ptr, cpu);
+#endif
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+	if (rcu_tasks_rude.kthread_ptr)
+		rcu_kthread_setaffinity(rcu_tasks_rude.kthread_ptr, cpu);
+#endif
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+	if (rcu_tasks_trace.kthread_ptr)
+		rcu_kthread_setaffinity(rcu_tasks_trace.kthread_ptr, cpu);
+#endif
+}
+
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void rcu_tasks_bootup_oddness(void) {}
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ee27a03d7576..d1575d74346e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -154,6 +154,8 @@ static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
 static bool rcu_init_invoked(void);
 static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
 static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
+static void rcu_exp_gp_kworkers_setaffinity(int cpu);
+static void rcu_nocb_kthread_setaffinity(int cpu, int outgoing);
 
 /*
  * rcuc/rcub/rcuop kthread realtime priority. The "rcuop"
@@ -1781,7 +1783,6 @@ static noinline void rcu_gp_cleanup(void)
  */
 static int __noreturn rcu_gp_kthread(void *unused)
 {
-	rcu_bind_gp_kthread();
 	for (;;) {
 
 		/* Handle grace-period start. */
@@ -4297,6 +4298,30 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
 	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
 }
 
+void rcu_kthread_setaffinity(struct task_struct *tsk, int outgoing)
+{
+	cpumask_var_t mask;
+
+	if (!tsk)
+		return;
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return;
+	cpumask_copy(mask, housekeeping_cpumask(HK_TYPE_RCU));
+	if (outgoing >= 0 && cpumask_test_cpu(outgoing, mask))
+		cpumask_clear_cpu(outgoing, mask);
+	cpumask_and(mask, cpu_online_mask, mask);
+	if (cpumask_empty(mask))
+		cpumask_copy(mask, cpu_possible_mask);
+	set_cpus_allowed_ptr(tsk, mask);
+	free_cpumask_var(mask);
+}
+
+static void rcu_gp_kthread_setaffinity(int cpu)
+{
+	if (rcu_state.gp_kthread)
+		rcu_kthread_setaffinity(rcu_state.gp_kthread, cpu);
+}
+
 /*
  * Near the end of the CPU-online process.  Pretty much all services
  * enabled, and the CPU is now very much alive.
@@ -4316,7 +4341,10 @@ int rcutree_online_cpu(unsigned int cpu)
 		return 0; /* Too early in boot for scheduler work. */
 	sync_sched_exp_online_cleanup(cpu);
 	rcutree_affinity_setting(cpu, -1);
-
+	rcu_gp_kthread_setaffinity(-1);
+	rcu_nocb_kthread_setaffinity(cpu, -1);
+	rcu_tasks_generic_setaffinity(-1);
+	rcu_exp_gp_kworkers_setaffinity(-1);
 	// Stop-machine done, so allow nohz_full to disable tick.
 	tick_dep_clear(TICK_DEP_BIT_RCU);
 	return 0;
@@ -4339,7 +4367,10 @@ int rcutree_offline_cpu(unsigned int cpu)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	rcutree_affinity_setting(cpu, cpu);
-
+	rcu_gp_kthread_setaffinity(cpu);
+	rcu_nocb_kthread_setaffinity(cpu, cpu);
+	rcu_tasks_generic_setaffinity(cpu);
+	rcu_exp_gp_kworkers_setaffinity(cpu);
 	// nohz_full CPUs need the tick for stop-machine to work quickly
 	tick_dep_set(TICK_DEP_BIT_RCU);
 	return 0;
@@ -4550,6 +4581,14 @@ static void __init rcu_start_exp_gp_kworkers(void)
 				   &param);
 }
 
+static void rcu_exp_gp_kworkers_setaffinity(int cpu)
+{
+	if (rcu_exp_gp_kworker)
+		rcu_kthread_setaffinity(rcu_exp_gp_kworker->task, cpu);
+	if (rcu_exp_par_gp_kworker)
+		rcu_kthread_setaffinity(rcu_exp_par_gp_kworker->task, cpu);
+}
+
 static inline void rcu_alloc_par_gp_wq(void)
 {
 }
@@ -4559,7 +4598,9 @@ struct workqueue_struct *rcu_par_gp_wq;
 static void __init rcu_start_exp_gp_kworkers(void)
 {
 }
-
+static void rcu_exp_gp_kworkers_setaffinity(int cpu)
+{
+}
 static inline void rcu_alloc_par_gp_wq(void)
 {
 	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
@@ -4609,6 +4650,18 @@ static int __init rcu_spawn_gp_kthread(void)
 }
 early_initcall(rcu_spawn_gp_kthread);
 
+static int __init rcu_boost_resetaffinity(void)
+{
+	struct rcu_node *rnp;
+	int cpu;
+
+	rcu_for_each_leaf_node(rnp)
+		rcu_boost_kthread_setaffinity(rnp, -1);
+	for_each_possible_cpu(cpu)
+		rcu_nocb_kthread_setaffinity(cpu, -1);
+	return 0;
+}
+core_initcall(rcu_boost_resetaffinity);
 /*
  * This function is invoked towards the end of the scheduler's
  * initialization process.  Before this is called, the idle task might
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 192536916f9a..391e3fae4ff5 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -495,7 +495,6 @@ do {								\
 #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
-static void rcu_bind_gp_kthread(void);
 static bool rcu_nohz_full_cpu(void);
 
 /* Forward declarations for tree_stall.h */
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index f2280616f9d5..a9cd07ccf959 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1494,6 +1494,20 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 	mutex_unlock(&rcu_state.barrier_mutex);
 }
 
+static void rcu_nocb_kthread_setaffinity(int cpu, int outgoing)
+{
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	struct rcu_data *rdp_gp;
+
+	if (rdp->nocb_cb_kthread)
+		rcu_kthread_setaffinity(rdp->nocb_cb_kthread, outgoing);
+	rdp_gp = rdp->nocb_gp_rdp;
+	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
+	if (rdp_gp->nocb_gp_kthread)
+		rcu_kthread_setaffinity(rdp_gp->nocb_gp_kthread, outgoing);
+	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
+}
+
 /* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */
 static int rcu_nocb_gp_stride = -1;
 module_param(rcu_nocb_gp_stride, int, 0444);
@@ -1754,7 +1768,9 @@ static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
 static void rcu_spawn_cpu_nocb_kthread(int cpu)
 {
 }
-
+static void rcu_nocb_kthread_setaffinity(int cpu, int outgoing)
+{
+}
 static void show_rcu_nocb_state(struct rcu_data *rdp)
 {
 }
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7b0fe741a088..fdde71ebb83e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1294,12 +1294,3 @@ static bool rcu_nohz_full_cpu(void)
 	return false;
 }
 
-/*
- * Bind the RCU grace-period kthreads to the housekeeping CPU.
- */
-static void rcu_bind_gp_kthread(void)
-{
-	if (!tick_nohz_full_enabled())
-		return;
-	housekeeping_affine(current, HK_TYPE_RCU);
-}
-- 
2.25.1
