lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220525221055.1152307-5-frederic@kernel.org>
Date:   Thu, 26 May 2022 00:10:55 +0200
From:   Frederic Weisbecker <frederic@...nel.org>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Frederic Weisbecker <frederic@...nel.org>,
        Tejun Heo <tj@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        "Paul E . McKenney" <paulmck@...nel.org>,
        Paul Gortmaker <paul.gortmaker@...driver.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Phil Auld <pauld@...hat.com>,
        Zefan Li <lizefan.x@...edance.com>,
        Waiman Long <longman@...hat.com>,
        Daniel Bristot de Oliveira <bristot@...nel.org>,
        Nicolas Saenz Julienne <nsaenz@...nel.org>,
        rcu@...r.kernel.org
Subject: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions

Introduce a new "isolation.rcu_nocb" file within a cgroup2/cpuset
directory which allows RCU callbacks offloading (aka. RCU NOCB) to be
either enabled ("1") or disabled ("0") on a set of CPUs. This can
override the previous boot settings made through the "rcu_nocbs=" kernel
parameter.

The file is only writeable on "root" type partitions to exclude any
overlap. When root type partitions are nested, the deepest one has the
highest priority.
This means that given the following setting:

                    Top cpuset (CPUs: 0-7)
                    cpuset.isolation.rcu_nocb = 0
                              |
                              |
                    Subdirectory A (CPUs: 5-7)
                    cpuset.cpus.partition = root
                    cpuset.isolation.rcu_nocb = 0
                              |
                              |
                    Subdirectory B (CPUs: 7)
                    cpuset.cpus.partition = root
                    cpuset.isolation.rcu_nocb = 1

the result is that only CPU 7 is in rcu_nocb mode.

Note that the "rcu_nocbs" kernel parameter must be passed at boot, even
without a cpulist, so that nocb support is enabled.

Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
Cc: Zefan Li <lizefan.x@...edance.com>
Cc: Tejun Heo <tj@...nel.org>
Cc: Johannes Weiner <hannes@...xchg.org>
Cc: Paul E. McKenney <paulmck@...nel.org>
Cc: Phil Auld <pauld@...hat.com>
Cc: Nicolas Saenz Julienne <nsaenz@...nel.org>
Cc: Marcelo Tosatti <mtosatti@...hat.com>
Cc: Paul Gortmaker <paul.gortmaker@...driver.com>
Cc: Waiman Long <longman@...hat.com>
Cc: Daniel Bristot de Oliveira <bristot@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
---
 kernel/cgroup/cpuset.c | 95 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9390bfd9f1cd..2d9f019bb590 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -225,6 +225,7 @@ typedef enum {
 	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
 	CS_SPREAD_SLAB,
+	CS_RCU_NOCB,
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
@@ -268,6 +269,11 @@ static inline int is_spread_slab(const struct cpuset *cs)
 	return test_bit(CS_SPREAD_SLAB, &cs->flags);
 }
 
+static inline int is_rcu_nocb(const struct cpuset *cs)
+{
+	return test_bit(CS_RCU_NOCB, &cs->flags);
+}
+
 static inline int is_partition_root(const struct cpuset *cs)
 {
 	return cs->partition_root_state > 0;
@@ -590,6 +596,62 @@ static inline void free_cpuset(struct cpuset *cs)
 	kfree(cs);
 }
 
+#ifdef CONFIG_RCU_NOCB_CPU
+static int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+	int err;
+
+	if (is_rcu_nocb(root))
+		err = housekeeping_cpumask_set(root->effective_cpus, HK_TYPE_RCU);
+	else
+		err = housekeeping_cpumask_clear(root->effective_cpus, HK_TYPE_RCU);
+
+	return err;
+}
+
+static int cpuset_rcu_nocb_update(struct cpuset *cur, struct cpuset *trialcs)
+{
+	struct cgroup_subsys_state *des_css;
+	struct cpuset *des;
+	int err;
+
+	if (cur->partition_root_state != PRS_ENABLED)
+		return -EINVAL;
+
+	err = cpuset_rcu_nocb_apply(trialcs);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	cpuset_for_each_descendant_pre(des, des_css, cur) {
+		if (des == cur)
+			continue;
+		if (des->partition_root_state == PRS_ENABLED)
+			break;
+		spin_lock_irq(&callback_lock);
+		if (is_rcu_nocb(trialcs))
+			set_bit(CS_RCU_NOCB, &des->flags);
+		else
+			clear_bit(CS_RCU_NOCB, &des->flags);
+		spin_unlock_irq(&callback_lock);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+#else
+static inline int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+	return 0;
+}
+
+static inline int cpuset_rcu_nocb_update(struct cpuset *cur,
+					 struct cpuset *trialcs)
+{
+	return 0;
+}
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
 /*
  * validate_change_legacy() - Validate conditions specific to legacy (v1)
  *                            behavior.
@@ -1655,6 +1717,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (cs->partition_root_state) {
 		struct cpuset *parent = parent_cs(cs);
 
+		WARN_ON_ONCE(cpuset_rcu_nocb_apply(parent) < 0);
+		WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+
 		/*
 		 * For partition root, update the cpumasks of sibling
 		 * cpusets if they use parent's effective_cpus.
@@ -2012,6 +2077,12 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
+	if (is_rcu_nocb(cs) != is_rcu_nocb(trialcs)) {
+		err = cpuset_rcu_nocb_update(cs, trialcs);
+		if (err < 0)
+			goto out;
+	}
+
 	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
 	spin_unlock_irq(&callback_lock);
@@ -2365,6 +2436,7 @@ typedef enum {
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
 	FILE_SPREAD_SLAB,
+	FILE_RCU_NOCB,
 } cpuset_filetype_t;
 
 static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -2406,6 +2478,9 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	case FILE_SPREAD_SLAB:
 		retval = update_flag(CS_SPREAD_SLAB, cs, val);
 		break;
+	case FILE_RCU_NOCB:
+		retval = update_flag(CS_RCU_NOCB, cs, val);
+		break;
 	default:
 		retval = -EINVAL;
 		break;
@@ -2573,6 +2648,8 @@ static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
 		return is_spread_page(cs);
 	case FILE_SPREAD_SLAB:
 		return is_spread_slab(cs);
+	case FILE_RCU_NOCB:
+		return is_rcu_nocb(cs);
 	default:
 		BUG();
 	}
@@ -2803,7 +2880,14 @@ static struct cftype dfl_files[] = {
 		.private = FILE_SUBPARTS_CPULIST,
 		.flags = CFTYPE_DEBUG,
 	},
-
+#ifdef CONFIG_RCU_NOCB_CPU
+	{
+		.name = "isolation.rcu_nocb",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_RCU_NOCB,
+	},
+#endif
 	{ }	/* terminate */
 };
 
@@ -2861,6 +2945,8 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 		set_bit(CS_SPREAD_PAGE, &cs->flags);
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
+	if (is_rcu_nocb(parent))
+		set_bit(CS_RCU_NOCB, &cs->flags);
 
 	cpuset_inc();
 
@@ -3227,12 +3313,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
 	if (mems_updated)
 		check_insane_mems_config(&new_mems);
 
-	if (is_in_v2_mode())
+	if (is_in_v2_mode()) {
 		hotplug_update_tasks(cs, &new_cpus, &new_mems,
 				     cpus_updated, mems_updated);
-	else
+		if (cpus_updated)
+			WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+	} else {
 		hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
 					    cpus_updated, mems_updated);
+	}
 
 	percpu_up_write(&cpuset_rwsem);
 }
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ