lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221019031551.24312-1-zhouchuyi@bytedance.com>
Date:   Wed, 19 Oct 2022 11:15:51 +0800
From:   Chuyi Zhou <zhouchuyi@...edance.com>
To:     mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        vincent.guittot@...aro.org
Cc:     linux-kernel@...r.kernel.org, htejun@...il.com,
        lizefan.x@...edance.com, vschneid@...hat.com, bsegall@...gle.com,
        Chuyi Zhou <zhouchuyi@...edance.com>,
        Abel Wu <wuyun.abel@...edance.com>
Subject: [RESEND] sched/fair: Add min_ratio for cfs bandwidth_control

Tasks may be throttled when holding locks for a long time by current
cfs bandwidth control mechanism once users set a too small quota/period
ratio, which can result whole system get stuck[1].

In order to prevent the above situation from happening, this patch adds
sysctl_sched_cfs_bandwidth_min_ratio in /proc/sys/kernel, which indicates
the minimum percentage of quota/period users can set. The default value is
zero and users can set quota and period without triggering this constraint.

Link[1]:https://lore.kernel.org/lkml/5987be34-b527-4ff5-a17d-5f6f0dc94d6d@huawei.com/T/
Signed-off-by: Chuyi Zhou <zhouchuyi@...edance.com>
Suggested-by: Abel Wu <wuyun.abel@...edance.com>
---
 include/linux/sched/sysctl.h |  4 ++++
 kernel/sched/core.c          | 23 +++++++++++++++++++++++
 kernel/sysctl.c              | 10 ++++++++++
 3 files changed, 37 insertions(+)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 303ee7dd0c7e..dedb18648f0e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -21,6 +21,10 @@ enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_END,
 };
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_min_ratio;
+#endif
+
 #define NUMA_BALANCING_DISABLED		0x0
 #define NUMA_BALANCING_NORMAL		0x1
 #define NUMA_BALANCING_MEMORY_TIERING	0x2
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5800b0623ff3..8f6cfd889e37 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10504,6 +10504,12 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
 }
 
 #ifdef CONFIG_CFS_BANDWIDTH
+/*
+ * The minimum of quota/period ratio users can set, default is zero and users can set
+ * quota and period without triggering this constraint.
+ */
+unsigned int sysctl_sched_cfs_bandwidth_min_ratio;
+
 static DEFINE_MUTEX(cfs_constraints_mutex);
 
 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
@@ -10513,6 +10519,20 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
+static int check_cfs_bandwidth_min_ratio(u64 period, u64 quota)
+{
+	u64 ratio;
+
+	if (!sysctl_sched_cfs_bandwidth_min_ratio)
+		return 0;
+
+	ratio = div64_u64(quota * 100, period);
+	if (ratio < sysctl_sched_cfs_bandwidth_min_ratio)
+		return -1;
+
+	return 0;
+}
+
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 				u64 burst)
 {
@@ -10548,6 +10568,9 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 				     burst + quota > max_cfs_runtime))
 		return -EINVAL;
 
+	if (quota != RUNTIME_INF && check_cfs_bandwidth_min_ratio(period, quota))
+		return -EINVAL;
+
 	/*
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
 	 * unthrottle_offline_cfs_rqs().
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 188c305aeb8b..7d9743e8e514 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1652,6 +1652,16 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 	},
 #endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_min_ratio",
+		.data		= &sysctl_sched_cfs_bandwidth_min_ratio,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif /* CONFIG_CFS_BANDWIDTH */
 	{
 		.procname	= "panic",
 		.data		= &panic_timeout,
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ