lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20250116081234.16222-1-15645113830zzh@gmail.com>
Date: Thu, 16 Jan 2025 16:12:35 +0800
From: zihan zhou <15645113830zzh@...il.com>
To: vincent.guittot@...aro.org
Cc: 15645113830zzh@...il.com,
	bsegall@...gle.com,
	dietmar.eggemann@....com,
	juri.lelli@...hat.com,
	linux-kernel@...r.kernel.org,
	mgorman@...e.de,
	mingo@...hat.com,
	peterz@...radead.org,
	rostedt@...dmis.org,
	vschneid@...hat.com,
	yaowenchao@...com,
	yaozhenguo@...com
Subject: Re: [PATCH V2] sched: Forward deadline for early tick

Thanks for your reply!

> default slice = 0.75 msec * (1 + ilog(ncpus)) with ncpus capped at 8
> which means that we have a default slice of
> 0.75 for 1 cpu
> 1.50 up to 3 cpus
> 2.25 up to 7 cpus
> 3.00 for 8 cpus and above
> 
> For HZ=250 and HZ=100, all values are "ok". By "ok", I mean that task
> will not get an extra tick but their runtime remains far higher than
> their slice. The only config that has an issue is HZ=1000 with 8 cpus
> or more.
> 
> Using 0.70 instead of 0.75 should not change much for other configs
> and would fix this config too with a default slice equals 2.8ms.
> 0.70 for 1 cpu
> 1.40 up to 3 cpus
> 2.10 up to 7 cpus
> 2.8 for 8 cpus and above

It seems that changing the default slice is a good idea that won't have
too much impact and solves the main problem. I strongly agree with it;
it is more reasonable than forwarding the deadline.

> That being said the problem remains the same if a task sets its custom
> slice being a multiple of ticks or the time stolen by interrupts is
> higher than 66us per tick in average

Additionally, can we add some warnings on this basis? We do not prevent
slices from being integer multiples of the tick, but we do not recommend
doing so.

Here is the patch I have written based on the above logic; I look forward
to your feedback.


Signed-off-by: zihan zhou <15645113830zzh@...il.com>
---
 kernel/sched/debug.c    | 48 ++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/fair.c     |  6 +++---
 kernel/sched/syscalls.c |  4 ++++
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index a1be00a988bf..da17d19082ba 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -166,6 +166,52 @@ static const struct file_operations sched_feat_fops = {
 	.release	= single_release,
 };
 
+
+static ssize_t sched_base_slice_write(struct file *filp, const char __user *ubuf,
+				   size_t cnt, loff_t *ppos)
+{
+	char buf[16];
+	unsigned int base_slice;
+
+	if (cnt > 15)
+		cnt = 15;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+	buf[cnt] = '\0';
+
+	if (kstrtouint(buf, 10, &base_slice))
+		return -EINVAL;
+
+
+	sysctl_sched_base_slice = base_slice;
+
+	if (sysctl_sched_base_slice % TICK_NSEC == 0)
+		pr_warn("It is not recommended to set the slice to an integer multiple of the tick\n");
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static int sched_base_slice_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", sysctl_sched_base_slice);
+	return 0;
+}
+
+static int sched_base_slice_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_base_slice_show, NULL);
+}
+
+static const struct file_operations sched_base_slice_fops = {
+	.open		= sched_base_slice_open,
+	.write		= sched_base_slice_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 #ifdef CONFIG_SMP
 
 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
@@ -505,7 +551,7 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
-	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
+	debugfs_create_file("base_slice_ns", 0644, debugfs_sched, NULL, &sched_base_slice_fops);
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3e9ca38512de..27f7cf205741 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -71,10 +71,10 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  *
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_base_slice			= 750000ULL;
-static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
+unsigned int sysctl_sched_base_slice			= 700000ULL;
+static unsigned int normalized_sysctl_sched_base_slice	= 700000ULL;
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index ff0e5ab4e37c..d0f90d61398f 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -309,6 +309,10 @@ static void __setscheduler_params(struct task_struct *p,
 			p->se.slice = clamp_t(u64, attr->sched_runtime,
 					      NSEC_PER_MSEC/10,   /* HZ=1000 * 10 */
 					      NSEC_PER_MSEC*100); /* HZ=100  / 10 */
+
+			if (p->se.slice % TICK_NSEC == 0)
+				pr_warn("It is not recommended to set the slice to an integer multiple of the tick\n");
+
 		} else {
 			p->se.custom_slice = 0;
 			p->se.slice = sysctl_sched_base_slice;
-- 
2.33.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ