lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20241108063103.4434-1-00107082@163.com>
Date: Fri,  8 Nov 2024 14:31:03 +0800
From: David Wang <00107082@....com>
To: mingo@...hat.com,
	peterz@...radead.org,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org
Cc: linux-kernel@...r.kernel.org,
	David Wang <00107082@....com>
Subject: [PATCH] kernel:sched:stats:/proc/schedstat: use seq_put_decimal_ull for decimal values

seq_printf is costly, and lots of decimal values are emitted via seq_printf
when reading /proc/schedstat; profiling indicates seq_printf takes more
than 90% of samples of show_schedstat:
	show_schedstat(98.974% 667134/674048)
	    seq_printf(97.798% 652441/667134)
		vsnprintf(97.810% 638155/652441)
		    format_decode(23.720% 151368/638155)
		    number(16.797% 107191/638155)
		    memcpy_orig(4.610% 29422/638155)
		    srso_return_thunk(2.738% 17475/638155)
		    bitmap_string.isra.0(0.928% 5921/638155)
		    __memcpy(0.407% 2599/638155)
		    pointer(0.089% 571/638155)
		    srso_safe_ret(0.003% 16/638155)
	    __rcu_read_unlock(0.097% 648/667134)
	    __rcu_read_lock(0.097% 647/667134)

And one million rounds of open/read/close /proc/schedstat took:

	real	1m12.713s
	user	0m0.232s
	sys	1m12.440s
On average, each open/read/close sequence took 0.072ms.

With this patch, performance is significantly improved:

	real	0m30.141s
	user	0m0.320s
	sys	0m29.820s
On average, each open/read/close sequence took 0.029ms, a ~60%
improvement.

Signed-off-by: David Wang <00107082@....com>
---
 kernel/sched/stats.c | 62 +++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..21d8c2edbc43 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -122,15 +122,18 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		rq = cpu_rq(cpu);
 
 		/* runqueue-specific stats */
-		seq_printf(seq,
-		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
-		    cpu, rq->yld_count,
-		    rq->sched_count, rq->sched_goidle,
-		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
-		seq_printf(seq, "\n");
+		seq_puts(seq, "cpu");
+		seq_put_decimal_ull(seq, "", cpu);
+		seq_put_decimal_ull(seq, " ", rq->yld_count);
+		seq_put_decimal_ull(seq, " ", 0);
+		seq_put_decimal_ull(seq, " ", rq->sched_count);
+		seq_put_decimal_ull(seq, " ", rq->sched_goidle);
+		seq_put_decimal_ull(seq, " ", rq->ttwu_count);
+		seq_put_decimal_ull(seq, " ", rq->ttwu_local);
+		seq_put_decimal_ull(seq, " ", rq->rq_cpu_time);
+		seq_put_decimal_ull(seq, " ", rq->rq_sched_info.run_delay);
+		seq_put_decimal_ull(seq, " ", rq->rq_sched_info.pcount);
+		seq_putc(seq, '\n');
 
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
@@ -138,26 +141,33 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			seq_printf(seq, "domain%d %*pb", dcount++,
+			seq_puts(seq, "domain");
+			seq_put_decimal_ull(seq, "", dcount++);
+			seq_printf(seq, " %*pb",
 				   cpumask_pr_args(sched_domain_span(sd)));
 			for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
-				    sd->lb_count[itype],
-				    sd->lb_balanced[itype],
-				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
-				    sd->lb_gained[itype],
-				    sd->lb_hot_gained[itype],
-				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_count[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_balanced[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_failed[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_imbalance[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_gained[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_hot_gained[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_nobusyq[itype]);
+				seq_put_decimal_ull(seq, " ", sd->lb_nobusyg[itype]);
 			}
-			seq_printf(seq,
-				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
-			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
-			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
-			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
-			    sd->ttwu_move_balance);
+			seq_put_decimal_ull(seq, " ", sd->alb_count);
+			seq_put_decimal_ull(seq, " ", sd->alb_failed);
+			seq_put_decimal_ull(seq, " ", sd->alb_pushed);
+			seq_put_decimal_ull(seq, " ", sd->sbe_count);
+			seq_put_decimal_ull(seq, " ", sd->sbe_balanced);
+			seq_put_decimal_ull(seq, " ", sd->sbe_pushed);
+			seq_put_decimal_ull(seq, " ", sd->sbf_count);
+			seq_put_decimal_ull(seq, " ", sd->sbf_balanced);
+			seq_put_decimal_ull(seq, " ", sd->sbf_pushed);
+			seq_put_decimal_ull(seq, " ", sd->ttwu_wake_remote);
+			seq_put_decimal_ull(seq, " ", sd->ttwu_move_affine);
+			seq_put_decimal_ull(seq, " ", sd->ttwu_move_balance);
+			seq_putc(seq, '\n');
 		}
 		rcu_read_unlock();
 #endif
-- 
2.39.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ