Message-ID: <aNvu6uDQN7FSr1Gp@shell.ilvokhin.com>
Date: Tue, 30 Sep 2025 14:53:30 +0000
From: Dmitry Ilvokhin <d@...okhin.com>
To: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>
Cc: linux-kernel@...r.kernel.org
Subject: [PATCH] sched/stats: Optimize /proc/schedstat printing

The seq_printf() function supports a rich format string for printing
decimals, but there is no need for it in /proc/schedstat, since the
majority of the data is space-separated decimals. Use
seq_put_decimal_ull() instead as a faster alternative.
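
For illustration, the call-pattern change looks roughly like this
(a sketch, not part of the diff below; rq->yld_count is just one of
the converted fields):

	/* Before: each value goes through the printf format parser. */
	seq_printf(seq, " %u", rq->yld_count);

	/* After: the value is emitted directly, no format parsing. */
	seq_put_decimal_ull(seq, " ", rq->yld_count);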

Performance counter stats (truncated) for sh -c 'cat /proc/schedstat >
/dev/null' before and after applying the patch, from a machine with 72
CPUs, are below.
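
The numbers were collected with perf stat; a command along these lines
should reproduce them (the repeat count is a guess, the commit only
shows the averaged output):

	perf stat -r 100 sh -c 'cat /proc/schedstat > /dev/null'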

Before:

      2.94 msec task-clock               #    0.820 CPUs utilized
         1      context-switches         #  340.551 /sec
         0      cpu-migrations           #    0.000 /sec
       340      page-faults              #  115.787 K/sec
10,327,200      instructions             #    1.89  insn per cycle
                                         #    0.10  stalled cycles per insn
 5,458,307      cycles                   #    1.859 GHz
 1,052,733      stalled-cycles-frontend  #   19.29% frontend cycles idle
 2,066,321      branches                 #  703.687 M/sec
    25,621      branch-misses            #    1.24% of all branches

0.00357974 +- 0.00000209 seconds time elapsed  ( +-  0.06% )

After:

      2.50 msec task-clock              #    0.785 CPUs utilized
         1      context-switches        #  399.780 /sec
         0      cpu-migrations          #    0.000 /sec
       340      page-faults             #  135.925 K/sec
 7,371,867      instructions            #    1.59  insn per cycle
                                        #    0.13  stalled cycles per insn
 4,647,053      cycles                  #    1.858 GHz
   986,487      stalled-cycles-frontend #   21.23% frontend cycles idle
 1,591,374      branches                #  636.199 M/sec
    28,973      branch-misses           #    1.82% of all branches

0.00318461 +- 0.00000295 seconds time elapsed  ( +-  0.09% )

This is a ~11% (relative) improvement in elapsed time.
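That is, (0.00357974 - 0.00318461) / 0.00357974 ≈ 11.0%.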

Signed-off-by: Dmitry Ilvokhin <d@...okhin.com>
---
 kernel/sched/stats.c | 86 ++++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 34 deletions(-)

diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index d1c9429a4ac5..b304f821e8ff 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -98,6 +98,56 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
 	}
 }
 
+static void show_runqueue_stats(struct seq_file *seq, int cpu, struct rq *rq)
+{
+	seq_printf(seq, "cpu%d", cpu);
+	seq_put_decimal_ull(seq, " ", rq->yld_count);
+	seq_put_decimal_ull(seq, " ", 0);
+	seq_put_decimal_ull(seq, " ", rq->sched_count);
+	seq_put_decimal_ull(seq, " ", rq->sched_goidle);
+	seq_put_decimal_ull(seq, " ", rq->ttwu_count);
+	seq_put_decimal_ull(seq, " ", rq->ttwu_local);
+	seq_put_decimal_ull(seq, " ", rq->rq_cpu_time);
+	seq_put_decimal_ull(seq, " ", rq->rq_sched_info.run_delay);
+	seq_put_decimal_ull(seq, " ", rq->rq_sched_info.pcount);
+	seq_putc(seq, '\n');
+}
+
+static void show_domain_stats(struct seq_file *seq, int dcount,
+			      struct sched_domain *sd)
+{
+	enum cpu_idle_type itype;
+
+	seq_printf(seq, "domain%d %s %*pb", dcount, sd->name,
+		   cpumask_pr_args(sched_domain_span(sd)));
+	for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
+		seq_put_decimal_ull(seq, " ", sd->lb_count[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_balanced[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_failed[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_imbalance_load[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_imbalance_util[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_imbalance_task[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_imbalance_misfit[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_gained[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_hot_gained[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_nobusyq[itype]);
+		seq_put_decimal_ull(seq, " ", sd->lb_nobusyg[itype]);
+	}
+	seq_put_decimal_ull(seq, " ", sd->alb_count);
+	seq_put_decimal_ull(seq, " ", sd->alb_failed);
+	seq_put_decimal_ull(seq, " ", sd->alb_pushed);
+	seq_put_decimal_ull(seq, " ", sd->sbe_count);
+	seq_put_decimal_ull(seq, " ", sd->sbe_balanced);
+	seq_put_decimal_ull(seq, " ", sd->sbe_pushed);
+	seq_put_decimal_ull(seq, " ", sd->sbf_count);
+	seq_put_decimal_ull(seq, " ", sd->sbf_balanced);
+	seq_put_decimal_ull(seq, " ", sd->sbf_pushed);
+	seq_put_decimal_ull(seq, " ", sd->ttwu_wake_remote);
+	seq_put_decimal_ull(seq, " ", sd->ttwu_move_affine);
+	seq_put_decimal_ull(seq, " ", sd->ttwu_move_balance);
+	seq_putc(seq, '\n');
+}
+
 /*
  * Current schedstat API version.
  *
@@ -121,44 +171,12 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		rq = cpu_rq(cpu);
 
 		/* runqueue-specific stats */
-		seq_printf(seq,
-		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
-		    cpu, rq->yld_count,
-		    rq->sched_count, rq->sched_goidle,
-		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
-		seq_printf(seq, "\n");
+		show_runqueue_stats(seq, cpu, rq);
 
 		/* domain-specific stats */
 		rcu_read_lock();
 		for_each_domain(cpu, sd) {
-			enum cpu_idle_type itype;
-
-			seq_printf(seq, "domain%d %s %*pb", dcount++, sd->name,
-				   cpumask_pr_args(sched_domain_span(sd)));
-			for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
-				    sd->lb_count[itype],
-				    sd->lb_balanced[itype],
-				    sd->lb_failed[itype],
-				    sd->lb_imbalance_load[itype],
-				    sd->lb_imbalance_util[itype],
-				    sd->lb_imbalance_task[itype],
-				    sd->lb_imbalance_misfit[itype],
-				    sd->lb_gained[itype],
-				    sd->lb_hot_gained[itype],
-				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
-			}
-			seq_printf(seq,
-				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
-			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
-			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
-			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
-			    sd->ttwu_move_balance);
+			show_domain_stats(seq, dcount++, sd);
 		}
 		rcu_read_unlock();
 	}
-- 
2.47.3

