[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20241108063103.4434-1-00107082@163.com>
Date: Fri, 8 Nov 2024 14:31:03 +0800
From: David Wang <00107082@....com>
To: mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org
Cc: linux-kernel@...r.kernel.org,
David Wang <00107082@....com>
Subject: [PATCH] kernel:sched:stats:/proc/schedstat: use seq_put_decimal_ull for decimal values
seq_printf is costly, and lots of decimal values are emitted via seq_printf
when reading /proc/schedstat. Profiling indicates seq_printf takes more
than 90% of the samples of show_schedstat:
show_schedstat(98.974% 667134/674048)
seq_printf(97.798% 652441/667134)
vsnprintf(97.810% 638155/652441)
format_decode(23.720% 151368/638155)
number(16.797% 107191/638155)
memcpy_orig(4.610% 29422/638155)
srso_return_thunk(2.738% 17475/638155)
bitmap_string.isra.0(0.928% 5921/638155)
__memcpy(0.407% 2599/638155)
pointer(0.089% 571/638155)
srso_safe_ret(0.003% 16/638155)
__rcu_read_unlock(0.097% 648/667134)
__rcu_read_lock(0.097% 647/667134)
And one million rounds of open/read/close /proc/schedstat took:
real 1m12.713s
user 0m0.232s
sys 1m12.440s
On average, each open/read/close sequence took 0.072ms.
With this patch, performance is significantly improved:
real 0m30.141s
user 0m0.320s
sys 0m29.820s
On average, each open/read/close sequence took 0.029ms, a ~60%
improvement.
Signed-off-by: David Wang <00107082@....com>
---
kernel/sched/stats.c | 62 +++++++++++++++++++++++++-------------------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..21d8c2edbc43 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -122,15 +122,18 @@ static int show_schedstat(struct seq_file *seq, void *v)
rq = cpu_rq(cpu);
/* runqueue-specific stats */
- seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
- cpu, rq->yld_count,
- rq->sched_count, rq->sched_goidle,
- rq->ttwu_count, rq->ttwu_local,
- rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
- seq_printf(seq, "\n");
+ seq_puts(seq, "cpu");
+ seq_put_decimal_ull(seq, "", cpu);
+ seq_put_decimal_ull(seq, " ", rq->yld_count);
+ seq_put_decimal_ull(seq, " ", 0);
+ seq_put_decimal_ull(seq, " ", rq->sched_count);
+ seq_put_decimal_ull(seq, " ", rq->sched_goidle);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_count);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_local);
+ seq_put_decimal_ull(seq, " ", rq->rq_cpu_time);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.run_delay);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.pcount);
+ seq_putc(seq, '\n');
#ifdef CONFIG_SMP
/* domain-specific stats */
@@ -138,26 +141,33 @@ static int show_schedstat(struct seq_file *seq, void *v)
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
- seq_printf(seq, "domain%d %*pb", dcount++,
+ seq_puts(seq, "domain");
+ seq_put_decimal_ull(seq, "", dcount++);
+ seq_printf(seq, " %*pb",
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u",
- sd->lb_count[itype],
- sd->lb_balanced[itype],
- sd->lb_failed[itype],
- sd->lb_imbalance[itype],
- sd->lb_gained[itype],
- sd->lb_hot_gained[itype],
- sd->lb_nobusyq[itype],
- sd->lb_nobusyg[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_count[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_balanced[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_failed[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_hot_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyq[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyg[itype]);
}
- seq_printf(seq,
- " %u %u %u %u %u %u %u %u %u %u %u %u\n",
- sd->alb_count, sd->alb_failed, sd->alb_pushed,
- sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
- sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
- sd->ttwu_wake_remote, sd->ttwu_move_affine,
- sd->ttwu_move_balance);
+ seq_put_decimal_ull(seq, " ", sd->alb_count);
+ seq_put_decimal_ull(seq, " ", sd->alb_failed);
+ seq_put_decimal_ull(seq, " ", sd->alb_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbe_count);
+ seq_put_decimal_ull(seq, " ", sd->sbe_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbe_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbf_count);
+ seq_put_decimal_ull(seq, " ", sd->sbf_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbf_pushed);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_wake_remote);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_affine);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_balance);
+ seq_putc(seq, '\n');
}
rcu_read_unlock();
#endif
--
2.39.2
Powered by blists - more mailing lists