Message-ID: <20230420095610.GA153295@ziqianlu-desk2>
Date: Thu, 20 Apr 2023 17:56:10 +0800
From: Aaron Lu <aaron.lu@...el.com>
To: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
CC: Peter Zijlstra <peterz@...radead.org>,
<linux-kernel@...r.kernel.org>, Olivier Dion <odion@...icios.com>,
<michael.christie@...cle.com>
Subject: Re: [RFC PATCH v9 2/2] sched: Fix performance regression introduced
by mm_cid
On Wed, Apr 19, 2023 at 11:50:12AM -0400, Mathieu Desnoyers wrote:
> Introduce per-mm/cpu current concurrency id (mm_cid) to fix a PostgreSQL
> sysbench regression reported by Aaron Lu.
mm_cid_get() dropped to 5.x% after I disabled CONFIG_DEBUG_PREEMPT; using
__this_cpu_X() doesn't help, I suppose because __this_cpu_X() still has to
fetch mm->pcpu_cid first.
Annotating mm_cid_get():
│ static inline int mm_cid_get(struct mm_struct *mm)
│ {
0.05 │ push %rbp
0.02 │ mov %rsp,%rbp
│ push %r15
│ push %r14
│ push %r13
│ push %r12
│ push %rbx
0.02 │ sub $0x10,%rsp
│ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
71.30 │ mov 0x60(%rdi),%r12
│ struct cpumask *cpumask;
│ int cid;
│
│ lockdep_assert_rq_held(rq);
│ cpumask = mm_cidmask(mm);
│ cid = __this_cpu_read(pcpu_cid->cid);
28.44 │ mov %gs:0x8(%r12),%edx
│ if (mm_cid_is_valid(cid)) {
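If my reading of the disassembly above is right, the expensive part is the
first plain load, not the per-cpu access itself. A minimal sketch of the
access pattern (read_cid() is a made-up name for illustration, not kernel
code):

static inline int read_cid(struct mm_struct *mm)
{
	/*
	 * Plain load of the per-cpu base pointer from mm_struct: the
	 * "mov 0x60(%rdi),%r12" taking 71.30% above, presumably a
	 * cache miss on the mm cacheline.
	 */
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;

	/*
	 * %gs-relative per-cpu read ("mov %gs:0x8(%r12),%edx"): cheap
	 * once the base pointer is in a register, which is why
	 * switching to __this_cpu_X() cannot skip the load above.
	 */
	return __this_cpu_read(pcpu_cid->cid);
}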
sched_mm_cid_migrate_to() is at 4.x% and its annotation:
│ dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq));
│ mov -0x30(%rbp),%rax
54.53 │ mov 0x60(%r13),%rbx
19.61 │ movslq 0xaf0(%rax),%r15
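per_cpu_ptr() itself is only pointer arithmetic, so if I read this right
the cycles go to the two dependent loads feeding it. A simplified model
(the helper name is made up for illustration):

static inline struct mm_cid *mm_pcpu_cid_of(struct mm_struct *mm,
					    struct rq *dst_rq)
{
	struct mm_cid __percpu *base = mm->pcpu_cid;	/* mov 0x60(%r13),%rbx: 54.53% */
	int cpu = cpu_of(dst_rq);			/* movslq 0xaf0(%rax),%r15: 19.61% */

	/* per_cpu_ptr() then just adds __per_cpu_offset[cpu] to base. */
	return per_cpu_ptr(base, cpu);
}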
The reason why accessing mm->pcpu_cid is so costly is still a mystery to
me...
BTW, I used the diff below to mitigate the incorrect rq lock issue I
described in my reply to v8: mm_cid_snapshot_time() asserted and used
this_rq(), but sched_mm_cid_migrate_to() holds dst_rq's lock, which may
belong to a different cpu, so pass the cpu explicitly:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6e2dd8f4ee3..f16418731866 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11662,7 +11662,7 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
 		return;
 	}
 	/* Move src_cid to dst cpu. */
-	mm_cid_snapshot_time(mm);
+	mm_cid_snapshot_time(mm, cpu_of(dst_rq));
 	WRITE_ONCE(dst_pcpu_cid->cid, src_cid);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d1d470441422..8b6a0c8ed3d1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3348,12 +3348,13 @@ static inline int __mm_cid_try_get(struct mm_struct *mm)
  * Save a snapshot of the current runqueue time of this cpu
  * with the per-cpu cid value, allowing to estimate how recently it was used.
  */
-static inline void mm_cid_snapshot_time(struct mm_struct *mm)
+static inline void mm_cid_snapshot_time(struct mm_struct *mm, int cpu)
 {
-	struct rq *rq = this_rq();
+	struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu);
+	struct rq *rq = cpu_rq(cpu);
 
 	lockdep_assert_rq_held(rq);
-	__this_cpu_write(mm->pcpu_cid->time, rq->clock);
+	WRITE_ONCE(pcpu_cid->time, rq->clock);
 }
 
 static inline int __mm_cid_get(struct mm_struct *mm)
@@ -3404,7 +3405,7 @@ static inline int __mm_cid_get(struct mm_struct *mm)
 unlock:
 	raw_spin_unlock(&cid_lock);
 end:
-	mm_cid_snapshot_time(mm);
+	mm_cid_snapshot_time(mm, raw_smp_processor_id());
 	return cid;
 }
 
@@ -3419,7 +3420,7 @@ static inline int mm_cid_get(struct mm_struct *mm)
 	cpumask = mm_cidmask(mm);
 	cid = __this_cpu_read(pcpu_cid->cid);
 	if (mm_cid_is_valid(cid)) {
-		mm_cid_snapshot_time(mm);
+		mm_cid_snapshot_time(mm, raw_smp_processor_id());
 		return cid;
 	}
 	if (mm_cid_is_lazy_put(cid)) {
@@ -3467,7 +3468,7 @@ static inline void switch_mm_cid(struct task_struct *prev,
 		 */
 	}
 	if (prev->mm_cid_active) {
-		mm_cid_snapshot_time(prev->mm);
+		mm_cid_snapshot_time(prev->mm, raw_smp_processor_id());
 		mm_cid_put_lazy(prev);
 		prev->mm_cid = -1;
 	}
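As I understand it, the switch from __this_cpu_write() to WRITE_ONCE()
above is needed because __this_cpu_write() can only address the local
cpu's copy, while the migrate path updates another cpu's. A minimal
sketch of the distinction (sample_time is a made-up per-cpu variable,
not kernel code):

DEFINE_PER_CPU(u64, sample_time);

static void snapshot_local(u64 now)
{
	/* %gs-relative store; only valid for the cpu we are running on. */
	__this_cpu_write(sample_time, now);
}

static void snapshot_remote(int cpu, u64 now)
{
	/*
	 * Compute the address of @cpu's copy and store through it;
	 * WRITE_ONCE() keeps the store from tearing for lockless readers.
	 */
	WRITE_ONCE(*per_cpu_ptr(&sample_time, cpu), now);
}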