[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230508213147.853677542@infradead.org>
Date: Mon, 08 May 2023 23:19:58 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: bigeasy@...utronix.de
Cc: mark.rutland@....com, maz@...nel.org, catalin.marinas@....com,
will@...nel.org, chenhuacai@...nel.org, kernel@...0n.name,
hca@...ux.ibm.com, gor@...ux.ibm.com, agordeev@...ux.ibm.com,
borntraeger@...ux.ibm.com, svens@...ux.ibm.com,
pbonzini@...hat.com, wanpengli@...cent.com, vkuznets@...hat.com,
tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com,
jgross@...e.com, boris.ostrovsky@...cle.com,
daniel.lezcano@...aro.org, kys@...rosoft.com,
haiyangz@...rosoft.com, wei.liu@...nel.org, decui@...rosoft.com,
rafael@...nel.org, peterz@...radead.org, longman@...hat.com,
boqun.feng@...il.com, pmladek@...e.com, senozhatsky@...omium.org,
rostedt@...dmis.org, john.ogness@...utronix.de,
juri.lelli@...hat.com, vincent.guittot@...aro.org,
dietmar.eggemann@....com, bsegall@...gle.com, mgorman@...e.de,
bristot@...hat.com, vschneid@...hat.com, jstultz@...gle.com,
sboyd@...nel.org, linux-kernel@...r.kernel.org,
loongarch@...ts.linux.dev, linux-s390@...r.kernel.org,
kvm@...r.kernel.org, linux-hyperv@...r.kernel.org,
linux-pm@...r.kernel.org
Subject: [RFC][PATCH 7/9] x86/tsc: Provide sched_clock_noinstr()
With the intent to provide local_clock_noinstr(), a variant of
local_clock() that's safe to be called from noinstr code (with the
assumption that any such code will already be non-preemptible),
prepare for that by providing sched_clock_noinstr(), a noinstr
variant of sched_clock().
Specifically, preempt_enable_*() can call out to schedule(), which
leaves the .noinstr.text section and upsets noinstr validation efforts:
vmlinux.o: warning: objtool: native_sched_clock+0x96: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
vmlinux.o: warning: objtool: kvm_clock_read+0x22: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
arch/x86/kernel/kvmclock.c | 4 +--
arch/x86/kernel/tsc.c | 38 ++++++++++++++++++++++++++++---------
arch/x86/xen/time.c | 3 --
drivers/clocksource/hyperv_timer.c | 4 +--
include/clocksource/hyperv_timer.h | 4 +--
5 files changed, 36 insertions(+), 17 deletions(-)
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -71,7 +71,7 @@ static int kvm_set_wallclock(const struc
return -ENODEV;
}
-static noinstr u64 kvm_clock_read(void)
+static u64 kvm_clock_read(void)
{
u64 ret;
@@ -88,7 +88,7 @@ static u64 kvm_clock_get_cycles(struct c
static noinstr u64 kvm_sched_clock_read(void)
{
- return kvm_clock_read() - kvm_sched_clock_offset;
+ return pvclock_clocksource_read_nowd(this_cpu_pvti()) - kvm_sched_clock_offset;
}
static inline void kvm_sched_clock_init(bool stable)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -69,12 +69,10 @@ static int __init tsc_early_khz_setup(ch
}
early_param("tsc_early_khz", tsc_early_khz_setup);
-__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void __cyc2ns_read(struct cyc2ns_data *data)
{
int seq, idx;
- preempt_disable_notrace();
-
do {
seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
@@ -86,6 +84,12 @@ __always_inline void cyc2ns_read_begin(s
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+{
+ preempt_disable_notrace();
+ __cyc2ns_read(data);
+}
+
__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
@@ -115,18 +119,25 @@ __always_inline void cyc2ns_read_end(voi
* -johnstul@...ibm.com "math is hard, lets go shopping!"
*/
-static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
- cyc2ns_read_begin(&data);
+ __cyc2ns_read(&data);
ns = data.cyc2ns_offset;
ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
- cyc2ns_read_end();
+ return ns;
+}
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ unsigned long long ns;
+ preempt_disable_notrace();
+ ns = __cycles_2_ns(cyc);
+ preempt_enable_notrace();
return ns;
}
@@ -223,7 +234,7 @@ noinstr u64 native_sched_clock(void)
u64 tsc_now = rdtsc();
/* return the value in ns */
- return cycles_2_ns(tsc_now);
+ return __cycles_2_ns(tsc_now);
}
/*
@@ -250,7 +261,7 @@ u64 native_sched_clock_from_tsc(u64 tsc)
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
-noinstr u64 sched_clock(void)
+noinstr u64 sched_clock_noinstr(void)
{
return paravirt_sched_clock();
}
@@ -260,11 +271,20 @@ bool using_native_sched_clock(void)
return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
-u64 sched_clock(void) __attribute__((alias("native_sched_clock")));
+u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock")));
bool using_native_sched_clock(void) { return true; }
#endif
+notrace u64 sched_clock(void)
+{
+ u64 now;
+ preempt_disable_notrace();
+ now = sched_clock_noinstr();
+ preempt_enable_notrace();
+ return now;
+}
+
int check_tsc_unstable(void)
{
return tsc_unstable;
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -66,11 +66,10 @@ static noinstr u64 xen_sched_clock(void)
struct pvclock_vcpu_time_info *src;
u64 ret;
- preempt_disable_notrace();
src = &__this_cpu_read(xen_vcpu)->time;
ret = pvclock_clocksource_read_nowd(src);
ret -= xen_sched_clock_offset;
- preempt_enable_notrace();
+
return ret;
}
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -408,9 +408,9 @@ static u64 notrace read_hv_clock_tsc_cs(
return read_hv_clock_tsc();
}
-static u64 notrace read_hv_sched_clock_tsc(void)
+static u64 noinstr read_hv_sched_clock_tsc(void)
{
- return (read_hv_clock_tsc() - hv_sched_clock_offset) *
+ return (hv_read_tsc_page(hv_get_tsc_page()) - hv_sched_clock_offset) *
(NSEC_PER_SEC / HV_CLOCK_HZ);
}
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,7 +38,7 @@ extern void hv_remap_tsc_clocksource(voi
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline notrace u64
+static __always_inline notrace u64
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
{
u64 scale, offset;
@@ -85,7 +85,7 @@ hv_read_tsc_page_tsc(const struct ms_hyp
return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
}
-static inline notrace u64
+static __always_inline notrace u64
hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
{
u64 cur_tsc;
Powered by blists - more mailing lists