[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fbb509e8-0bd6-480f-be32-fd0895255a21@suse.com>
Date: Tue, 22 Apr 2025 10:38:57 +0200
From: Jürgen Groß <jgross@...e.com>
To: "Xin Li (Intel)" <xin@...or.com>, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, linux-perf-users@...r.kernel.org,
linux-hyperv@...r.kernel.org, virtualization@...ts.linux.dev,
linux-pm@...r.kernel.org, linux-edac@...r.kernel.org,
xen-devel@...ts.xenproject.org, linux-acpi@...r.kernel.org,
linux-hwmon@...r.kernel.org, netdev@...r.kernel.org,
platform-driver-x86@...r.kernel.org
Cc: tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com, acme@...nel.org,
andrew.cooper3@...rix.com, peterz@...radead.org, namhyung@...nel.org,
mark.rutland@....com, alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
irogers@...gle.com, adrian.hunter@...el.com, kan.liang@...ux.intel.com,
wei.liu@...nel.org, ajay.kaher@...adcom.com,
bcm-kernel-feedback-list@...adcom.com, tony.luck@...el.com,
pbonzini@...hat.com, vkuznets@...hat.com, seanjc@...gle.com,
luto@...nel.org, boris.ostrovsky@...cle.com, kys@...rosoft.com,
haiyangz@...rosoft.com, decui@...rosoft.com
Subject: Re: [RFC PATCH v2 06/34] x86/msr: Use the alternatives mechanism to
read PMC
On 22.04.25 10:21, Xin Li (Intel) wrote:
> To eliminate the indirect call overhead introduced by the pv_ops API,
> use the alternatives mechanism to read PMC:
Which indirect call overhead? The indirect call is patched via the
alternative mechanism to a direct one.
>
> 1) When built with !CONFIG_XEN_PV, X86_FEATURE_XENPV becomes a
> disabled feature, preventing the Xen PMC read code from being
> built and ensuring the native code is executed unconditionally.
Without CONFIG_XEN_PV CONFIG_PARAVIRT_XXL is not selected, resulting in
native code anyway.
>
> 2) When built with CONFIG_XEN_PV:
>
> 2.1) If not running on the Xen hypervisor (!X86_FEATURE_XENPV),
> the kernel runtime binary is patched to unconditionally
> jump to the native PMC read code.
>
> 2.2) If running on the Xen hypervisor (X86_FEATURE_XENPV), the
> kernel runtime binary is patched to unconditionally jump
> to the Xen PMC read code.
>
> Consequently, remove the pv_ops PMC read API.
I don't see the value of this patch.
It adds more #ifdef and code lines without any real gain.
In case the x86 maintainers think it is still worth it, I won't object.
Juergen
>
> Signed-off-by: Xin Li (Intel) <xin@...or.com>
> ---
> arch/x86/include/asm/msr.h | 31 ++++++++++++++++++++-------
> arch/x86/include/asm/paravirt.h | 5 -----
> arch/x86/include/asm/paravirt_types.h | 2 --
> arch/x86/kernel/paravirt.c | 1 -
> arch/x86/xen/enlighten_pv.c | 2 --
> drivers/net/vmxnet3/vmxnet3_drv.c | 2 +-
> 6 files changed, 24 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
> index 01dc8e61ef97..33cf506e2fd6 100644
> --- a/arch/x86/include/asm/msr.h
> +++ b/arch/x86/include/asm/msr.h
> @@ -8,6 +8,7 @@
>
> #include <asm/asm.h>
> #include <asm/errno.h>
> +#include <asm/cpufeature.h>
> #include <asm/cpumask.h>
> #include <uapi/asm/msr.h>
> #include <asm/shared/msr.h>
> @@ -73,6 +74,10 @@ static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
> static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
> #endif
>
> +#ifdef CONFIG_XEN_PV
> +extern u64 xen_read_pmc(int counter);
> +#endif
> +
> /*
> * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR
> * accessors and should not have any tracing or other functionality piggybacking
> @@ -170,16 +175,32 @@ native_write_msr_safe(u32 msr, u32 low, u32 high)
> extern int rdmsr_safe_regs(u32 regs[8]);
> extern int wrmsr_safe_regs(u32 regs[8]);
>
> -static inline u64 native_read_pmc(int counter)
> +static __always_inline u64 native_rdpmcq(int counter)
> {
> DECLARE_ARGS(val, low, high);
>
> - asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
> + asm_inline volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
> +
> if (tracepoint_enabled(rdpmc))
> do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
> +
> return EAX_EDX_VAL(val, low, high);
> }
>
> +static __always_inline u64 rdpmcq(int counter)
> +{
> +#ifdef CONFIG_XEN_PV
> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> + return xen_read_pmc(counter);
> +#endif
> +
> + /*
> + * 1) When built with !CONFIG_XEN_PV.
> + * 2) When built with CONFIG_XEN_PV but not running on Xen hypervisor.
> + */
> + return native_rdpmcq(counter);
> +}
> +
> #ifdef CONFIG_PARAVIRT_XXL
> #include <asm/paravirt.h>
> #else
> @@ -233,12 +254,6 @@ static inline int rdmsrq_safe(u32 msr, u64 *p)
> *p = native_read_msr_safe(msr, &err);
> return err;
> }
> -
> -static __always_inline u64 rdpmcq(int counter)
> -{
> - return native_read_pmc(counter);
> -}
> -
> #endif /* !CONFIG_PARAVIRT_XXL */
>
> /* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 590824916394..c7689f5f70d6 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -239,11 +239,6 @@ static inline int rdmsrq_safe(unsigned msr, u64 *p)
> return err;
> }
>
> -static __always_inline u64 rdpmcq(int counter)
> -{
> - return PVOP_CALL1(u64, cpu.read_pmc, counter);
> -}
> -
> static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
> {
> PVOP_VCALL2(cpu.alloc_ldt, ldt, entries);
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index 631c306ce1ff..475f508531d6 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -101,8 +101,6 @@ struct pv_cpu_ops {
> u64 (*read_msr_safe)(unsigned int msr, int *err);
> int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
>
> - u64 (*read_pmc)(int counter);
> -
> void (*start_context_switch)(struct task_struct *prev);
> void (*end_context_switch)(struct task_struct *next);
> #endif
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index 1ccd05d8999f..28d195ad7514 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -132,7 +132,6 @@ struct paravirt_patch_template pv_ops = {
> .cpu.write_msr = native_write_msr,
> .cpu.read_msr_safe = native_read_msr_safe,
> .cpu.write_msr_safe = native_write_msr_safe,
> - .cpu.read_pmc = native_read_pmc,
> .cpu.load_tr_desc = native_load_tr_desc,
> .cpu.set_ldt = native_set_ldt,
> .cpu.load_gdt = native_load_gdt,
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 846b5737d320..9fbe187aff00 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -1236,8 +1236,6 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
> .read_msr_safe = xen_read_msr_safe,
> .write_msr_safe = xen_write_msr_safe,
>
> - .read_pmc = xen_read_pmc,
> -
> .load_tr_desc = paravirt_nop,
> .set_ldt = xen_set_ldt,
> .load_gdt = xen_load_gdt,
> diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
> index 7edd0b5e0e77..8af3b4d7ef4d 100644
> --- a/drivers/net/vmxnet3/vmxnet3_drv.c
> +++ b/drivers/net/vmxnet3/vmxnet3_drv.c
> @@ -151,7 +151,7 @@ static u64
> vmxnet3_get_cycles(int pmc)
> {
> #ifdef CONFIG_X86
> - return native_read_pmc(pmc);
> + return native_rdpmcq(pmc);
> #else
> return 0;
> #endif
Download attachment "OpenPGP_0xB0DE9DD628BF132F.asc" of type "application/pgp-keys" (3684 bytes)
Download attachment "OpenPGP_signature.asc" of type "application/pgp-signature" (496 bytes)
Powered by blists - more mailing lists