[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <874iqbfj1n.wl-maz@kernel.org>
Date: Sun, 30 Nov 2025 18:54:12 +0000
From: Marc Zyngier <maz@...nel.org>
To: Vincent Donnefort <vdonnefort@...gle.com>
Cc: rostedt@...dmis.org,
mhiramat@...nel.org,
mathieu.desnoyers@...icios.com,
linux-trace-kernel@...r.kernel.org,
oliver.upton@...ux.dev,
joey.gouly@....com,
suzuki.poulose@....com,
yuzenghui@...wei.com,
kvmarm@...ts.linux.dev,
linux-arm-kernel@...ts.infradead.org,
jstultz@...gle.com,
qperret@...gle.com,
will@...nel.org,
aneesh.kumar@...nel.org,
kernel-team@...roid.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 25/28] KVM: arm64: Add event support to the pKVM hyp and trace remote
On Fri, 07 Nov 2025 09:38:37 +0000,
Vincent Donnefort <vdonnefort@...gle.com> wrote:
>
> Allow the creation of hypervisor and trace remote events with a single
> macro HYP_EVENT(). That macro expands in the kernel side to add all
> the required declarations (based on REMOTE_EVENT()) as well as in the
> hypervisor side to create the trace_<event>() function.
>
> Signed-off-by: Vincent Donnefort <vdonnefort@...gle.com>
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 4faabf398881..f7b29eae7010 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -95,6 +95,7 @@ enum __kvm_host_smccc_func {
> __KVM_HOST_SMCCC_FUNC___pkvm_enable_tracing,
> __KVM_HOST_SMCCC_FUNC___pkvm_reset_tracing,
> __KVM_HOST_SMCCC_FUNC___pkvm_swap_reader_tracing,
> + __KVM_HOST_SMCCC_FUNC___pkvm_enable_event,
nit: add 'tracing' to the name of the function, like its little
friends. Saves us from wondering whether this is about PMU events or
not...
> };
>
> #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
> diff --git a/arch/arm64/include/asm/kvm_define_hypevents.h b/arch/arm64/include/asm/kvm_define_hypevents.h
> new file mode 100644
> index 000000000000..0ef5a9eefcbe
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_define_hypevents.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef HYP_EVENT_FILE
> +# undef __ARM64_KVM_HYPEVENTS_H_
> +# define REMOTE_EVENT_INCLUDE_FILE arch/arm64/include/asm/kvm_hypevents.h
> +#else
> +# define REMOTE_EVENT_INCLUDE_FILE HYP_EVENT_FILE
> +#endif
I'm feeling a bit sick here. Can you please document here how the
whole repainting trickery works, how the event equivalence works, and
what the whole thing depends on? I *really* don't want to have to
reverse engineer this stuff when it breaks.
> +
> +#define REMOTE_EVENT_SECTION "_hyp_events"
> +
> +#define HE_STRUCT(__args) __args
> +#define HE_PRINTK(__args...) __args
> +#define he_field re_field
> +
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
> + REMOTE_EVENT(__name, 0, RE_STRUCT(__struct), RE_PRINTK(__printk))
> +
> +#define HYP_EVENT_MULTI_READ
> +
> +#include <trace/define_remote_events.h>
> diff --git a/arch/arm64/include/asm/kvm_hypevents.h b/arch/arm64/include/asm/kvm_hypevents.h
> new file mode 100644
> index 000000000000..d6e033c96c52
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_hypevents.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#if !defined(__ARM64_KVM_HYPEVENTS_H_) || defined(HYP_EVENT_MULTI_READ)
> +#define __ARM64_KVM_HYPEVENTS_H_
> +
> +#ifdef __KVM_NVHE_HYPERVISOR__
> +#include <nvhe/trace.h>
> +#endif
> +
> +#endif
> diff --git a/arch/arm64/include/asm/kvm_hyptrace.h b/arch/arm64/include/asm/kvm_hyptrace.h
> index 9c30a479bc36..d6e0953a07d6 100644
> --- a/arch/arm64/include/asm/kvm_hyptrace.h
> +++ b/arch/arm64/include/asm/kvm_hyptrace.h
> @@ -10,4 +10,17 @@ struct hyp_trace_desc {
> struct trace_buffer_desc trace_buffer_desc;
>
> };
> +
> +struct hyp_event_id {
> + unsigned short id;
> + void *data;
> +};
> +
> +extern struct remote_event __hyp_events_start[];
> +extern struct remote_event __hyp_events_end[];
> +
> +/* hyp_event section used by the hypervisor */
> +extern struct hyp_event_id __hyp_event_ids_start[];
> +extern struct hyp_event_id __hyp_event_ids_end[];
> +
> #endif
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 5369763606e7..c0efa9aa541b 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -137,6 +137,10 @@ KVM_NVHE_ALIAS(__hyp_data_start);
> KVM_NVHE_ALIAS(__hyp_data_end);
> KVM_NVHE_ALIAS(__hyp_rodata_start);
> KVM_NVHE_ALIAS(__hyp_rodata_end);
> +#ifdef CONFIG_PKVM_TRACING
> +KVM_NVHE_ALIAS(__hyp_event_ids_start);
> +KVM_NVHE_ALIAS(__hyp_event_ids_end);
> +#endif
>
> /* pKVM static key */
> KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index ad6133b89e7a..0e201a3c8de5 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -13,12 +13,23 @@
> *(__kvm_ex_table) \
> __stop___kvm_ex_table = .;
>
> +#ifdef CONFIG_PKVM_TRACING
> +#define HYPERVISOR_EVENT_IDS \
> + . = ALIGN(PAGE_SIZE); \
> + __hyp_event_ids_start = .; \
> + *(HYP_SECTION_NAME(.event_ids)) \
> + __hyp_event_ids_end = .;
> +#else
> +#define HYPERVISOR_EVENT_IDS
> +#endif
> +
> #define HYPERVISOR_RODATA_SECTIONS \
> HYP_SECTION_NAME(.rodata) : { \
> . = ALIGN(PAGE_SIZE); \
> __hyp_rodata_start = .; \
> *(HYP_SECTION_NAME(.data..ro_after_init)) \
> *(HYP_SECTION_NAME(.rodata)) \
> + HYPERVISOR_EVENT_IDS \
> . = ALIGN(PAGE_SIZE); \
> __hyp_rodata_end = .; \
> }
> @@ -307,6 +318,13 @@ SECTIONS
>
> HYPERVISOR_DATA_SECTION
>
> +#ifdef CONFIG_PKVM_TRACING
> + .data.hyp_events : {
> + __hyp_events_start = .;
> + *(SORT(_hyp_events.*))
> + __hyp_events_end = .;
> + }
> +#endif
> /*
> * Data written with the MMU off but read with the MMU on requires
> * cache lines to be invalidated, discarding up to a Cache Writeback
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/define_events.h b/arch/arm64/kvm/hyp/include/nvhe/define_events.h
> new file mode 100644
> index 000000000000..2298b49cb355
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/include/nvhe/define_events.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef HYP_EVENT_FILE
> +# define __HYP_EVENT_FILE <asm/kvm_hypevents.h>
> +#else
> +# define __HYP_EVENT_FILE __stringify(HYP_EVENT_FILE)
> +#endif
> +
> +#undef HYP_EVENT
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
> + atomic_t __ro_after_init __name##_enabled = ATOMIC_INIT(0); \
> + struct hyp_event_id hyp_event_id_##__name \
> + __section(".hyp.event_ids."#__name) = { \
> + .data = (void *)&__name##_enabled, \
> + }
> +
> +#define HYP_EVENT_MULTI_READ
> +#include __HYP_EVENT_FILE
> +#undef HYP_EVENT_MULTI_READ
> +
> +#undef HYP_EVENT
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/trace.h b/arch/arm64/kvm/hyp/include/nvhe/trace.h
> index 0d2732f0d406..f7b286e92853 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/trace.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/trace.h
> @@ -1,21 +1,52 @@
> /* SPDX-License-Identifier: GPL-2.0-only */
> #ifndef __ARM64_KVM_HYP_NVHE_TRACE_H
> #define __ARM64_KVM_HYP_NVHE_TRACE_H
> +
> +#include <linux/trace_remote_event.h>
> +
> #include <asm/kvm_hyptrace.h>
>
> +#define HE_PROTO(__args...) __args
> +
> #ifdef CONFIG_PKVM_TRACING
> void *tracing_reserve_entry(unsigned long length);
> void tracing_commit_entry(void);
>
> +#define HE_ASSIGN(__args...) __args
> +#define HE_STRUCT RE_STRUCT
> +#define he_field re_field
> +
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
> + REMOTE_EVENT_FORMAT(__name, __struct); \
> + extern atomic_t __name##_enabled; \
> + extern struct hyp_event_id hyp_event_id_##__name; \
> + static __always_inline void trace_##__name(__proto) \
> + { \
> + struct remote_event_format_##__name *__entry; \
> + size_t length = sizeof(*__entry); \
> + \
> + if (!atomic_read(&__name##_enabled)) \
> + return; \
> + __entry = tracing_reserve_entry(length); \
> + if (!__entry) \
> + return; \
> + __entry->hdr.id = hyp_event_id_##__name.id; \
> + __assign \
> + tracing_commit_entry(); \
> + }
> +
> void __pkvm_update_clock_tracing(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
> int __pkvm_load_tracing(unsigned long desc_va, size_t desc_size);
> void __pkvm_unload_tracing(void);
> int __pkvm_enable_tracing(bool enable);
> int __pkvm_reset_tracing(unsigned int cpu);
> int __pkvm_swap_reader_tracing(unsigned int cpu);
> +int __pkvm_enable_event(unsigned short id, bool enable);
> #else
> static inline void *tracing_reserve_entry(unsigned long length) { return NULL; }
> static inline void tracing_commit_entry(void) { }
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
> + static inline void trace_##__name(__proto) {}
>
> static inline
> void __pkvm_update_clock_tracing(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
> @@ -24,5 +55,6 @@ static inline void __pkvm_unload_tracing(void) { }
> static inline int __pkvm_enable_tracing(bool enable) { return -ENODEV; }
> static inline int __pkvm_reset_tracing(unsigned int cpu) { return -ENODEV; }
> static inline int __pkvm_swap_reader_tracing(unsigned int cpu) { return -ENODEV; }
> +static inline int __pkvm_enable_event(unsigned short id, bool enable) { return -ENODEV; }
> #endif
> #endif
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index 504c3b9caef8..b77959e963f3 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -29,7 +29,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
> hyp-obj-y += ../../../kernel/smccc-call.o
> hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> -hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o trace.o ../../../../../kernel/trace/simple_ring_buffer.o
> +hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o trace.o ../../../../../kernel/trace/simple_ring_buffer.o events.o
> hyp-obj-y += $(lib-objs)
>
> ##
> diff --git a/arch/arm64/kvm/hyp/nvhe/events.c b/arch/arm64/kvm/hyp/nvhe/events.c
> new file mode 100644
> index 000000000000..5905b42cb0d0
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/events.c
> @@ -0,0 +1,36 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 Google LLC
> + * Author: Vincent Donnefort <vdonnefort@...gle.com>
> + */
> +
> +#include <nvhe/mm.h>
> +#include <nvhe/trace.h>
> +
> +#include <nvhe/define_events.h>
> +
> +extern struct hyp_event_id __hyp_event_ids_start[];
> +extern struct hyp_event_id __hyp_event_ids_end[];
Aren't these already declared in an include file (asm/kvm_hyptrace.h,
which nvhe/trace.h pulls in)?
> +
> +int __pkvm_enable_event(unsigned short id, bool enable)
> +{
> + struct hyp_event_id *event_id = __hyp_event_ids_start;
> + atomic_t *enable_key;
> +
> + for (; (unsigned long)event_id < (unsigned long)__hyp_event_ids_end;
> + event_id++) {
> + if (event_id->id != id)
> + continue;
> +
> + enable_key = (atomic_t *)event_id->data;
> + enable_key = hyp_fixmap_map(__hyp_pa(enable_key));
> +
> + atomic_set(enable_key, enable);
> +
> + hyp_fixmap_unmap();
> +
> + return 0;
> + }
> +
> + return -EINVAL;
> +}
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index 8adad701fc76..5e4b519e5204 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -634,6 +634,14 @@ static void handle___pkvm_swap_reader_tracing(struct kvm_cpu_context *host_ctxt)
> cpu_reg(host_ctxt, 1) = __pkvm_swap_reader_tracing(cpu);
> }
>
> +static void handle___pkvm_enable_event(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(unsigned short, id, host_ctxt, 1);
> + DECLARE_REG(bool, enable, host_ctxt, 2);
> +
> + cpu_reg(host_ctxt, 1) = __pkvm_enable_event(id, enable);
> +}
> +
> typedef void (*hcall_t)(struct kvm_cpu_context *);
>
> #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> @@ -681,6 +689,7 @@ static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__pkvm_enable_tracing),
> HANDLE_FUNC(__pkvm_reset_tracing),
> HANDLE_FUNC(__pkvm_swap_reader_tracing),
> + HANDLE_FUNC(__pkvm_enable_event),
> };
>
> static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> index d724f6d69302..a68411bf4bef 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> @@ -16,6 +16,12 @@ SECTIONS {
> HYP_SECTION(.text)
> HYP_SECTION(.data..ro_after_init)
> HYP_SECTION(.rodata)
> +#ifdef CONFIG_PKVM_TRACING
> + . = ALIGN(PAGE_SIZE);
> + BEGIN_HYP_SECTION(.event_ids)
> + *(SORT(.hyp.event_ids.*))
> + END_HYP_SECTION
> +#endif
>
> /*
> * .hyp..data..percpu needs to be page aligned to maintain the same
> diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
> index 1062b4310f8c..73539f5b5e42 100644
> --- a/arch/arm64/kvm/hyp_trace.c
> +++ b/arch/arm64/kvm/hyp_trace.c
> @@ -307,7 +307,7 @@ static int hyp_trace_reset(unsigned int cpu, void *priv)
>
> static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
> {
> - return 0;
> + return kvm_call_hyp_nvhe(__pkvm_enable_event, id, enable);
> }
>
> static int hyp_trace_clock_show(struct seq_file *m, void *v)
> @@ -334,10 +334,27 @@ static struct trace_remote_callbacks trace_remote_callbacks = {
> .enable_event = hyp_trace_enable_event,
> };
>
> +#include <asm/kvm_define_hypevents.h>
> +
> +static void hyp_trace_init_events(void)
> +{
> + struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
> + struct remote_event *event = __hyp_events_start;
> + int id = 0;
> +
> + /* Events on both sides hypervisor are sorted */
> + for (; (unsigned long)event < (unsigned long)__hyp_events_end;
It feels very bizarre that you have to cast anything here. Aren't the
two variables (event and __hyp_events_end) of the same type, part of
the same array, and shouldn't pointer arithmetic apply?
> + event++, hyp_event_id++, id++)
> + event->id = hyp_event_id->id = id;
> +}
> +
> int hyp_trace_init(void)
> {
> if (!is_protected_kvm_enabled())
> return 0;
>
> - return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer, NULL, 0);
> + hyp_trace_init_events();
> +
> + return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
> + __hyp_events_start, __hyp_events_end - __hyp_events_start);
> }
> diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
> index 4f2b67d1bfec..e54cc3e75dc5 100644
> --- a/kernel/trace/trace_remote.c
> +++ b/kernel/trace/trace_remote.c
> @@ -1040,7 +1040,7 @@ static int remote_event_format_show(struct seq_file *s, void *unused)
> while (field->name) {
> seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
> field->type, field->name, offset, field->size,
> - !field->is_signed);
> + field->is_signed);
> offset += field->size;
> field++;
> }
> @@ -1071,7 +1071,7 @@ static int remote_event_callback(const char *name, umode_t *mode, void **data,
>
> if (!strcmp(name, "format")) {
> *mode = TRACEFS_MODE_READ;
> - *fops = &remote_event_id_fops;
> + *fops = &remote_event_format_fops;
> return 1;
> }
>
Thanks,
M.
--
Jazz isn't dead. It just smells funny.
Powered by blists - more mailing lists