lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <874iqbfj1n.wl-maz@kernel.org>
Date: Sun, 30 Nov 2025 18:54:12 +0000
From: Marc Zyngier <maz@...nel.org>
To: Vincent Donnefort <vdonnefort@...gle.com>
Cc: rostedt@...dmis.org,
	mhiramat@...nel.org,
	mathieu.desnoyers@...icios.com,
	linux-trace-kernel@...r.kernel.org,
	oliver.upton@...ux.dev,
	joey.gouly@....com,
	suzuki.poulose@....com,
	yuzenghui@...wei.com,
	kvmarm@...ts.linux.dev,
	linux-arm-kernel@...ts.infradead.org,
	jstultz@...gle.com,
	qperret@...gle.com,
	will@...nel.org,
	aneesh.kumar@...nel.org,
	kernel-team@...roid.com,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 25/28] KVM: arm64: Add event support to the pKVM hyp and trace remote

On Fri, 07 Nov 2025 09:38:37 +0000,
Vincent Donnefort <vdonnefort@...gle.com> wrote:
> 
> Allow the creation of hypervisor and trace remote events with a single
> macro HYP_EVENT(). That macro expands in the kernel side to add all
> the required declarations (based on REMOTE_EVENT()) as well as in the
> hypervisor side to create the trace_<event>() function.
> 
> Signed-off-by: Vincent Donnefort <vdonnefort@...gle.com>
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 4faabf398881..f7b29eae7010 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -95,6 +95,7 @@ enum __kvm_host_smccc_func {
>  	__KVM_HOST_SMCCC_FUNC___pkvm_enable_tracing,
>  	__KVM_HOST_SMCCC_FUNC___pkvm_reset_tracing,
>  	__KVM_HOST_SMCCC_FUNC___pkvm_swap_reader_tracing,
> +	__KVM_HOST_SMCCC_FUNC___pkvm_enable_event,

nit: add 'tracing' to the name of the function, like its little
friends. Saves us from wondering whether this is about PMU events or
not...

>  };
>  
>  #define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
> diff --git a/arch/arm64/include/asm/kvm_define_hypevents.h b/arch/arm64/include/asm/kvm_define_hypevents.h
> new file mode 100644
> index 000000000000..0ef5a9eefcbe
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_define_hypevents.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef HYP_EVENT_FILE
> +# undef __ARM64_KVM_HYPEVENTS_H_
> +# define REMOTE_EVENT_INCLUDE_FILE arch/arm64/include/asm/kvm_hypevents.h
> +#else
> +# define REMOTE_EVENT_INCLUDE_FILE HYP_EVENT_FILE
> +#endif

I'm feeling a bit sick here. Can you please document how the
whole repainting trickery works, how the event equivalence works, and
what the whole thing depends on? I *really* don't want to have to
reverse engineer this stuff when it breaks.

> +
> +#define REMOTE_EVENT_SECTION "_hyp_events"
> +
> +#define HE_STRUCT(__args)		__args
> +#define HE_PRINTK(__args...)		__args
> +#define he_field			re_field
> +
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
> +	REMOTE_EVENT(__name, 0, RE_STRUCT(__struct), RE_PRINTK(__printk))
> +
> +#define HYP_EVENT_MULTI_READ
> +
> +#include <trace/define_remote_events.h>
> diff --git a/arch/arm64/include/asm/kvm_hypevents.h b/arch/arm64/include/asm/kvm_hypevents.h
> new file mode 100644
> index 000000000000..d6e033c96c52
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_hypevents.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#if !defined(__ARM64_KVM_HYPEVENTS_H_) || defined(HYP_EVENT_MULTI_READ)
> +#define __ARM64_KVM_HYPEVENTS_H_
> +
> +#ifdef __KVM_NVHE_HYPERVISOR__
> +#include <nvhe/trace.h>
> +#endif
> +
> +#endif
> diff --git a/arch/arm64/include/asm/kvm_hyptrace.h b/arch/arm64/include/asm/kvm_hyptrace.h
> index 9c30a479bc36..d6e0953a07d6 100644
> --- a/arch/arm64/include/asm/kvm_hyptrace.h
> +++ b/arch/arm64/include/asm/kvm_hyptrace.h
> @@ -10,4 +10,17 @@ struct hyp_trace_desc {
>  	struct trace_buffer_desc	trace_buffer_desc;
>  
>  };
> +
> +struct hyp_event_id {
> +	unsigned short	id;
> +	void		*data;
> +};
> +
> +extern struct remote_event __hyp_events_start[];
> +extern struct remote_event __hyp_events_end[];
> +
> +/* hyp_event section used by the hypervisor */
> +extern struct hyp_event_id __hyp_event_ids_start[];
> +extern struct hyp_event_id __hyp_event_ids_end[];
> +
>  #endif
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 5369763606e7..c0efa9aa541b 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -137,6 +137,10 @@ KVM_NVHE_ALIAS(__hyp_data_start);
>  KVM_NVHE_ALIAS(__hyp_data_end);
>  KVM_NVHE_ALIAS(__hyp_rodata_start);
>  KVM_NVHE_ALIAS(__hyp_rodata_end);
> +#ifdef CONFIG_PKVM_TRACING
> +KVM_NVHE_ALIAS(__hyp_event_ids_start);
> +KVM_NVHE_ALIAS(__hyp_event_ids_end);
> +#endif
>  
>  /* pKVM static key */
>  KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index ad6133b89e7a..0e201a3c8de5 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -13,12 +13,23 @@
>  	*(__kvm_ex_table)					\
>  	__stop___kvm_ex_table = .;
>  
> +#ifdef CONFIG_PKVM_TRACING
> +#define HYPERVISOR_EVENT_IDS 					\
> +	. = ALIGN(PAGE_SIZE);					\
> +	__hyp_event_ids_start = .;				\
> +	*(HYP_SECTION_NAME(.event_ids))				\
> +	__hyp_event_ids_end = .;
> +#else
> +#define HYPERVISOR_EVENT_IDS
> +#endif
> +
>  #define HYPERVISOR_RODATA_SECTIONS				\
>  	HYP_SECTION_NAME(.rodata) : {				\
>  		. = ALIGN(PAGE_SIZE);				\
>  		__hyp_rodata_start = .;				\
>  		*(HYP_SECTION_NAME(.data..ro_after_init))	\
>  		*(HYP_SECTION_NAME(.rodata))			\
> +		HYPERVISOR_EVENT_IDS				\
>  		. = ALIGN(PAGE_SIZE);				\
>  		__hyp_rodata_end = .;				\
>  	}
> @@ -307,6 +318,13 @@ SECTIONS
>  
>  	HYPERVISOR_DATA_SECTION
>  
> +#ifdef CONFIG_PKVM_TRACING
> +	.data.hyp_events : {
> +		__hyp_events_start = .;
> +		*(SORT(_hyp_events.*))
> +		__hyp_events_end = .;
> +	}
> +#endif
>  	/*
>  	 * Data written with the MMU off but read with the MMU on requires
>  	 * cache lines to be invalidated, discarding up to a Cache Writeback
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/define_events.h b/arch/arm64/kvm/hyp/include/nvhe/define_events.h
> new file mode 100644
> index 000000000000..2298b49cb355
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/include/nvhe/define_events.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef HYP_EVENT_FILE
> +# define __HYP_EVENT_FILE <asm/kvm_hypevents.h>
> +#else
> +# define __HYP_EVENT_FILE __stringify(HYP_EVENT_FILE)
> +#endif
> +
> +#undef HYP_EVENT
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk)	\
> +	atomic_t __ro_after_init __name##_enabled = ATOMIC_INIT(0);	\
> +	struct hyp_event_id hyp_event_id_##__name			\
> +	__section(".hyp.event_ids."#__name) = {				\
> +		.data = (void *)&__name##_enabled,			\
> +	}
> +
> +#define HYP_EVENT_MULTI_READ
> +#include __HYP_EVENT_FILE
> +#undef HYP_EVENT_MULTI_READ
> +
> +#undef HYP_EVENT
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/trace.h b/arch/arm64/kvm/hyp/include/nvhe/trace.h
> index 0d2732f0d406..f7b286e92853 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/trace.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/trace.h
> @@ -1,21 +1,52 @@
>  /* SPDX-License-Identifier: GPL-2.0-only */
>  #ifndef __ARM64_KVM_HYP_NVHE_TRACE_H
>  #define __ARM64_KVM_HYP_NVHE_TRACE_H
> +
> +#include <linux/trace_remote_event.h>
> +
>  #include <asm/kvm_hyptrace.h>
>  
> +#define HE_PROTO(__args...)	__args
> +
>  #ifdef CONFIG_PKVM_TRACING
>  void *tracing_reserve_entry(unsigned long length);
>  void tracing_commit_entry(void);
>  
> +#define HE_ASSIGN(__args...)	__args
> +#define HE_STRUCT		RE_STRUCT
> +#define he_field		re_field
> +
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk)		\
> +	REMOTE_EVENT_FORMAT(__name, __struct);					\
> +	extern atomic_t __name##_enabled;					\
> +	extern struct hyp_event_id hyp_event_id_##__name;			\
> +	static __always_inline void trace_##__name(__proto)			\
> +	{									\
> +		struct remote_event_format_##__name *__entry;			\
> +		size_t length = sizeof(*__entry);				\
> +										\
> +		if (!atomic_read(&__name##_enabled))				\
> +			return;							\
> +		__entry = tracing_reserve_entry(length);			\
> +		if (!__entry)							\
> +			return;							\
> +		__entry->hdr.id = hyp_event_id_##__name.id;			\
> +		__assign							\
> +		tracing_commit_entry();						\
> +	}
> +
>  void __pkvm_update_clock_tracing(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
>  int __pkvm_load_tracing(unsigned long desc_va, size_t desc_size);
>  void __pkvm_unload_tracing(void);
>  int __pkvm_enable_tracing(bool enable);
>  int __pkvm_reset_tracing(unsigned int cpu);
>  int __pkvm_swap_reader_tracing(unsigned int cpu);
> +int __pkvm_enable_event(unsigned short id, bool enable);
>  #else
>  static inline void *tracing_reserve_entry(unsigned long length) { return NULL; }
>  static inline void tracing_commit_entry(void) { }
> +#define HYP_EVENT(__name, __proto, __struct, __assign, __printk)      \
> +	static inline void trace_##__name(__proto) {}
>  
>  static inline
>  void __pkvm_update_clock_tracing(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
> @@ -24,5 +55,6 @@ static inline void __pkvm_unload_tracing(void) { }
>  static inline int __pkvm_enable_tracing(bool enable) { return -ENODEV; }
>  static inline int __pkvm_reset_tracing(unsigned int cpu) { return -ENODEV; }
>  static inline int __pkvm_swap_reader_tracing(unsigned int cpu) { return -ENODEV; }
> +static inline int __pkvm_enable_event(unsigned short id, bool enable)  { return -ENODEV; }
>  #endif
>  #endif
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index 504c3b9caef8..b77959e963f3 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -29,7 +29,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
>  	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
>  hyp-obj-y += ../../../kernel/smccc-call.o
>  hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> -hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o trace.o ../../../../../kernel/trace/simple_ring_buffer.o
> +hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o trace.o ../../../../../kernel/trace/simple_ring_buffer.o events.o
>  hyp-obj-y += $(lib-objs)
>  
>  ##
> diff --git a/arch/arm64/kvm/hyp/nvhe/events.c b/arch/arm64/kvm/hyp/nvhe/events.c
> new file mode 100644
> index 000000000000..5905b42cb0d0
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/events.c
> @@ -0,0 +1,36 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 Google LLC
> + * Author: Vincent Donnefort <vdonnefort@...gle.com>
> + */
> +
> +#include <nvhe/mm.h>
> +#include <nvhe/trace.h>
> +
> +#include <nvhe/define_events.h>
> +
> +extern struct hyp_event_id __hyp_event_ids_start[];
> +extern struct hyp_event_id __hyp_event_ids_end[];

Isn't that already declared in an include file?

> +
> +int __pkvm_enable_event(unsigned short id, bool enable)
> +{
> +	struct hyp_event_id *event_id = __hyp_event_ids_start;
> +	atomic_t *enable_key;
> +
> +	for (; (unsigned long)event_id < (unsigned long)__hyp_event_ids_end;
> +	     event_id++) {
> +		if (event_id->id != id)
> +			continue;
> +
> +		enable_key = (atomic_t *)event_id->data;
> +		enable_key = hyp_fixmap_map(__hyp_pa(enable_key));
> +
> +		atomic_set(enable_key, enable);
> +
> +		hyp_fixmap_unmap();
> +
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index 8adad701fc76..5e4b519e5204 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -634,6 +634,14 @@ static void handle___pkvm_swap_reader_tracing(struct kvm_cpu_context *host_ctxt)
>  	cpu_reg(host_ctxt, 1) = __pkvm_swap_reader_tracing(cpu);
>  }
>  
> +static void handle___pkvm_enable_event(struct kvm_cpu_context *host_ctxt)
> +{
> +	DECLARE_REG(unsigned short, id, host_ctxt, 1);
> +	DECLARE_REG(bool, enable, host_ctxt, 2);
> +
> +	cpu_reg(host_ctxt, 1) = __pkvm_enable_event(id, enable);
> +}
> +
>  typedef void (*hcall_t)(struct kvm_cpu_context *);
>  
>  #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> @@ -681,6 +689,7 @@ static const hcall_t host_hcall[] = {
>  	HANDLE_FUNC(__pkvm_enable_tracing),
>  	HANDLE_FUNC(__pkvm_reset_tracing),
>  	HANDLE_FUNC(__pkvm_swap_reader_tracing),
> +	HANDLE_FUNC(__pkvm_enable_event),
>  };
>  
>  static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> index d724f6d69302..a68411bf4bef 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> @@ -16,6 +16,12 @@ SECTIONS {
>  	HYP_SECTION(.text)
>  	HYP_SECTION(.data..ro_after_init)
>  	HYP_SECTION(.rodata)
> +#ifdef CONFIG_PKVM_TRACING
> +	. = ALIGN(PAGE_SIZE);
> +	BEGIN_HYP_SECTION(.event_ids)
> +		*(SORT(.hyp.event_ids.*))
> +	END_HYP_SECTION
> +#endif
>  
>  	/*
>  	 * .hyp..data..percpu needs to be page aligned to maintain the same
> diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
> index 1062b4310f8c..73539f5b5e42 100644
> --- a/arch/arm64/kvm/hyp_trace.c
> +++ b/arch/arm64/kvm/hyp_trace.c
> @@ -307,7 +307,7 @@ static int hyp_trace_reset(unsigned int cpu, void *priv)
>  
>  static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
>  {
> -	return 0;
> +	return kvm_call_hyp_nvhe(__pkvm_enable_event, id, enable);
>  }
>  
>  static int hyp_trace_clock_show(struct seq_file *m, void *v)
> @@ -334,10 +334,27 @@ static struct trace_remote_callbacks trace_remote_callbacks = {
>  	.enable_event		= hyp_trace_enable_event,
>  };
>  
> +#include <asm/kvm_define_hypevents.h>
> +
> +static void hyp_trace_init_events(void)
> +{
> +	struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
> +	struct remote_event *event = __hyp_events_start;
> +	int id = 0;
> +
> +	/* Events are sorted on both the kernel and hypervisor sides */
> +	for (; (unsigned long)event < (unsigned long)__hyp_events_end;

It feels very bizarre that you have to cast anything here. Aren't the
two variables of the same type, part of the same array, and shouldn't
pointer arithmetic apply?

> +		event++, hyp_event_id++, id++)
> +		event->id = hyp_event_id->id = id;
> +}
> +
>  int hyp_trace_init(void)
>  {
>  	if (!is_protected_kvm_enabled())
>  		return 0;
>  
> -	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer, NULL, 0);
> +	hyp_trace_init_events();
> +
> +	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
> +				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
>  }
> diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
> index 4f2b67d1bfec..e54cc3e75dc5 100644
> --- a/kernel/trace/trace_remote.c
> +++ b/kernel/trace/trace_remote.c
> @@ -1040,7 +1040,7 @@ static int remote_event_format_show(struct seq_file *s, void *unused)
>  	while (field->name) {
>  		seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
>  			   field->type, field->name, offset, field->size,
> -			   !field->is_signed);
> +			   field->is_signed);
>  		offset += field->size;
>  		field++;
>  	}
> @@ -1071,7 +1071,7 @@ static int remote_event_callback(const char *name, umode_t *mode, void **data,
>  
>  	if (!strcmp(name, "format")) {
>  		*mode = TRACEFS_MODE_READ;
> -		*fops = &remote_event_id_fops;
> +		*fops = &remote_event_format_fops;
>  		return 1;
>  	}
>  

Thanks,

	M.

-- 
Jazz isn't dead. It just smells funny.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ