linux-kernel - Re: [PATCH v2 2/2] KVM: selftests: Test TPR / CR8 sync and interrupt masking

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ck4k7g7pd77ojfptkp4yg6qz66queg2n6eo4o54ezhdbv4rvgn@mpuss5twpxhi>
Date: Wed, 10 Sep 2025 19:03:38 +0530
From: Naveen N Rao <naveen@...nel.org>
To: "Maciej S. Szmigiero" <mail@...iej.szmigiero.name>
Cc: Paolo Bonzini <pbonzini@...hat.com>, 
	Sean Christopherson <seanjc@...gle.com>, Maxim Levitsky <mlevitsk@...hat.com>, 
	Suravee Suthikulpanit <Suravee.Suthikulpanit@....com>, Alejandro Jimenez <alejandro.j.jimenez@...cle.com>, 
	kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/2] KVM: selftests: Test TPR / CR8 sync and interrupt
 masking

On Mon, Aug 25, 2025 at 06:44:29PM +0200, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@...cle.com>
> 
> Add a few extra TPR / CR8 tests to x86's xapic_state_test to see if:
> * TPR is 0 on reset,
> * TPR, PPR and CR8 are equal inside the guest,
> * TPR and CR8 read equal by the host after a VMExit
> * TPR borderline values set by the host correctly mask interrupts in the
> guest.
> 
> These hopefully will catch the most obvious cases of improper TPR sync or
> interrupt masking.
> 
> Do these tests both in x2APIC and xAPIC modes.
> The x2APIC mode uses SELF_IPI register to trigger interrupts to give it a
> bit of exercise too.
> 
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@...cle.com>
> ---
>  .../testing/selftests/kvm/include/x86/apic.h  |   5 +
>  .../selftests/kvm/x86/xapic_state_test.c      | 265 +++++++++++++++++-
>  2 files changed, 267 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
> index 80fe9f69b38d..67285e533e14 100644
> --- a/tools/testing/selftests/kvm/include/x86/apic.h
> +++ b/tools/testing/selftests/kvm/include/x86/apic.h
> @@ -27,7 +27,11 @@
>  #define	APIC_LVR	0x30
>  #define		GET_APIC_ID_FIELD(x)	(((x) >> 24) & 0xFF)
>  #define	APIC_TASKPRI	0x80
> +#define		APIC_TASKPRI_TP_SHIFT	4
> +#define		APIC_TASKPRI_TP_MASK	GENMASK(7, 4)
>  #define	APIC_PROCPRI	0xA0
> +#define		APIC_PROCPRI_PP_SHIFT	4
> +#define		APIC_PROCPRI_PP_MASK	GENMASK(7, 4)

These can probably be simplified with get()/set() macros. Something like 
this:
#define	GET_APIC_PRI(x)		(((x) >> 4) & 0xF)
#define	SET_APIC_PRI(x, y)	(((x) & ~0xF0) | (((y) & 0xF) << 4))

>  #define	APIC_EOI	0xB0
>  #define	APIC_SPIV	0xF0
>  #define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
> @@ -67,6 +71,7 @@
>  #define	APIC_TMICT	0x380
>  #define	APIC_TMCCT	0x390
>  #define	APIC_TDCR	0x3E0
> +#define	APIC_SELF_IPI	0x3F0
>  
>  void apic_disable(void);
>  void xapic_enable(void);
> diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
> index fdebff1165c7..968e5e539a1a 100644
> --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
> +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
> @@ -1,9 +1,11 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  #include <fcntl.h>
> +#include <stdatomic.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sys/ioctl.h>
> +#include <unistd.h>
>  
>  #include "apic.h"
>  #include "kvm_util.h"
> @@ -16,6 +18,245 @@ struct xapic_vcpu {
>  	bool has_xavic_errata;
>  };
>  
> +#define IRQ_VECTOR 0x20
> +
> +/* See also the comment at similar assertion in memslot_perf_test.c */
> +static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
> +
> +static atomic_uint tpr_guest_irq_sync_val;
> +
> +static void tpr_guest_irq_sync_flag_reset(void)
> +{
> +	atomic_store_explicit(&tpr_guest_irq_sync_val, 0,
> +			      memory_order_release);
> +}
> +
> +static unsigned int tpr_guest_irq_sync_val_get(void)
> +{
> +	return atomic_load_explicit(&tpr_guest_irq_sync_val,
> +				    memory_order_acquire);
> +}
> +
> +static void tpr_guest_irq_sync_val_inc(void)
> +{
> +	atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1,
> +				  memory_order_acq_rel);
> +}
> +
> +static void tpr_guest_irq_handler_xapic(struct ex_regs *regs)
> +{
> +	tpr_guest_irq_sync_val_inc();
> +
> +	xapic_write_reg(APIC_EOI, 0);
> +}
> +
> +static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs)
> +{
> +	tpr_guest_irq_sync_val_inc();
> +
> +	x2apic_write_reg(APIC_EOI, 0);
> +}
> +
> +static void tpr_guest_irq_queue(bool x2apic)
> +{
> +	if (x2apic) {
> +		x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR);
> +	} else {
> +		uint32_t icr, icr2;
> +
> +		icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED |
> +			IRQ_VECTOR;
> +		icr2 = 0;
> +
> +		xapic_write_reg(APIC_ICR2, icr2);
> +		xapic_write_reg(APIC_ICR, icr);
> +	}
> +}
> +
> +static uint8_t tpr_guest_tpr_get(bool x2apic)
> +{
> +	uint32_t taskpri;
> +
> +	if (x2apic)
> +		taskpri = x2apic_read_reg(APIC_TASKPRI);
> +	else
> +		taskpri = xapic_read_reg(APIC_TASKPRI);

Rather than pass x2apic flag to all these helpers, it might be better to 
have a global is_x2apic, and helpers for reading APIC registers.  See 
tools/testing/selftests/kvm/x86/apic_bus_clock_test.c for an example 
that we should be able to adopt here.

> +
> +	return (taskpri & APIC_TASKPRI_TP_MASK) >> APIC_TASKPRI_TP_SHIFT;
> +}
> +
> +static uint8_t tpr_guest_ppr_get(bool x2apic)
> +{
> +	uint32_t procpri;
> +
> +	if (x2apic)
> +		procpri = x2apic_read_reg(APIC_PROCPRI);
> +	else
> +		procpri = xapic_read_reg(APIC_PROCPRI);
> +
> +	return (procpri & APIC_PROCPRI_PP_MASK) >> APIC_PROCPRI_PP_SHIFT;
> +}
> +
> +static uint8_t tpr_guest_cr8_get(void)
> +{
> +	uint64_t cr8;
> +
> +	asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8));
> +
> +	return cr8 & GENMASK(3, 0);

Why mask off the remaining bits? Shouldn't they all be zero?

> +}
> +
> +static void tpr_guest_check_tpr_ppr_cr8_equal(bool x2apic)
> +{
> +	uint8_t tpr;
> +
> +	tpr = tpr_guest_tpr_get(x2apic);
> +
> +	GUEST_ASSERT_EQ(tpr_guest_ppr_get(x2apic), tpr);
> +	GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr);
> +}
> +
> +static void tpr_guest_code(uint64_t x2apic)
> +{
> +	cli();
> +
> +	if (x2apic)
> +		x2apic_enable();
> +	else
> +		xapic_enable();
> +
> +	tpr_guest_check_tpr_ppr_cr8_equal(x2apic);

Would be good to confirm that the guest reads a zero TPR here on 
startup.

> +
> +	tpr_guest_irq_queue(x2apic);
> +
> +	/* TPR = 0 but IRQ masked by IF=0, should not fire */
> +	udelay(1000);
> +	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0);
> +
> +	sti();
> +
> +	/* IF=1 now, IRQ should fire */
> +	while (tpr_guest_irq_sync_val_get() == 0)
> +		cpu_relax();
> +	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
> +
> +	GUEST_SYNC(0);
> +	tpr_guest_check_tpr_ppr_cr8_equal(x2apic);
> +
> +	tpr_guest_irq_queue(x2apic);
> +
> +	/* IRQ masked by barely high enough TPR now, should not fire */
> +	udelay(1000);
> +	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
> +
> +	GUEST_SYNC(1);
> +	tpr_guest_check_tpr_ppr_cr8_equal(x2apic);
> +
> +	/* TPR barely low enough now to unmask IRQ, should fire */
> +	while (tpr_guest_irq_sync_val_get() == 1)
> +		cpu_relax();
> +	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2);
> +
> +	GUEST_DONE();
> +}

You don't necessarily have to do it, but it would be good to have a test 
where the guest updates the TPR too -- as a way to confirm that V_TPR is 
kept in sync with CR8 and TPR.

> +
> +static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic)
> +{
> +	return (*((u32 *)&xapic->regs[APIC_TASKPRI]) & APIC_TASKPRI_TP_MASK) >>
> +		APIC_TASKPRI_TP_SHIFT;
> +}
> +
> +static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val)
> +{
> +	*((u32 *)&xapic->regs[APIC_TASKPRI]) &= ~APIC_TASKPRI_TP_MASK;
> +	*((u32 *)&xapic->regs[APIC_TASKPRI]) |= val << APIC_TASKPRI_TP_SHIFT;
> +}
> +
> +static uint8_t sregs_tpr(struct kvm_sregs *sregs)
> +{
> +	return sregs->cr8 & GENMASK(3, 0);

Here too.. do we need to mask the reserved bits?

> +}
> +
> +static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic_state xapic;
> +
> +	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
> +
> +	TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0);
> +}
> +
> +static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_sregs sregs;
> +	struct kvm_lapic_state xapic;
> +
> +	vcpu_sregs_get(vcpu, &sregs);
> +	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
> +
> +	TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic));
> +}
> +
> +static void test_tpr_mask_irq(struct kvm_vcpu *vcpu, bool mask)
> +{
> +	struct kvm_lapic_state xapic;
> +	uint8_t tpr;
> +
> +	static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number");
> +	tpr = IRQ_VECTOR / 16;
> +	if (!mask)
> +		tpr--;
> +
> +	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
> +	lapic_tpr_set(&xapic, tpr);
> +	vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
> +}
> +
> +static void test_tpr(struct kvm_vcpu *vcpu, bool x2apic)
> +{
> +	bool run_guest = true;
> +
> +	vcpu_args_set(vcpu, 1, (uint64_t)x2apic);
> +
> +	/* According to the SDM/APM the TPR value at reset is 0 */
> +	test_tpr_check_tpr_zero(vcpu);
> +	test_tpr_check_tpr_cr8_equal(vcpu);
> +
> +	tpr_guest_irq_sync_flag_reset();
> +
> +	while (run_guest) {
> +		struct ucall uc;
> +
> +		alarm(2);
> +		vcpu_run(vcpu);
> +		alarm(0);
> +
> +		switch (get_ucall(vcpu, &uc)) {
> +		case UCALL_ABORT:
> +			REPORT_GUEST_ASSERT(uc);
> +			break;
> +		case UCALL_DONE:
> +			test_tpr_check_tpr_cr8_equal(vcpu);
> +
> +			run_guest = false;
> +			break;
> +		case UCALL_SYNC:
> +			test_tpr_check_tpr_cr8_equal(vcpu);
> +
> +			if (uc.args[1] == 0)
> +				test_tpr_mask_irq(vcpu, true);
> +			else if (uc.args[1] == 1)
> +				test_tpr_mask_irq(vcpu, false);

Having wrappers around that will make this clearer, I think:
	test_tpr_set_tpr()
	test_tpr_clear_tpr()
or such?

> +			else
> +				TEST_FAIL("Unknown SYNC %lu", uc.args[1]);
> +			break;
> +		default:
> +			TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd);
> +			break;
> +		}
> +	}
> +}
> +
>  static void xapic_guest_code(void)
>  {
>  	cli();
> @@ -195,6 +436,12 @@ static void test_apic_id(void)
>  	kvm_vm_free(vm);
>  }
>  
> +static void clear_x2apic_cap_map_apic(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> +	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC);
> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> +}
> +
>  static void test_x2apic_id(void)
>  {
>  	struct kvm_lapic_state lapic = {};
> @@ -230,10 +477,17 @@ int main(int argc, char *argv[])
>  	};
>  	struct kvm_vm *vm;
>  
> +	/* x2APIC tests */
> +
>  	vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
>  	test_icr(&x);
>  	kvm_vm_free(vm);
>  
> +	vm = vm_create_with_one_vcpu(&x.vcpu, tpr_guest_code);
> +	vm_install_exception_handler(vm, IRQ_VECTOR, tpr_guest_irq_handler_x2apic);
> +	test_tpr(x.vcpu, true);

Any reason not to pass in a pointer to x similar to test_icr()?


- Naveen