Message-ID: <aIzROnILlYuaE2FB@google.com>
Date: Fri, 1 Aug 2025 07:37:46 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: "Xin Li (Intel)" <xin@...or.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org, pbonzini@...hat.com, 
	tglx@...utronix.de, mingo@...hat.com, bp@...en8.de, 
	dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com, 
	chao.gao@...el.com
Subject: Re: [PATCH v1 2/4] KVM: x86: Introduce MSR read/write emulation helpers

On Wed, Jul 30, 2025, Xin Li (Intel) wrote:
> Add helper functions to centralize guest MSR read and write emulation.
> This change consolidates the MSR emulation logic and makes it easier
> to extend support for new MSR-related VM exit reasons introduced with
> the immediate form of MSR instructions.
> 
> Signed-off-by: Xin Li (Intel) <xin@...or.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/x86.c              | 67 +++++++++++++++++++++++----------
>  2 files changed, 49 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f19a76d3ca0e..a854d9a166fe 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -201,6 +201,7 @@ enum kvm_reg {
>  	VCPU_EXREG_SEGMENTS,
>  	VCPU_EXREG_EXIT_INFO_1,
>  	VCPU_EXREG_EXIT_INFO_2,
> +	VCPU_EXREG_EDX_EAX,

I really, really don't want to add a "reg" for this.  It's not an actual register,
and bleeds details of one specific flow throughout KVM.

The only path where KVM _needs_ to differentiate between the "legacy" instructions
and the immediate-form variants is in the inner RDMSR helper.

For the WRMSR helper, KVM can and should simply pass in @data, rather than passing
in a reg and having the helper do an if-else on the reg:

  int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
  {
  	return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu),
  				   kvm_read_edx_eax(vcpu));
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
  
  int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
  {
  	return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm);
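
The inner helper is then more or less the body of today's kvm_emulate_wrmsr() with
@msr and @data taken as parameters, e.g. this completely untested sketch:

  static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
  {
  	int r;
  
  	r = kvm_set_msr_with_filter(vcpu, msr, data);
  	if (!r) {
  		trace_kvm_msr_write(msr, data);
  	} else {
  		/* MSR write failed? See if we should ask user space */
  		if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
  				       complete_fast_msr_access, r))
  			return 0;
  		trace_kvm_msr_write_ex(msr, data);
  	}
  
  	return kvm_x86_call(complete_emulated_msr)(vcpu, r);
  }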

And for the RDMSR userspace completion, KVM is already eating an indirect function
call, so the wrappers can simply pass in the appropriate completion helper.  It
does mean having to duplicate the vcpu->run->msr.error check, but sharing a single
callback would instead mean duplicating an "r == VCPU_EXREG_EDX_EAX" check, *and*
we'd also need to be very careful about setting the effective register in the other
existing flows that utilize complete_fast_rdmsr.

Then to communicate that the legacy form with implicit destination operands is
being emulated, pass -1 for the register.  It's not the prettiest, but I do like
using "reg invalid" to communicate that the destination is implicit.

  static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg,
  			       int (*complete_rdmsr)(struct kvm_vcpu *))
  {
  	u64 data;
  	int r;
  
  	r = kvm_get_msr_with_filter(vcpu, msr, &data);
  	if (!r) {
  		trace_kvm_msr_read(msr, data);
  
  		if (reg < 0) {
  			kvm_rax_write(vcpu, data & -1u);
  			kvm_rdx_write(vcpu, (data >> 32) & -1u);
  		} else {
  			kvm_register_write(vcpu, reg, data);
  		}
  	} else {
  		/* MSR read failed? See if we should ask user space */
  		if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
  				       complete_rdmsr, r))
  			return 0;
  		trace_kvm_msr_read_ex(msr);
  	}
  
  	return kvm_x86_call(complete_emulated_msr)(vcpu, r);
  }
  
  int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
  {
  	return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1,
  				   complete_fast_rdmsr);
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
  
  int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
  {
  	vcpu->arch.cui_rdmsr_imm_reg = reg;
  
  	return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm);
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm);
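
And the _imm completion helper is the counterpart to complete_fast_rdmsr(), except
that it writes the saved destination register instead of EDX:EAX.  Rough, untested
sketch, assuming a new vcpu->arch.cui_rdmsr_imm_reg field (set above) and the
existing complete_fast_msr_access():

  static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu)
  {
  	if (!vcpu->run->msr.error)
  		/* Write the destination GPR saved by kvm_emulate_rdmsr_imm(). */
  		kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg,
  				   vcpu->run->msr.data);
  
  	return complete_fast_msr_access(vcpu);
  }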

>  };
>  
>  enum {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a1c49bc681c4..5086c3b30345 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2024,54 +2024,71 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
>  	return 1;
>  }
>  
> -int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
> +static int kvm_emulate_get_msr(struct kvm_vcpu *vcpu, u32 msr, int reg)

Please keep "rdmsr" and "wrmsr" when dealing with emulation of those instructions to
help differentiate from the many other MSR get/set paths.  (ignore the actual
emulator hooks; that code is crusty, but not worth the churn to clean up).

> @@ -2163,9 +2180,8 @@ static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
>  	return 0;
>  }
>  
> -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
> +static fastpath_t handle_set_msr_irqoff(struct kvm_vcpu *vcpu, u32 msr, int reg)

I think it makes sense to (a) add the x86.c code and the vmx.c code in the same
patch, and then (b) add fastpath support in a separate patch to make the initial
(combined x86.c + vmx.c) patch easier to review.  Adding the x86.c plumbing/logic
before the VMX support makes the x86.c change difficult to review, as there are
no users of the new paths, and the VMX changes are quite tiny.  Ignoring the arch
boilerplate, the VMX changes barely add anything relative to the x86.c changes.

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ae2c8c10e5d2..757e4bb89f36 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6003,6 +6003,23 @@ static int handle_notify(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu)
+{
+       return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO));
+}
+
+static int handle_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+       return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+                                    vmx_get_msr_imm_reg(vcpu));
+}
+
+static int handle_wrmsr_imm(struct kvm_vcpu *vcpu)
+{
+       return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+                                    vmx_get_msr_imm_reg(vcpu));
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -6061,6 +6078,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_ENCLS]                   = handle_encls,
        [EXIT_REASON_BUS_LOCK]                = handle_bus_lock_vmexit,
        [EXIT_REASON_NOTIFY]                  = handle_notify,
+       [EXIT_REASON_MSR_READ_IMM]            = handle_rdmsr_imm,
+       [EXIT_REASON_MSR_WRITE_IMM]           = handle_wrmsr_imm,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -6495,6 +6514,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 #ifdef CONFIG_MITIGATION_RETPOLINE
        if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
                return kvm_emulate_wrmsr(vcpu);
+       else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+               return handle_wrmsr_imm(vcpu);
        else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
                return handle_preemption_timer(vcpu);
        else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
