[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1a0325af-f264-47de-b9f7-da9721366c20@linux.intel.com>
Date: Wed, 23 Apr 2025 17:51:40 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Maxim Levitsky <mlevitsk@...hat.com>, kvm@...r.kernel.org
Cc: Thomas Gleixner <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>,
Paolo Bonzini <pbonzini@...hat.com>, x86@...nel.org,
Sean Christopherson <seanjc@...gle.com>,
Dave Hansen <dave.hansen@...ux.intel.com>, Ingo Molnar <mingo@...hat.com>,
linux-kernel@...r.kernel.org, "H. Peter Anvin" <hpa@...or.com>
Subject: Re: [PATCH 1/3] x86: KVM: VMX: Wrap GUEST_IA32_DEBUGCTL read/write
with access functions
The shortlog "x86: KVM: VMX: Wrap GUEST_IA32_DEBUGCTL read/write with
access functions" doesn't follow Sean's suggestion
(https://github.com/kvm-x86/linux/blob/next/Documentation/process/maintainer-kvm-x86.rst#shortlog).
Please modify. Thanks.
On 4/16/2025 8:25 AM, Maxim Levitsky wrote:
> Instead of reading and writing GUEST_IA32_DEBUGCTL vmcs field directly,
> wrap the logic with get/set functions.
>
> Also move the checks that the guest's supplied value is valid to the new
> 'set' function.
>
> In particular, the above change fixes a minor security issue in which L1
> hypervisor could set the GUEST_IA32_DEBUGCTL, and eventually the host's
> MSR_IA32_DEBUGCTL to any value by performing a VM entry to L2 with
> VM_ENTRY_LOAD_DEBUG_CONTROLS set.
>
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
> arch/x86/kvm/vmx/nested.c | 15 +++++++---
> arch/x86/kvm/vmx/pmu_intel.c | 9 +++---
> arch/x86/kvm/vmx/vmx.c | 58 +++++++++++++++++++++++-------------
> arch/x86/kvm/vmx/vmx.h | 3 ++
> 4 files changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index e073e3008b16..b7686569ee09 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2641,6 +2641,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
> bool load_guest_pdptrs_vmcs12 = false;
> + u64 new_debugctl;
>
> if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx)) {
> prepare_vmcs02_rare(vmx, vmcs12);
> @@ -2653,11 +2654,17 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> if (vmx->nested.nested_run_pending &&
> (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> + new_debugctl = vmcs12->guest_ia32_debugctl;
> } else {
> kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
> + new_debugctl = vmx->nested.pre_vmenter_debugctl;
> }
> +
> + if (CC(!vmx_set_guest_debugctl(vcpu, new_debugctl, false))) {
> + *entry_failure_code = ENTRY_FAIL_DEFAULT;
> + return -EINVAL;
> + }
> +
> if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
> vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
> @@ -3520,7 +3527,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>
> if (!vmx->nested.nested_run_pending ||
> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
> - vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + vmx->nested.pre_vmenter_debugctl = vmx_get_guest_debugctl(vcpu);
> if (kvm_mpx_supported() &&
> (!vmx->nested.nested_run_pending ||
> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
> @@ -4788,7 +4795,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
>
> kvm_set_dr(vcpu, 7, 0x400);
> - vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> + vmx_set_guest_debugctl(vcpu, 0, false);
>
> if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
> vmcs12->vm_exit_msr_load_count))
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 8a94b52c5731..f6f448adfb80 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -19,6 +19,7 @@
> #include "lapic.h"
> #include "nested.h"
> #include "pmu.h"
> +#include "vmx.h"
> #include "tdx.h"
>
> /*
> @@ -652,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
> */
> static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
> {
> - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + u64 data = vmx_get_guest_debugctl(vcpu);
>
> if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> data &= ~DEBUGCTLMSR_LBR;
> - vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> + vmx_set_guest_debugctl(vcpu, data, true);
Two questions.
1. why to call vmx_set_guest_debugctl() to do the extra check? currently
IA32_DEBUGCTL MSR is always intercepted and it's already checked at
vmx_set_msr() and seems unnecessary to check here again.
2. why the argument "host_initiated" is true? It looks the data is not from
host.
> }
> }
>
> @@ -729,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
>
> if (!lbr_desc->event) {
> vmx_disable_lbr_msrs_passthrough(vcpu);
> - if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
> + if (vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR)
> goto warn;
> if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
> goto warn;
> @@ -751,7 +752,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
>
> static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
> {
> - if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
> + if (!(vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR))
> intel_pmu_release_guest_lbr_event(vcpu);
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ef2d7208dd20..4237422dc4ed 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2154,7 +2154,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> break;
> case MSR_IA32_DEBUGCTLMSR:
> - msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + msr_info->data = vmx_get_guest_debugctl(vcpu);
> break;
> default:
> find_uret_msr:
> @@ -2194,6 +2194,41 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
> return debugctl;
> }
>
> +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu)
> +{
> + return vmcs_read64(GUEST_IA32_DEBUGCTL);
> +}
> +
> +static void __vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data)
> +{
> + vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> +}
IMO, it seems unnecessary to add these 2 wrappers since the original code
is already intuitive enough and simple. But if you want, please add
"inline" before these 2 wrappers.
> +
> +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
Since most of code in this function checks guest debugctl, better to rename
it to "vmx_check_and_set_guest_debugctl".
> +{
> + u64 invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated);
> +
> + if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
> + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
> + data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> + invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
Add space around above 3 "|".
> + }
> +
> + if (invalid)
> + return false;
> +
> + if (is_guest_mode(vcpu) && (get_vmcs12(vcpu)->vm_exit_controls &
> + VM_EXIT_SAVE_DEBUG_CONTROLS))
> + get_vmcs12(vcpu)->guest_ia32_debugctl = data;
> +
> + if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
> + (data & DEBUGCTLMSR_LBR))
> + intel_pmu_create_guest_lbr_event(vcpu);
> +
> + __vmx_set_guest_debugctl(vcpu, data);
> + return true;
> +}
> +
> /*
> * Writes msr value into the appropriate "register".
> * Returns 0 on success, non-0 otherwise.
> @@ -2263,26 +2298,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vmcs_writel(GUEST_SYSENTER_ESP, data);
> break;
> case MSR_IA32_DEBUGCTLMSR: {
> - u64 invalid;
> -
> - invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
> - if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
> - kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
> - data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> - invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> - }
> -
> - if (invalid)
> + if (!vmx_set_guest_debugctl(vcpu, data, msr_info->host_initiated))
> return 1;
>
> - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
> - VM_EXIT_SAVE_DEBUG_CONTROLS)
> - get_vmcs12(vcpu)->guest_ia32_debugctl = data;
> -
> - vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> - if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
> - (data & DEBUGCTLMSR_LBR))
> - intel_pmu_create_guest_lbr_event(vcpu);
> return 0;
> }
> case MSR_IA32_BNDCFGS:
> @@ -4795,7 +4813,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
> vmcs_write32(GUEST_SYSENTER_CS, 0);
> vmcs_writel(GUEST_SYSENTER_ESP, 0);
> vmcs_writel(GUEST_SYSENTER_EIP, 0);
> - vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> + __vmx_set_guest_debugctl(&vmx->vcpu, 0);
>
> if (cpu_has_vmx_tpr_shadow()) {
> vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 6d1e40ecc024..8ac46fb47abd 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -404,6 +404,9 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
>
> gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
>
> +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
> +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu);
> +
> static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
> int type, bool value)
> {
Powered by blists - more mailing lists