Message-ID: <a9f3f64c-2f82-40b0-80c0-ed1482861dc2@redhat.com>
Date: Wed, 4 Jun 2025 18:19:41 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Borislav Petkov <bp@...en8.de>, Xin Li <xin@...or.com>,
Chao Gao <chao.gao@...el.com>, Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: Re: [PATCH 25/28] KVM: nSVM: Access MSRPM in 4-byte chunks only for
merging L0 and L1 bitmaps

On 5/30/25 01:40, Sean Christopherson wrote:
> Access the MSRPM using u32/4-byte chunks (and appropriately adjusted
> offsets) only when merging L0 and L1 bitmaps as part of emulating VMRUN.
> The only reason to batch accesses to MSRPMs is to avoid the overhead of
> uaccess operations (e.g. STAC/CLAC and bounds checks) when reading L1's
> bitmap pointed at by vmcb12. For all other uses, either per-bit accesses
> are more than fast enough (no uaccess), or KVM is only accessing a single
> bit (nested_svm_exit_handled_msr()) and so there's nothing to batch.
>
> In addition to (hopefully) documenting the uniqueness of the merging code,
> restricting chunked access to _just_ the merging code will allow for
> increasing the chunk size (to unsigned long) with minimal risk.
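
Just to spell out what the chunking buys (a simplified sketch, not the exact
code in this series; the msrpm01/msrpm02 names are purely illustrative), the
merge loop ends up doing roughly:

	for (i = 0; i < nested_svm_nr_msrpm_merge_offsets; i++) {
		const int p = nested_svm_msrpm_merge_offsets[i];
		u32 value;

		/* One guest-memory read per 32-bit chunk of L1's bitmap. */
		if (kvm_vcpu_read_guest(&svm->vcpu,
					svm->nested.ctl.msrpm_base_pa + p * sizeof(u32),
					&value, sizeof(value)))
			return false;

		/* Merge with L0's bits (u32 indexing, names illustrative). */
		msrpm02[p] = msrpm01[p] | value;
	}

so a single uaccess covers 16 MSRs' worth of read/write bits instead of doing
per-bit uaccesses on L1's bitmap.
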
>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/kvm/svm/nested.c | 50 ++++++++++++++++-----------------------
> arch/x86/kvm/svm/svm.h | 18 ++++++++++----
> 2 files changed, 34 insertions(+), 34 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index e07e10fb52a5..a4e98ada732b 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -187,31 +187,19 @@ void recalc_intercepts(struct vcpu_svm *svm)
> static int nested_svm_msrpm_merge_offsets[6] __ro_after_init;
> static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
>
> -static const u32 msrpm_ranges[] = {
> - SVM_MSRPM_RANGE_0_BASE_MSR,
> - SVM_MSRPM_RANGE_1_BASE_MSR,
> - SVM_MSRPM_RANGE_2_BASE_MSR
> -};
> +#define SVM_BUILD_MSR_BYTE_NR_CASE(range_nr, msr) \
> + case SVM_MSRPM_FIRST_MSR(range_nr) ... SVM_MSRPM_LAST_MSR(range_nr): \
> + return SVM_MSRPM_BYTE_NR(range_nr, msr);
>
> static u32 svm_msrpm_offset(u32 msr)
> {
> - u32 offset;
> - int i;
> -
> - for (i = 0; i < ARRAY_SIZE(msrpm_ranges); i++) {
> - if (msr < msrpm_ranges[i] ||
> - msr >= msrpm_ranges[i] + SVM_MSRS_PER_RANGE)
> - continue;
> -
> - offset = (msr - msrpm_ranges[i]) / SVM_MSRS_PER_BYTE;
> - offset += (i * SVM_MSRPM_BYTES_PER_RANGE); /* add range offset */
> -
> - /* Now we have the u8 offset - but need the u32 offset */
> - return offset / 4;
> + switch (msr) {
> + SVM_BUILD_MSR_BYTE_NR_CASE(0, msr)
> + SVM_BUILD_MSR_BYTE_NR_CASE(1, msr)
> + SVM_BUILD_MSR_BYTE_NR_CASE(2, msr)
> + default:
> + return MSR_INVALID;
> }
> -
> - /* MSR not in any range */
> - return MSR_INVALID;
> }
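
The byte-offset arithmetic looks right to me; a tiny standalone mock of it,
for reference (using the architectural constants, i.e. 2048 bytes per range,
4 MSRs per byte, and a range-1 base of 0xc0000000, rather than the kernel
macros themselves):

	#include <assert.h>
	#include <stdint.h>

	#define MSRPM_BYTES_PER_RANGE	2048u
	#define MSRS_PER_BYTE		4u	/* 2 bits (read + write) per MSR */

	static uint32_t byte_nr(uint32_t range_nr, uint32_t msr, uint32_t base)
	{
		return range_nr * MSRPM_BYTES_PER_RANGE + (msr - base) / MSRS_PER_BYTE;
	}

	int main(void)
	{
		/* MSR_STAR (0xc0000081) is in range 1 (base 0xc0000000) => byte 2080. */
		assert(byte_nr(1, 0xc0000081, 0xc0000000) == 2048 + 0x81 / 4);
		return 0;
	}
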
>
> int __init nested_svm_init_msrpm_merge_offsets(void)
> @@ -245,6 +233,12 @@ int __init nested_svm_init_msrpm_merge_offsets(void)
> if (WARN_ON(offset == MSR_INVALID))
> return -EIO;
>
> + /*
> + * Merging is done in 32-bit chunks to reduce the number of
> + * accesses to L1's bitmap.
> + */
> + offset /= sizeof(u32);
> +
> for (j = 0; j < nested_svm_nr_msrpm_merge_offsets; j++) {
> if (nested_svm_msrpm_merge_offsets[j] == offset)
> break;
> @@ -1363,8 +1357,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
>
> static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
> {
> - u32 offset, msr, value;
> - int write, mask;
> + u32 offset, msr;
> + int write;
> + u8 value;
>
> if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
> return NESTED_EXIT_HOST;
> @@ -1372,18 +1367,15 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
> msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
> offset = svm_msrpm_offset(msr);
> write = svm->vmcb->control.exit_info_1 & 1;
> - mask = 1 << ((2 * (msr & 0xf)) + write);

This is wrong.  The bit to read isn't always bit 0 or bit 1, so the mask
needs to remain.  But it can easily be rewritten as:

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	write = svm->vmcb->control.exit_info_1 & 1;

	bit = svm_msrpm_bit(msr);
	if (bit == MSR_INVALID)
		return NESTED_EXIT_DONE;

	offset = bit / BITS_PER_BYTE;
	mask = BIT(write) << (bit & (BITS_PER_BYTE - 1));

and this even removes the need to use svm_msrpm_offset() in
nested_svm_exit_handled_msr().
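
To make that concrete, take MSR_STAR (0xc0000081), which sits in range 1
(assuming the usual 0xc0000000 base).  With 2048 bytes per range and 2 bits
per MSR:

	bit    = 1 * 2048 * 8 + 0x81 * 2    = 16642
	offset = 16642 / 8                  = 2080
	mask   = BIT(write) << (16642 & 7)  = BIT(write) << 2

i.e. MSR_STAR's read/write bits are bits 2 and 3 of byte 2080, so checking
"value & BIT(write)" alone would test a different MSR's bits.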

At this point, it may even make sense to keep the 32-bit adjustment of the
offset inside svm_msrpm_offset() itself, like this:

static u32 svm_msrpm_offset(u32 msr)
{
	u32 bit = svm_msrpm_bit(msr);

	if (bit == MSR_INVALID)
		return MSR_INVALID;

	/*
	 * Merging is done in 32-bit chunks to reduce the number of
	 * accesses to L1's bitmap.
	 */
	return bit / (BITS_PER_BYTE * sizeof(u32));
}
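
(The caller in nested_svm_init_msrpm_merge_offsets() then goes back to just

	offset = svm_msrpm_offset(msr);
	if (WARN_ON(offset == MSR_INVALID))
		return -EIO;

without the explicit "offset /= sizeof(u32)" added by this patch, since the
helper already returns the u32-chunk index.)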

I'll let you be the judge on this.

Paolo

> if (offset == MSR_INVALID)
> return NESTED_EXIT_DONE;
>
> - /* Offset is in 32 bit units but need in 8 bit units */
> - offset *= 4;
> -
> - if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
> + if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset,
> + &value, sizeof(value)))
> return NESTED_EXIT_DONE;
>
> - return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
> + return (value & BIT(write)) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
> }
>
> static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 77287c870967..155b6089fcd2 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -634,15 +634,23 @@ static_assert(SVM_MSRS_PER_RANGE == 8192);
> (range_nr * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE + \
> (msr - SVM_MSRPM_RANGE_## range_nr ##_BASE_MSR) * SVM_BITS_PER_MSR)
>
> -#define SVM_MSRPM_SANITY_CHECK_BITS(range_nr) \
> +#define SVM_MSRPM_BYTE_NR(range_nr, msr) \
> + (range_nr * SVM_MSRPM_BYTES_PER_RANGE + \
> + (msr - SVM_MSRPM_RANGE_## range_nr ##_BASE_MSR) / SVM_MSRS_PER_BYTE)
> +
> +#define SVM_MSRPM_SANITY_CHECK_BITS_AND_BYTES(range_nr) \
> static_assert(SVM_MSRPM_BIT_NR(range_nr, SVM_MSRPM_FIRST_MSR(range_nr) + 1) == \
> range_nr * 2048 * 8 + 2); \
> static_assert(SVM_MSRPM_BIT_NR(range_nr, SVM_MSRPM_FIRST_MSR(range_nr) + 7) == \
> - range_nr * 2048 * 8 + 14);
> + range_nr * 2048 * 8 + 14); \
> +static_assert(SVM_MSRPM_BYTE_NR(range_nr, SVM_MSRPM_FIRST_MSR(range_nr) + 1) == \
> + range_nr * 2048); \
> +static_assert(SVM_MSRPM_BYTE_NR(range_nr, SVM_MSRPM_FIRST_MSR(range_nr) + 7) == \
> + range_nr * 2048 + 1);
>
> -SVM_MSRPM_SANITY_CHECK_BITS(0);
> -SVM_MSRPM_SANITY_CHECK_BITS(1);
> -SVM_MSRPM_SANITY_CHECK_BITS(2);
> +SVM_MSRPM_SANITY_CHECK_BITS_AND_BYTES(0);
> +SVM_MSRPM_SANITY_CHECK_BITS_AND_BYTES(1);
> +SVM_MSRPM_SANITY_CHECK_BITS_AND_BYTES(2);
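
The byte-level sanity checks also match what I'd expect:

	BYTE_NR(n, FIRST_MSR + 1) = n * 2048 + 1 / 4 = n * 2048
	BYTE_NR(n, FIRST_MSR + 7) = n * 2048 + 7 / 4 = n * 2048 + 1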
>
> #define SVM_BUILD_MSR_BITMAP_CASE(bitmap, range_nr, msr, bitop, bit_rw) \
> case SVM_MSRPM_FIRST_MSR(range_nr) ... SVM_MSRPM_LAST_MSR(range_nr): \