[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <424fc510-0f80-afa0-4f13-d4d133e81c98@amd.com>
Date: Mon, 3 Mar 2025 10:53:57 -0600
From: Tom Lendacky <thomas.lendacky@....com>
To: Sean Christopherson <seanjc@...gle.com>,
Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>,
x86@...nel.org, Paolo Bonzini <pbonzini@...hat.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
Zheyun Shen <szy0127@...u.edu.cn>, Kevin Loughlin
<kevinloughlin@...gle.com>, Mingwei Zhang <mizhang@...gle.com>
Subject: Re: [PATCH 7/7] KVM: SVM: Flush cache only on CPUs running SEV guest
On 2/26/25 19:48, Sean Christopherson wrote:
> From: Zheyun Shen <szy0127@...u.edu.cn>
>
> On AMD CPUs that do not ensure cache coherency for encrypted memory,
> each memory page reclamation in an SEV guest triggers a WBNOINVD/WBINVD
> on all CPUs, thereby affecting the performance of other programs on the host.
>
> Typically, an AMD server may have 128 cores or more, while the SEV guest
> might only utilize 8 of these cores. Meanwhile, the host can use qemu-affinity
> to bind these 8 vCPUs to specific physical CPUs.
>
> Therefore, keeping a record of the physical core numbers each time a vCPU
> runs can help avoid flushing the cache for all CPUs every time.
>
> Signed-off-by: Zheyun Shen <szy0127@...u.edu.cn>
> Co-developed-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/kvm/svm/sev.c | 42 +++++++++++++++++++++++++++++++++++-------
> arch/x86/kvm/svm/svm.h | 1 +
> 2 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4238af23ab1b..b7a4cb728fba 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -447,6 +447,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
> ret = sev_platform_init(&init_args);
> if (ret)
> goto e_free;
> + if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT))
> + goto e_free;
Looks like there should be a "ret = -ENOMEM" before the goto.
Thanks,
Tom
>
> /* This needs to happen after SEV/SNP firmware initialization. */
> if (vm_type == KVM_X86_SNP_VM) {
> @@ -706,16 +708,31 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
> }
> }
>
> -static void sev_writeback_caches(void)
> +static void sev_writeback_caches(struct kvm *kvm)
> {
> + /*
> + * Note, the caller is responsible for ensuring correctness if the mask
> + * can be modified, e.g. if a CPU could be doing VMRUN.
> + */
> + if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
> + return;
> +
> /*
> * Ensure that all dirty guest tagged cache entries are written back
> * before releasing the pages back to the system for use. CLFLUSH will
> * not do this without SME_COHERENT, and flushing many cache lines
> * individually is slower than blasting WBINVD for large VMs, so issue
> - * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported).
> + * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
> + * on CPUs that have done VMRUN, i.e. may have dirtied data using the
> + * VM's ASID.
> + *
> + * For simplicity, never remove CPUs from the bitmap. Ideally, KVM
> + * would clear the mask when flushing caches, but doing so requires
> + * serializing multiple calls and having responding CPUs (to the IPI)
> + * mark themselves as still running if they are running (or about to
> + * run) a vCPU for the VM.
> */
> - wbnoinvd_on_all_cpus();
> + wbnoinvd_on_many_cpus(to_kvm_sev_info(kvm)->have_run_cpus);
> }
>
> static unsigned long get_num_contig_pages(unsigned long idx,
> @@ -2766,7 +2783,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
> goto failed;
> }
>
> - sev_writeback_caches();
> + sev_writeback_caches(kvm);
>
> __unregister_enc_region_locked(kvm, region);
>
> @@ -2914,6 +2931,7 @@ void sev_vm_destroy(struct kvm *kvm)
> }
>
> sev_asid_free(sev);
> + free_cpumask_var(sev->have_run_cpus);
> }
>
> void __init sev_set_cpu_caps(void)
> @@ -3127,7 +3145,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
> return;
>
> do_sev_writeback_caches:
> - sev_writeback_caches();
> + sev_writeback_caches(vcpu->kvm);
> }
>
> void sev_guest_memory_reclaimed(struct kvm *kvm)
> @@ -3140,7 +3158,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
> if (!sev_guest(kvm) || sev_snp_guest(kvm))
> return;
>
> - sev_writeback_caches();
> + sev_writeback_caches(kvm);
> }
>
> void sev_free_vcpu(struct kvm_vcpu *vcpu)
> @@ -3456,7 +3474,17 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
> void pre_sev_run(struct vcpu_svm *svm, int cpu)
> {
> struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> - unsigned int asid = sev_get_asid(svm->vcpu.kvm);
> + struct kvm *kvm = svm->vcpu.kvm;
> + unsigned int asid = sev_get_asid(kvm);
> +
> + /*
> + * To optimize cache flushes when memory is reclaimed from an SEV VM,
> + * track physical CPUs that enter the guest for SEV VMs and thus can
> + * have encrypted, dirty data in the cache, and flush caches only for
> + * CPUs that have entered the guest.
> + */
> + if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
> + cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
>
> /* Assign the asid allocated with this SEV guest */
> svm->asid = asid;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 5b159f017055..6ad18ce5a754 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -112,6 +112,7 @@ struct kvm_sev_info {
> void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
> void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
> struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
> + cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
> };
>
> struct kvm_svm {
Powered by blists - more mailing lists