[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240712-asi-rfc-24-v1-21-144b319a40d8@google.com>
Date: Fri, 12 Jul 2024 17:00:39 +0000
From: Brendan Jackman <jackmanb@...gle.com>
To: Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, "H. Peter Anvin" <hpa@...or.com>,
Andy Lutomirski <luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>,
Alexandre Chartre <alexandre.chartre@...cle.com>, Liran Alon <liran.alon@...cle.com>,
Jan Setje-Eilers <jan.setjeeilers@...cle.com>, Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>, Mark Rutland <mark.rutland@....com>,
Andrew Morton <akpm@...ux-foundation.org>, Mel Gorman <mgorman@...e.de>,
Lorenzo Stoakes <lstoakes@...il.com>, David Hildenbrand <david@...hat.com>, Vlastimil Babka <vbabka@...e.cz>,
Michal Hocko <mhocko@...nel.org>, Khalid Aziz <khalid.aziz@...cle.com>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>,
Valentin Schneider <vschneid@...hat.com>, Paul Turner <pjt@...gle.com>, Reiji Watanabe <reijiw@...gle.com>,
Junaid Shahid <junaids@...gle.com>, Ofir Weisse <oweisse@...gle.com>,
Yosry Ahmed <yosryahmed@...gle.com>, Patrick Bellasi <derkling@...gle.com>,
KP Singh <kpsingh@...gle.com>, Alexandra Sandulescu <aesa@...gle.com>,
Matteo Rizzo <matteorizzo@...gle.com>, Jann Horn <jannh@...gle.com>
Cc: x86@...nel.org, linux-kernel@...r.kernel.org, linux-mm@...ck.org,
kvm@...r.kernel.org, Brendan Jackman <jackmanb@...gle.com>
Subject: [PATCH 21/26] KVM: x86: asi: Restricted address space for VM execution
An ASI restricted address space is added for KVM. It is currently only
enabled for Intel CPUs.
This change incorporates an extra asi_exit at the end of vcpu_run. We
expect later iterations of ASI to drop that call as we gain the
ablity to context switch within the ASI domain.
Signed-off-by: Brendan Jackman <jackmanb@...gle.com>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 36 ++++++++++++++++++++++--------------
arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++--
4 files changed, 54 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6efd1497b0263..6c3326cb8273c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -36,6 +36,7 @@
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>
+#include <asm/asi.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -1514,6 +1515,8 @@ struct kvm_arch {
*/
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
struct kvm_mmu_memory_cache split_desc_cache;
+
+ struct asi *asi;
};
struct kvm_vm_stat {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9aaf83c8d57df..6f9a279c12dc7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4108,6 +4108,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_enter_irqoff();
amd_clear_divider();
+ asi_enter(vcpu->kvm->arch.asi);
if (sev_es_guest(vcpu->kvm))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
@@ -4115,6 +4116,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
+ asi_relax();
guest_state_exit_irqoff();
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 22411f4aff530..1105d666a8ade 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,7 @@
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>
+#include <asm/asi.h>
#include <trace/events/ipi.h>
@@ -7255,14 +7256,32 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
unsigned int flags)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long cr3;
guest_state_enter_irqoff();
+ asi_enter(vcpu->kvm->arch.asi);
+
+ /*
+ * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+ * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+ * it switches back to the current->mm, which can occur in KVM context
+ * when switching to a temporary mm to patch kernel code, e.g. if KVM
+ * toggles a static key while handling a VM-Exit.
+ * Also, this must be done after asi_enter(), as it changes CR3
+ * when switching address spaces.
+ */
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
/*
* L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
* mitigation for MDS is done late in VMentry and is still
* executed in spite of L1D Flush. This is because an extra VERW
* should not matter much after the big hammer L1D Flush.
+ * This is only after asi_enter() for performance reasons.
*/
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
@@ -7283,6 +7302,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx->idt_vectoring_info = 0;
+ asi_relax();
+
vmx_enable_fb_clear(vmx);
if (unlikely(vmx->fail)) {
@@ -7311,7 +7332,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -7354,19 +7375,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
- /*
- * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
- * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
- * it switches back to the current->mm, which can occur in KVM context
- * when switching to a temporary mm to patch kernel code, e.g. if KVM
- * toggles a static key while handling a VM-Exit.
- */
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
-
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91478b769af08..b9947e88d4ac6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -85,6 +85,7 @@
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/asi.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -318,6 +319,8 @@ u64 __read_mostly host_xcr0;
static struct kmem_cache *x86_emulator_cache;
+static int __read_mostly kvm_asi_index = -1;
+
/*
* When called, it means the previous get/set msr reached an invalid msr.
* Return true if we want to ignore/silent this failed msr access.
@@ -9750,6 +9753,11 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (r)
goto out_free_percpu;
+ r = asi_register_class("KVM", NULL);
+ if (r < 0)
+ goto out_mmu_exit;
+ kvm_asi_index = r;
+
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@ -9767,7 +9775,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
r = ops->hardware_setup();
if (r != 0)
- goto out_mmu_exit;
+ goto out_asi_unregister;
kvm_ops_update(ops);
@@ -9820,6 +9828,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
out_unwind_ops:
kvm_x86_ops.hardware_enable = NULL;
static_call(kvm_x86_hardware_unsetup)();
+out_asi_unregister:
+ asi_unregister_class(kvm_asi_index);
out_mmu_exit:
kvm_mmu_vendor_module_exit();
out_free_percpu:
@@ -9851,6 +9861,7 @@ void kvm_x86_vendor_exit(void)
cancel_work_sync(&pvclock_gtod_work);
#endif
static_call(kvm_x86_hardware_unsetup)();
+ asi_unregister_class(kvm_asi_index);
kvm_mmu_vendor_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
@@ -11436,6 +11447,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = vcpu_run(vcpu);
+ /*
+ * At present ASI doesn't have the capability to transition directly
+ * from the restricted address space to the user address space. So we
+ * just return to the unrestricted address space in between.
+ */
+ asi_exit();
+
out:
kvm_put_guest_fpu(vcpu);
if (kvm_run->kvm_valid_regs)
@@ -12539,10 +12557,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_mmu_init_vm(kvm);
- ret = static_call(kvm_x86_vm_init)(kvm);
+ ret = asi_init(kvm->mm, kvm_asi_index, &kvm->arch.asi);
if (ret)
goto out_uninit_mmu;
+ ret = static_call(kvm_x86_vm_init)(kvm);
+ if (ret)
+ goto out_asi_destroy;
+
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -12579,6 +12601,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
+out_asi_destroy:
+ asi_destroy(kvm->arch.asi);
out_uninit_mmu:
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
@@ -12720,6 +12744,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_destroy_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+ asi_destroy(kvm->arch.asi);
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
--
2.45.2.993.g49e7a77208-goog
Powered by blists - more mailing lists