lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Tue,  1 Mar 2022 20:26:39 +0200
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     kvm@...r.kernel.org
Cc:     Vitaly Kuznetsov <vkuznets@...hat.com>,
        Ingo Molnar <mingo@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Jani Nikula <jani.nikula@...ux.intel.com>,
        Daniel Vetter <daniel@...ll.ch>,
        Zhi Wang <zhi.a.wang@...el.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Tvrtko Ursulin <tvrtko.ursulin@...ux.intel.com>,
        David Airlie <airlied@...ux.ie>,
        Wanpeng Li <wanpengli@...cent.com>,
        Joonas Lahtinen <joonas.lahtinen@...ux.intel.com>,
        x86@...nel.org, intel-gvt-dev@...ts.freedesktop.org,
        Joerg Roedel <joro@...tes.org>,
        Zhenyu Wang <zhenyuw@...ux.intel.com>,
        Rodrigo Vivi <rodrigo.vivi@...el.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>, Borislav Petkov <bp@...en8.de>,
        Jim Mattson <jmattson@...gle.com>,
        intel-gfx@...ts.freedesktop.org, linux-kernel@...r.kernel.org,
        Thomas Gleixner <tglx@...utronix.de>,
        dri-devel@...ts.freedesktop.org,
        Maxim Levitsky <mlevitsk@...hat.com>
Subject: [PATCH v3 11/11] KVM: SVM: allow to avoid not needed updates to is_running

Allow optionally to make KVM not update is_running unless it is
functionally needed which is only when a vCPU halts,
or is in the guest mode.

This means security wise that if a vCPU is scheduled out,
other vCPUs could still send doorbell messages to the
last physical CPU where this vCPU was last running.

This in theory can be considered less secure, thus
this option is not enabled by default.

The option is avic_doorbell_strict and is true by
default, setting it to false allows this relaxed
non strict mode.

Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
---
 arch/x86/kvm/svm/avic.c | 39 +++++++++++++++++++++++++++------------
 arch/x86/kvm/svm/svm.c  |  7 +++++--
 arch/x86/kvm/svm/svm.h  |  1 +
 virt/kvm/kvm_main.c     |  3 ++-
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index dd13fd3588e2b..1d690a9d3952e 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -166,10 +166,13 @@ void avic_physid_shadow_table_update_vcpu_location(struct kvm_vcpu *vcpu, int cp
 	raw_spin_lock_irqsave(&kvm_svm->avic.table_entries_lock, flags);
 
 	list_for_each_entry(e, &vcpu_svm->nested.physid_ref_entries, link) {
-		u64 sentry = READ_ONCE(*e->sentry);
+		u64 old_sentry = READ_ONCE(*e->sentry);
+		u64 new_sentry = old_sentry;
 
-		physid_entry_set_apicid(&sentry, cpu);
-		WRITE_ONCE(*e->sentry, sentry);
+		physid_entry_set_apicid(&new_sentry, cpu);
+
+		if (new_sentry != old_sentry)
+			WRITE_ONCE(*e->sentry, new_sentry);
 		nentries++;
 	}
 
@@ -1507,7 +1510,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 
 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	u64 entry;
+	u64 old_entry, new_entry;
 	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
 	int h_physical_id = kvm_cpu_get_apicid(cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1531,14 +1534,16 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (kvm_vcpu_is_blocking(vcpu))
 		return;
 
-	entry = READ_ONCE(*(svm->avic_physical_id_cache));
-	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+	old_entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	new_entry = old_entry;
 
-	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
-	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
-	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	new_entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+	new_entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+	new_entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+	if (old_entry != new_entry)
+		WRITE_ONCE(*(svm->avic_physical_id_cache), new_entry);
 
-	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
 }
 
@@ -1549,14 +1554,24 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 
 	lockdep_assert_preemption_disabled();
 
+	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+	/*
+	 * It is only meaningful to intercept IPIs from the guest
+	 * when either vCPU is blocked, or in guest mode.
+	 * In all other cases (e.g userspace vmexit, or preemption
+	 * by other task, the vCPU is guaranteed to return to guest mode
+	 * as soon as it can
+	 */
+	if (!avic_doorbell_strict && !kvm_vcpu_is_blocking(vcpu) && !is_guest_mode(vcpu))
+		return;
+
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
 
 	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
 	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
 		return;
 
-	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
-
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0d6b715375a69..463b756f665ae 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -202,6 +202,9 @@ module_param(tsc_scaling, int, 0444);
 static bool avic;
 module_param(avic, bool, 0444);
 
+bool avic_doorbell_strict = true;
+module_param(avic_doorbell_strict, bool, 0444);
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -1340,7 +1343,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	svm->loaded = false;
 
 	if (svm->nested.initialized && svm->avic_enabled)
-		avic_physid_shadow_table_update_vcpu_location(vcpu, -1);
+		if (!avic_doorbell_strict || kvm_vcpu_is_blocking(vcpu))
+			avic_physid_shadow_table_update_vcpu_location(vcpu, -1);
 
 	svm_prepare_host_switch(vcpu);
 
@@ -4707,7 +4711,6 @@ static __init void svm_set_cpu_caps(void)
 	/* CPUID 0x80000001 and 0x8000000A (SVM features) */
 	if (nested) {
 		kvm_cpu_cap_set(X86_FEATURE_SVM);
-		kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
 
 		if (nrips)
 			kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8ebda12995abe..d0108bae2cdac 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -33,6 +33,7 @@
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
 extern bool intercept_smi;
+extern bool avic_doorbell_strict;
 
 /*
  * Clean bits in VMCB.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cdf1fa3c60ae2..67a29233e216b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3291,9 +3291,10 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.generic.blocking = 1;
 
+	prepare_to_rcuwait(wait);
+
 	kvm_arch_vcpu_blocking(vcpu);
 
-	prepare_to_rcuwait(wait);
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
-- 
2.26.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ