lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 13 Jul 2021 17:20:20 +0300
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     kvm@...r.kernel.org
Cc:     linux-kernel@...r.kernel.org (open list:X86 ARCHITECTURE (32-BIT AND
        64-BIT)), Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, Borislav Petkov <bp@...en8.de>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        "H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
        x86@...nel.org (maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)),
        Sean Christopherson <seanjc@...gle.com>,
        Maxim Levitsky <mlevitsk@...hat.com>
Subject: [PATCH v2 5/8] KVM: x86: APICv: fix race in kvm_request_apicv_update on SVM

Currently on SVM, kvm_request_apicv_update() calls
'pre_update_apicv_exec_ctrl' without any synchronization, and that
function toggles the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT.

If the state of that memslot and the AVIC state disagree on one of the
vCPUs, an APIC MMIO write can be lost:

For example:

VCPU0: enable the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
VCPU1: write to an APIC mmio register.

Since AVIC is still disabled on VCPU1, the access will not be intercepted
by AVIC, nor will it cause an MMIO fault; instead it will just update
the dummy page mapped into the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT.

Fix that by adding a lock guarding the AVIC state changes, and by carefully
ordering the operations of kvm_request_apicv_update to avoid this race:

1. Take the lock
2. Send KVM_REQ_APICV_UPDATE
3. Update the apic inhibit reason
4. Release the lock

This ensures that at (2) all vCPUs are kicked out of guest mode,
but do not yet see the new AVIC state.
Only after (4) can the other vCPUs update their AVIC state and resume.

Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
---
 arch/x86/kvm/x86.c       | 39 ++++++++++++++++++++++++---------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  1 +
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 29b92f6cbad4..a91e35b92447 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9180,6 +9180,8 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	if (!lapic_in_kernel(vcpu))
 		return;
 
+	mutex_lock(&vcpu->kvm->apicv_update_lock);
+
 	vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
 	kvm_apic_update_apicv(vcpu);
 	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
@@ -9192,6 +9194,8 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 */
 	if (!vcpu->arch.apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+	mutex_unlock(&vcpu->kvm->apicv_update_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
@@ -9204,32 +9208,34 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
  */
 void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
 {
-	unsigned long old, new, expected;
+	unsigned long old, new;
 
 	if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
 	    !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
 		return;
 
-	old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
-	do {
-		expected = new = old;
-		if (activate)
-			__clear_bit(bit, &new);
-		else
-			__set_bit(bit, &new);
-		if (new == old)
-			break;
-		old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
-	} while (old != expected);
+	mutex_lock(&kvm->apicv_update_lock);
+
+	old = new = kvm->arch.apicv_inhibit_reasons;
+	if (activate)
+		__clear_bit(bit, &new);
+	else
+		__set_bit(bit, &new);
+
+	kvm->arch.apicv_inhibit_reasons = new;
 
 	if (!!old == !!new)
-		return;
+		goto out;
 
 	trace_kvm_apicv_update_request(activate, bit);
+
+	kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+
 	if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
 		static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
 
-	kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+out:
+	mutex_unlock(&kvm->apicv_update_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
 
@@ -9436,8 +9442,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
-		if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
+		if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) {
+			srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_update_apicv(vcpu);
+			vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		}
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 37cbb56ccd09..0364d35d43dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -524,6 +524,7 @@ struct kvm {
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 	struct mutex slots_lock;
+	struct mutex apicv_update_lock;
 
 	/*
 	 * Protects the arch-specific fields of struct kvm_memory_slots in
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ed4d1581d502..ba5d5d9ebc64 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -943,6 +943,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	mutex_init(&kvm->slots_arch_lock);
+	mutex_init(&kvm->apicv_update_lock);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
-- 
2.26.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ