Message-Id: <20210210212308.2219465-1-makarandsonare@google.com>
Date:   Wed, 10 Feb 2021 13:23:08 -0800
From:   Makarand Sonare <makarandsonare@...gle.com>
To:     kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
        pbonzini@...hat.com, seanjc@...gle.com, pshier@...gle.com,
        jmattson@...gle.com
Cc:     Makarand Sonare <makarandsonare@...gle.com>,
        Ben Gardon <bgardon@...gle.com>
Subject: [RESEND PATCH] KVM: VMX: Enable/disable PML when dirty logging gets enabled/disabled

Currently, if enable_pml=1, PML remains enabled for the entire lifetime
of the VM, regardless of whether dirty logging is enabled or disabled.
When dirty logging is disabled, all pages of the VM are manually marked
dirty, so PML is effectively non-operational. Clearing the dirty bits
is an expensive operation and can cause severe MMU lock contention on a
performance-sensitive path when dirty logging is disabled after a
failed or canceled live migration. This also breaks if some other code
path clears the dirty bits, in which case PML starts logging dirty
pages even though dirty logging is disabled, incurring unnecessary VM
exits whenever the PML buffer becomes full. To avoid this overhead,
enable or disable PML in the VMCS when dirty logging is enabled or
disabled, instead of keeping it enabled unconditionally.
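
For context, dirty logging is toggled per memslot from userspace via the
KVM_SET_USER_MEMORY_REGION ioctl. The sketch below is not part of the
patch; the helper name and parameters are illustrative only, but it shows
the userspace path that drives the new dirty_logging_enable_count and,
in turn, PML:

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /*
   * Illustrative helper: re-register an existing memslot with or without
   * KVM_MEM_LOG_DIRTY_PAGES.  vm_fd, slot, gpa, size and hva must describe
   * a slot that was already created with the same geometry.
   */
  static int set_dirty_logging(int vm_fd, __u32 slot, __u64 gpa, __u64 size,
                               void *hva, int enable)
  {
          struct kvm_userspace_memory_region region = {
                  .slot = slot,
                  .flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0,
                  .guest_phys_addr = gpa,
                  .memory_size = size,
                  .userspace_addr = (__u64)(unsigned long)hva,
          };

          /*
           * Enabling logging on the first slot bumps
           * dirty_logging_enable_count to 1 and, with this patch, sets
           * SECONDARY_EXEC_ENABLE_PML in the VMCS; disabling it on the
           * last slot clears the bit again.
           */
          return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
  }

Nothing in the patch depends on this helper; it only illustrates the
userspace path that reaches the __kvm_set_memory_region() change below.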

Tested:
	kvm-unit-tests
	dirty_log_test
	dirty_log_perf_test

Signed-off-by: Makarand Sonare <makarandsonare@...gle.com>
Reviewed-by: Ben Gardon <bgardon@...gle.com>
---
 arch/x86/include/asm/kvm_host.h |  4 +++
 arch/x86/kvm/vmx/nested.c       |  5 ++++
 arch/x86/kvm/vmx/vmx.c          | 49 +++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/vmx.h          |  3 ++
 arch/x86/kvm/x86.c              |  4 +++
 include/linux/kvm_host.h        |  1 +
 virt/kvm/kvm_main.c             | 14 ++++++++--
 7 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1ed1206c196db..6aca4f0f9d806 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -91,6 +91,8 @@
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
+#define KVM_REQ_UPDATE_VCPU_DIRTY_LOGGING_STATE \
+	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1029,6 +1031,7 @@ struct kvm_arch {
 	} msr_filter;
 
 	bool bus_lock_detection_enabled;
+	bool pml_enabled;
 
 	struct kvm_pmu_event_filter *pmu_event_filter;
 	struct task_struct *nx_lpage_recovery_thread;
@@ -1295,6 +1298,7 @@ struct kvm_x86_ops {
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
 	int (*cpu_dirty_log_size)(void);
+	void (*update_vcpu_dirty_logging_state)(struct kvm_vcpu *vcpu);
 
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2f0b5e9cd638..9e8e89fdc03a2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4495,6 +4495,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		vmx_set_virtual_apic_mode(vcpu);
 	}
 
+	if (vmx->nested.deferred_update_pml_vmcs) {
+		vmx->nested.deferred_update_pml_vmcs = false;
+		vmx_update_pml_in_vmcs(vcpu);
+	}
+
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_clean(vmx->nested.apic_access_page);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 777177ea9a35e..eb6639f0ee7eb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4276,7 +4276,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	*/
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 
-	if (!enable_pml)
+	if (!enable_pml || !vcpu->kvm->arch.pml_enabled)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
 	if (cpu_has_vmx_xsaves()) {
@@ -7133,7 +7133,8 @@ static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_SHADOW_VMCS |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_DESC;
+		SECONDARY_EXEC_DESC |
+		SECONDARY_EXEC_ENABLE_PML;
 
 	u32 new_ctl = vmx->secondary_exec_control;
 	u32 cur_ctl = secondary_exec_controls_get(vmx);
@@ -7509,6 +7510,19 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 static void vmx_slot_enable_log_dirty(struct kvm *kvm,
 				     struct kvm_memory_slot *slot)
 {
+	/*
+	 * Enable PML in the VMCS when dirty logging is enabled for the
+	 * first memslot, i.e. when the dirty logging enable count
+	 * transitions from 0 to 1.
+	 */
+	if (enable_pml &&
+	    kvm->dirty_logging_enable_count == 1 &&
+	    !kvm->arch.pml_enabled) {
+		kvm->arch.pml_enabled = true;
+		kvm_make_all_cpus_request(kvm,
+			KVM_REQ_UPDATE_VCPU_DIRTY_LOGGING_STATE);
+	}
+
 	if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
 		kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
 	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
@@ -7517,9 +7531,39 @@ static void vmx_slot_enable_log_dirty(struct kvm *kvm,
 static void vmx_slot_disable_log_dirty(struct kvm *kvm,
 				       struct kvm_memory_slot *slot)
 {
+	/*
+	 * Disable PML in the VMCS when dirty logging is disabled for the
+	 * last memslot, i.e. when the dirty logging enable count
+	 * transitions from 1 to 0.
+	 */
+	if (enable_pml &&
+	    kvm->dirty_logging_enable_count == 0 &&
+	    kvm->arch.pml_enabled) {
+		kvm->arch.pml_enabled = false;
+		kvm_make_all_cpus_request(kvm,
+			KVM_REQ_UPDATE_VCPU_DIRTY_LOGGING_STATE);
+	}
+
 	kvm_mmu_slot_set_dirty(kvm, slot);
 }
 
+void vmx_update_pml_in_vmcs(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_secondary_exec_ctrls()) {
+		if (is_guest_mode(vcpu)) {
+			to_vmx(vcpu)->nested.deferred_update_pml_vmcs = true;
+			return;
+		}
+
+		if (vcpu->kvm->arch.pml_enabled)
+			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+				SECONDARY_EXEC_ENABLE_PML);
+		else
+			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+				SECONDARY_EXEC_ENABLE_PML);
+	}
+}
+
 static void vmx_flush_log_dirty(struct kvm *kvm)
 {
 	kvm_flush_pml_buffers(kvm);
@@ -7747,6 +7791,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+	.update_vcpu_dirty_logging_state = vmx_update_pml_in_vmcs,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 12c53d05a902b..b7dc413cda7bd 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -181,6 +181,8 @@ struct nested_vmx {
 
 	struct loaded_vmcs vmcs02;
 
+	bool deferred_update_pml_vmcs;
+
 	/*
 	 * Guest pages referred to in the vmcs02 with host-physical
 	 * pointers, so we must keep them pinned while L2 runs.
@@ -393,6 +395,7 @@ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
 	u32 msr, int type, bool value);
+void vmx_update_pml_in_vmcs(struct kvm_vcpu *vcpu);
 
 static inline u8 vmx_get_rvi(void)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d9f931c632936..75b924c9c5af0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8984,6 +8984,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_check_async_pf_completion(vcpu);
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
 			static_call(kvm_x86_msr_filter_changed)(vcpu);
+
+		if (kvm_check_request(KVM_REQ_UPDATE_VCPU_DIRTY_LOGGING_STATE,
+				      vcpu))
+			kvm_x86_ops.update_vcpu_dirty_logging_state(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f417447129b9c..a8a28ba955923 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -518,6 +518,7 @@ struct kvm {
 	pid_t userspace_pid;
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
+	uint dirty_logging_enable_count;
 };
 
 #define kvm_err(fmt, ...) \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ee4ac2618ec59..c6e5b026bbfe8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -307,6 +307,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	return kvm_make_all_cpus_request_except(kvm, req, NULL);
 }
+EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
@@ -1366,15 +1367,24 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	/* Allocate/free page dirty bitmap as needed */
-	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
+	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) {
 		new.dirty_bitmap = NULL;
-	else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
+
+		if (old.flags & KVM_MEM_LOG_DIRTY_PAGES) {
+			WARN_ON(kvm->dirty_logging_enable_count == 0);
+			--kvm->dirty_logging_enable_count;
+		}
+
+	} else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
 		r = kvm_alloc_dirty_bitmap(&new);
 		if (r)
 			return r;
 
 		if (kvm_dirty_log_manual_protect_and_init_set(kvm))
 			bitmap_set(new.dirty_bitmap, 0, new.npages);
+
+		++kvm->dirty_logging_enable_count;
+		WARN_ON(kvm->dirty_logging_enable_count == 0);
 	}
 
 	r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
-- 
2.30.0.478.g8a0d178c01-goog
