lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 9 Mar 2020 16:57:27 +0800
From:   Keqian Zhu <zhukeqian1@...wei.com>
To:     <kvmarm@...ts.cs.columbia.edu>, <kvm@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>,
        <linux-arm-kernel@...ts.infradead.org>
CC:     <wanghaibin.wang@...wei.com>, Keqian Zhu <zhukeqian1@...wei.com>,
        Jay Zhou <jianjay.zhou@...wei.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Peter Xu <peterx@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>
Subject: [RFC] KVM: arm64: support enabling dirty log graually in small chunks

There is already support of enabling dirty log graually
in small chunks for x86. This adds support for arm64.

Under the Huawei Kunpeng 920 2.6GHz platform, I did some
tests with a 128G linux VM and counted the time taken of
memory_global_dirty_log_start, here is the numbers:

VM Size        Before    After optimization
128G           527ms     4ms

Signed-off-by: Keqian Zhu <zhukeqian1@...wei.com>
---
Cc: Jay Zhou <jianjay.zhou@...wei.com>
Cc: Paolo Bonzini <pbonzini@...hat.com> 
Cc: Peter Xu <peterx@...hat.com>
Cc: Sean Christopherson <sean.j.christopherson@...el.com>
---
 Documentation/virt/kvm/api.rst    |  2 +-
 arch/arm64/include/asm/kvm_host.h |  4 ++++
 virt/kvm/arm/mmu.c                | 30 ++++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0adef66585b1..89d4f2680af1 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5735,7 +5735,7 @@ will be initialized to 1 when created.  This also improves performance because
 dirty logging can be enabled gradually in small chunks on the first call
 to KVM_CLEAR_DIRTY_LOG.  KVM_DIRTY_LOG_INITIALLY_SET depends on
 KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
-x86 for now).
+x86 and arm64 for now).
 
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d87aa609d2b6..0deb2ac7d091 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/jump_label.h>
 #include <linux/kvm_types.h>
 #include <linux/percpu.h>
+#include <linux/kvm.h>
 #include <asm/arch_gicv3.h>
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
@@ -45,6 +46,9 @@
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
 #define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
+#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+					KVM_DIRTY_LOG_INITIALLY_SET)
+
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 extern unsigned int kvm_sve_max_vl;
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e3b9ee268823..5c7ca84dec85 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1438,9 +1438,11 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
  * @pud:	pointer to pud entry
  * @addr:	range start address
  * @end:	range end address
+ * @wp_ptes:	write protect ptes or not
  */
 static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
-			   phys_addr_t addr, phys_addr_t end)
+			   phys_addr_t addr, phys_addr_t end,
+			   bool wp_ptes)
 {
 	pmd_t *pmd;
 	phys_addr_t next;
@@ -1453,7 +1455,7 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
 			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
 					kvm_set_s2pmd_readonly(pmd);
-			} else {
+			} else if (wp_ptes) {
 				stage2_wp_ptes(pmd, addr, next);
 			}
 		}
@@ -1465,9 +1467,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
  * @pgd:	pointer to pgd entry
  * @addr:	range start address
  * @end:	range end address
+ * @wp_ptes:	write protect ptes or not
  */
 static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
-			    phys_addr_t addr, phys_addr_t end)
+			    phys_addr_t addr, phys_addr_t end,
+			    bool wp_ptes)
 {
 	pud_t *pud;
 	phys_addr_t next;
@@ -1480,7 +1484,7 @@ static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
 				if (!kvm_s2pud_readonly(pud))
 					kvm_set_s2pud_readonly(pud);
 			} else {
-				stage2_wp_pmds(kvm, pud, addr, next);
+				stage2_wp_pmds(kvm, pud, addr, next, wp_ptes);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
@@ -1491,8 +1495,10 @@ static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
  * @kvm:	The KVM pointer
  * @addr:	Start address of range
  * @end:	End address of range
+ * @wp_ptes:	Write protect ptes or not
  */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr,
+			    phys_addr_t end, bool wp_ptes)
 {
 	pgd_t *pgd;
 	phys_addr_t next;
@@ -1513,7 +1519,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 			break;
 		next = stage2_pgd_addr_end(kvm, addr, end);
 		if (stage2_pgd_present(kvm, *pgd))
-			stage2_wp_puds(kvm, pgd, addr, next);
+			stage2_wp_puds(kvm, pgd, addr, next, wp_ptes);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -1535,6 +1541,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
 	phys_addr_t start, end;
+	bool wp_ptes;
 
 	if (WARN_ON_ONCE(!memslot))
 		return;
@@ -1543,7 +1550,14 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
+	/*
+	 * If we're with initial-all-set, we don't need to write protect
+	 * any small page because they're reported as dirty already.
+	 * However we still need to write-protect huge pages so that the
+	 * page split can happen lazily on the first write to the huge page.
+	 */
+	wp_ptes = !kvm_dirty_log_manual_protect_and_init_set(kvm);
+	stage2_wp_range(kvm, start, end, wp_ptes);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1567,7 +1581,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, start, end, true);
 }
 
 /*
-- 
2.19.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ