[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4b85699ec1d354cc73f5302560231f86@misterjones.org>
Date: Mon, 09 Mar 2020 11:45:39 +0000
From: Marc Zyngier <maz@...nel.org>
To: Keqian Zhu <zhukeqian1@...wei.com>
Cc: kvmarm@...ts.cs.columbia.edu, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
Jay Zhou <jianjay.zhou@...wei.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Paolo Bonzini <pbonzini@...hat.com>,
James Morse <james.morse@....com>,
Julien Thierry <julien.thierry.kdev@...il.com>,
Suzuki K Poulose <suzuki.poulose@....com>
Subject: Re: [RFC] KVM: arm64: support enabling dirty log graually in small
chunks
Kegian,
In the future, please Cc me on your KVM/arm64 patches, as well as
all the reviewers mentioned in the MAINTAINERS file.
On 2020-03-09 08:57, Keqian Zhu wrote:
> There is already support of enabling dirty log graually
gradually?
> in small chunks for x86. This adds support for arm64.
>
> Under the Huawei Kunpeng 920 2.6GHz platform, I did some
> tests with a 128G linux VM and counted the time taken of
Linux
> memory_global_dirty_log_start, here is the numbers:
>
> VM Size Before After optimization
> 128G 527ms 4ms
What does this benchmark do? Can you please provide a pointer to it?
>
> Signed-off-by: Keqian Zhu <zhukeqian1@...wei.com>
> ---
> Cc: Jay Zhou <jianjay.zhou@...wei.com>
> Cc: Paolo Bonzini <pbonzini@...hat.com>
> Cc: Peter Xu <peterx@...hat.com>
> Cc: Sean Christopherson <sean.j.christopherson@...el.com>
> ---
> Documentation/virt/kvm/api.rst | 2 +-
> arch/arm64/include/asm/kvm_host.h | 4 ++++
> virt/kvm/arm/mmu.c | 30 ++++++++++++++++++++++--------
> 3 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst
> b/Documentation/virt/kvm/api.rst
> index 0adef66585b1..89d4f2680af1 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -5735,7 +5735,7 @@ will be initialized to 1 when created. This
> also improves performance because
> dirty logging can be enabled gradually in small chunks on the first
> call
> to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
> KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
> -x86 for now).
> +x86 and arm64 for now).
What is this based on? I can't find this in -next, and you provide no
context whatsoever.
I assume this is related to this:
https://lore.kernel.org/kvm/20200227013227.1401-1-jianjay.zhou@huawei.com/
Is there a userspace counterpart to it?
> KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the
> name
> KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that
> make
> diff --git a/arch/arm64/include/asm/kvm_host.h
> b/arch/arm64/include/asm/kvm_host.h
> index d87aa609d2b6..0deb2ac7d091 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -16,6 +16,7 @@
> #include <linux/jump_label.h>
> #include <linux/kvm_types.h>
> #include <linux/percpu.h>
> +#include <linux/kvm.h>
> #include <asm/arch_gicv3.h>
> #include <asm/barrier.h>
> #include <asm/cpufeature.h>
> @@ -45,6 +46,9 @@
> #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
> #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
>
> +#define KVM_DIRTY_LOG_MANUAL_CAPS
> (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
> + KVM_DIRTY_LOG_INITIALLY_SET)
> +
> DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> extern unsigned int kvm_sve_max_vl;
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index e3b9ee268823..5c7ca84dec85 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -1438,9 +1438,11 @@ static void stage2_wp_ptes(pmd_t *pmd,
> phys_addr_t addr, phys_addr_t end)
> * @pud: pointer to pud entry
> * @addr: range start address
> * @end: range end address
> + * @wp_ptes: write protect ptes or not
> */
> static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
> - phys_addr_t addr, phys_addr_t end)
> + phys_addr_t addr, phys_addr_t end,
> + bool wp_ptes)
If you are going to pass extra parameters like this, make it at least
extensible (unsigned long flags, for example).
> {
> pmd_t *pmd;
> phys_addr_t next;
> @@ -1453,7 +1455,7 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t
> *pud,
> if (pmd_thp_or_huge(*pmd)) {
> if (!kvm_s2pmd_readonly(pmd))
> kvm_set_s2pmd_readonly(pmd);
> - } else {
> + } else if (wp_ptes) {
> stage2_wp_ptes(pmd, addr, next);
> }
> }
> @@ -1465,9 +1467,11 @@ static void stage2_wp_pmds(struct kvm *kvm,
> pud_t *pud,
> * @pgd: pointer to pgd entry
> * @addr: range start address
> * @end: range end address
> + * @wp_ptes: write protect ptes or not
> */
> static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
> - phys_addr_t addr, phys_addr_t end)
> + phys_addr_t addr, phys_addr_t end,
> + bool wp_ptes)
> {
> pud_t *pud;
> phys_addr_t next;
> @@ -1480,7 +1484,7 @@ static void stage2_wp_puds(struct kvm *kvm,
> pgd_t *pgd,
> if (!kvm_s2pud_readonly(pud))
> kvm_set_s2pud_readonly(pud);
> } else {
> - stage2_wp_pmds(kvm, pud, addr, next);
> + stage2_wp_pmds(kvm, pud, addr, next, wp_ptes);
> }
> }
> } while (pud++, addr = next, addr != end);
> @@ -1491,8 +1495,10 @@ static void stage2_wp_puds(struct kvm *kvm,
> pgd_t *pgd,
> * @kvm: The KVM pointer
> * @addr: Start address of range
> * @end: End address of range
> + * @wp_ptes: Write protect ptes or not
> */
> -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr,
> phys_addr_t end)
> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr,
> + phys_addr_t end, bool wp_ptes)
> {
> pgd_t *pgd;
> phys_addr_t next;
> @@ -1513,7 +1519,7 @@ static void stage2_wp_range(struct kvm *kvm,
> phys_addr_t addr, phys_addr_t end)
> break;
> next = stage2_pgd_addr_end(kvm, addr, end);
> if (stage2_pgd_present(kvm, *pgd))
> - stage2_wp_puds(kvm, pgd, addr, next);
> + stage2_wp_puds(kvm, pgd, addr, next, wp_ptes);
> } while (pgd++, addr = next, addr != end);
> }
>
> @@ -1535,6 +1541,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm,
> int slot)
> struct kvm_memslots *slots = kvm_memslots(kvm);
> struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
> phys_addr_t start, end;
> + bool wp_ptes;
>
> if (WARN_ON_ONCE(!memslot))
> return;
> @@ -1543,7 +1550,14 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm,
> int slot)
> end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
>
> spin_lock(&kvm->mmu_lock);
> - stage2_wp_range(kvm, start, end);
> + /*
> + * If we're with initial-all-set, we don't need to write protect
> + * any small page because they're reported as dirty already.
> + * However we still need to write-protect huge pages so that the
> + * page split can happen lazily on the first write to the huge page.
> + */
> + wp_ptes = !kvm_dirty_log_manual_protect_and_init_set(kvm);
> + stage2_wp_range(kvm, start, end, wp_ptes);
> spin_unlock(&kvm->mmu_lock);
> kvm_flush_remote_tlbs(kvm);
> }
> @@ -1567,7 +1581,7 @@ static void
> kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
> phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
>
> - stage2_wp_range(kvm, start, end);
> + stage2_wp_range(kvm, start, end, true);
> }
>
> /*
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Powered by blists - more mailing lists