Message-ID: <b75280ca-433b-22da-63df-6917c1147c4f@loongson.cn>
Date: Thu, 14 Dec 2023 11:54:20 +0800
From: maobibo <maobibo@...ngson.cn>
To: Tianrui Zhao <zhaotianrui@...ngson.cn>,
Huacai Chen <chenhuacai@...nel.org>
Cc: WANG Xuerui <kernel@...0n.name>, kvm@...r.kernel.org,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] LoongArch: KVM: Optimization for memslot hugepage
checking
Gentle ping.... :)
On 2023/11/27 9:44 AM, Bibo Mao wrote:
> During a shadow mmu page fault, the handler checks whether a huge page
> can be used for the specified memslot. Since page fault handling is a
> hot path, this check can instead be done once when the memslot is
> created. Two flags are added for huge page checking,
> KVM_MEM_HUGEPAGE_CAPABLE and KVM_MEM_HUGEPAGE_INCAPABLE. As an
> optimization for qemu, whose DRAM memslots are always huge page
> aligned, the cached flag is checked first on the hot page fault path.
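
For illustration, the change amounts to replacing per-fault alignment
arithmetic with a bit test on a flag computed once at memslot creation;
a minimal sketch of the two fault-path checks side by side (both pieces
are taken from the diff below):

	/* before: recompute the alignment on every page fault */
	if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
		return false;

	/* after: test the per-memslot flag cached at creation time */
	if (kvm_hugepage_capable(memslot))
		return true;
	if (kvm_hugepage_incapable(memslot))
		return false;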
>
> For now only the huge page flag is supported; super page support on
> LoongArch is still a long way off. The super page size is 64G with a
> 16K page size and 1G with a 4K page size: 64G physical ranges are
> rarely used, and the LoongArch kernel would first need super page
> support for 4K pages. The memory layout of a LoongArch qemu VM would
> also have to be 1G aligned.
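
For reference, those sizes follow directly from the page-table geometry
(assuming 8-byte page-table entries, i.e. 2^(PAGE_SHIFT - 3) entries per
table):

	16K pages: 2048 entries/table, PMD_SIZE = 16K * 2048 = 32M,
	           PUD (super page) size = 32M * 2048 = 64G
	 4K pages:  512 entries/table, PMD_SIZE =  4K *  512 =  2M,
	           PUD (super page) size =  2M *  512 =  1G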
>
> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
> ---
> arch/loongarch/include/asm/kvm_host.h | 3 +
> arch/loongarch/kvm/mmu.c | 127 +++++++++++++++++---------
> 2 files changed, 89 insertions(+), 41 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 11328700d4fa..0e89db020481 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -45,7 +45,10 @@ struct kvm_vcpu_stat {
> u64 signal_exits;
> };
>
> +#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
> +#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1)
> struct kvm_arch_memory_slot {
> + unsigned long flags;
> };
>
> struct kvm_context {
> diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
> index 80480df5f550..6845733f37dc 100644
> --- a/arch/loongarch/kvm/mmu.c
> +++ b/arch/loongarch/kvm/mmu.c
> @@ -13,6 +13,16 @@
> #include <asm/tlb.h>
> #include <asm/kvm_mmu.h>
>
> +static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot)
> +{
> + return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE;
> +}
> +
> +static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot)
> +{
> + return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE;
> +}
> +
> static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
> {
> ctx->level = kvm->arch.root_level;
> @@ -365,6 +375,71 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
> kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
> }
>
> +int kvm_arch_prepare_memory_region(struct kvm *kvm,
> + const struct kvm_memory_slot *old,
> + struct kvm_memory_slot *new,
> + enum kvm_mr_change change)
> +{
> + size_t size, gpa_offset, hva_offset;
> + gpa_t gpa_start;
> + hva_t hva_start;
> +
> + if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE))
> + return 0;
> + /*
> + * Prevent userspace from creating a memory region outside of the
> + * VM GPA address space
> + */
> + if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT))
> + return -ENOMEM;
> +
> + size = new->npages * PAGE_SIZE;
> + gpa_start = new->base_gfn << PAGE_SHIFT;
> + hva_start = new->userspace_addr;
> + new->arch.flags = 0;
> + if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE)
> + && IS_ALIGNED(hva_start, PMD_SIZE))
> + new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE;
> + else {
> + /*
> + * Pages belonging to memslots that don't have the same
> + * alignment within a PMD for userspace and GPA cannot be
> + * mapped with PMD entries, because we'll end up mapping
> + * the wrong pages.
> + *
> + * Consider a layout like the following:
> + *
> + * memslot->userspace_addr:
> + * +-----+--------------------+--------------------+---+
> + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
> + * +-----+--------------------+--------------------+---+
> + *
> + * memslot->base_gfn << PAGE_SHIFT:
> + * +---+--------------------+--------------------+-----+
> + * |abc|def Stage-2 block | Stage-2 block |tvxyz|
> + * +---+--------------------+--------------------+-----+
> + *
> + * If we create those stage-2 blocks, we'll end up with this
> + * incorrect mapping:
> + * d -> f
> + * e -> g
> + * f -> h
> + */
> + gpa_offset = gpa_start & (PMD_SIZE - 1);
> + hva_offset = hva_start & (PMD_SIZE - 1);
> + if (gpa_offset != hva_offset) {
> + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
> + } else {
> + if (gpa_offset == 0)
> + gpa_offset = PMD_SIZE;
> + if ((size + gpa_offset) < (PMD_SIZE * 2))
> + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
> + }
> + }
> +
> + return 0;
> +}
> +
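
A worked example of the fallback branch above may help; the numbers are
purely illustrative and assume a 4K base page size, i.e. PMD_SIZE = 2M:

	gpa_start  = 0x100000     -> gpa_offset = 1M
	hva_start  = 0x40100000   -> hva_offset = 1M   (offsets match)
	size       = 2M
	size + gpa_offset = 3M < 2 * PMD_SIZE (4M)

	No fully PMD-aligned 2M range fits inside the slot, so it is marked
	KVM_MEM_HUGEPAGE_INCAPABLE and the fault path rejects huge mappings
	for it without redoing the alignment math.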
> void kvm_arch_commit_memory_region(struct kvm *kvm,
> struct kvm_memory_slot *old,
> const struct kvm_memory_slot *new,
> @@ -562,47 +637,23 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ
> }
>
> static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
> - unsigned long hva, unsigned long map_size, bool write)
> + unsigned long hva, bool write)
> {
> - size_t size;
> - gpa_t gpa_start;
> - hva_t uaddr_start, uaddr_end;
> + hva_t start, end;
>
> /* Disable dirty logging on HugePages */
> if (kvm_slot_dirty_track_enabled(memslot) && write)
> return false;
>
> - size = memslot->npages * PAGE_SIZE;
> - gpa_start = memslot->base_gfn << PAGE_SHIFT;
> - uaddr_start = memslot->userspace_addr;
> - uaddr_end = uaddr_start + size;
> + if (kvm_hugepage_capable(memslot))
> + return true;
>
> - /*
> - * Pages belonging to memslots that don't have the same alignment
> - * within a PMD for userspace and GPA cannot be mapped with stage-2
> - * PMD entries, because we'll end up mapping the wrong pages.
> - *
> - * Consider a layout like the following:
> - *
> - * memslot->userspace_addr:
> - * +-----+--------------------+--------------------+---+
> - * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
> - * +-----+--------------------+--------------------+---+
> - *
> - * memslot->base_gfn << PAGE_SIZE:
> - * +---+--------------------+--------------------+-----+
> - * |abc|def Stage-2 block | Stage-2 block |tvxyz|
> - * +---+--------------------+--------------------+-----+
> - *
> - * If we create those stage-2 blocks, we'll end up with this incorrect
> - * mapping:
> - * d -> f
> - * e -> g
> - * f -> h
> - */
> - if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
> + if (kvm_hugepage_incapable(memslot))
> return false;
>
> + start = memslot->userspace_addr;
> + end = start + memslot->npages * PAGE_SIZE;
> +
> /*
> * Next, let's make sure we're not trying to map anything not covered
> * by the memslot. This means we have to prohibit block size mappings
> @@ -615,8 +666,8 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
> * userspace_addr or the base_gfn, as both are equally aligned (per
> * the check above) and equally sized.
> */
> - return (hva & ~(map_size - 1)) >= uaddr_start &&
> - (hva & ~(map_size - 1)) + map_size <= uaddr_end;
> + return (hva >= ALIGN(start, PMD_SIZE)) &&
> + (hva < ALIGN_DOWN(end, PMD_SIZE));
> }
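
With the flags precomputed, the remaining per-fault work is only this
range check. A hypothetical example, again with PMD_SIZE = 2M:

	start = 0x40100000, size = 6M  ->  end = 0x40700000
	ALIGN(start, PMD_SIZE)    = 0x40200000
	ALIGN_DOWN(end, PMD_SIZE) = 0x40600000

	A fault whose hva lies in [0x40200000, 0x40600000) may be mapped
	with a PMD-sized entry; faults in the unaligned head or tail of
	the slot fall back to normal pages.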
>
> /*
> @@ -842,7 +893,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>
> /* Disable dirty logging on HugePages */
> level = 0;
> - if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) {
> + if (!fault_supports_huge_mapping(memslot, hva, write)) {
> level = 0;
> } else {
> level = host_pfn_mapping_level(kvm, gfn, memslot);
> @@ -901,12 +952,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> {
> }
>
> -int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
> - struct kvm_memory_slot *new, enum kvm_mr_change change)
> -{
> - return 0;
> -}
> -
> void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
> const struct kvm_memory_slot *memslot)
> {
>