Message-ID: <CAAhSdy1u0MkWqreL-fDYZq3KvpAVFPefK0osJ7tai1oXGSyw7w@mail.gmail.com>
Date: Fri, 7 Nov 2025 16:33:38 +0530
From: Anup Patel <anup@...infault.org>
To: liu.xuemei1@....com.cn
Cc: atish.patra@...ux.dev, paul.walmsley@...ive.com, palmer@...belt.com,
aou@...s.berkeley.edu, alex@...ti.fr, kvm@...r.kernel.org,
kvm-riscv@...ts.infradead.org, linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] RISC-V: KVM: Transparent huge page support
On Tue, Sep 30, 2025 at 12:51 PM <liu.xuemei1@....com.cn> wrote:
>
> From: Jessica Liu <liu.xuemei1@....com.cn>
>
> Use block mapping if backed by a THP, as implemented in architectures
> like ARM and x86_64.
>
> Signed-off-by: Jessica Liu <liu.xuemei1@....com.cn>
> ---
> Changes in v2:
> - Fixed the typo of writing PAGE_SHIFT as PAGE_SIZE.
>
> arch/riscv/include/asm/kvm_gstage.h | 3 +
> arch/riscv/kvm/gstage.c | 100 ++++++++++++++++++++++++++++
> arch/riscv/kvm/mmu.c | 12 +++-
> 3 files changed, 114 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
> index 595e2183173e..cc67fb2d2d42 100644
> --- a/arch/riscv/include/asm/kvm_gstage.h
> +++ b/arch/riscv/include/asm/kvm_gstage.h
> @@ -69,4 +69,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
>
> void kvm_riscv_gstage_mode_detect(void);
>
> +long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
> + unsigned long hva, kvm_pfn_t *pfnp, gpa_t *gpa);
> +
> #endif
> diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
> index 24c270d6d0e2..129dee62c570 100644
> --- a/arch/riscv/kvm/gstage.c
> +++ b/arch/riscv/kvm/gstage.c
> @@ -77,6 +77,106 @@ static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
> return 0;
> }
>
> +static int gstage_get_user_mapping_size(struct kvm *kvm, u64 addr)
> +{
> + pte_t *ptepp;
> + u32 ptep_level;
> + unsigned long out_pgsize;
> + struct kvm_gstage gstage = {
> + .pgd = kvm->mm->pgd
> + };
> +
> + if (!kvm_riscv_gstage_get_leaf(&gstage, addr, &ptepp, &ptep_level))
> + return -EFAULT;
> +
> + if (gstage_level_to_page_size(ptep_level, &out_pgsize))
> + return -EFAULT;
> +
> + return out_pgsize;
> +}
> +
> +static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
> +{
> + gpa_t gpa_start;
> + hva_t uaddr_start, uaddr_end;
> + size_t size;
> +
> + size = memslot->npages * PAGE_SIZE;
> + uaddr_start = memslot->userspace_addr;
> + uaddr_end = uaddr_start + size;
> +
> + gpa_start = memslot->base_gfn << PAGE_SHIFT;
> +
> + /*
> + * Pages belonging to memslots that don't have the same alignment
> + * within a PMD for userspace and GPA cannot be mapped with g-stage
> + * PMD entries, because we'll end up mapping the wrong pages.
> + *
> + * Consider a layout like the following:
> + *
> + * memslot->userspace_addr:
> + * +-----+--------------------+--------------------+---+
> + * |abcde|fgh vs-stage block | vs-stage block tv|xyz|
> + * +-----+--------------------+--------------------+---+
> + *
> + * memslot->base_gfn << PAGE_SHIFT:
> + * +---+--------------------+--------------------+-----+
> + * |abc|def g-stage block | g-stage block |tvxyz|
> + * +---+--------------------+--------------------+-----+
> + *
> + * If we create those g-stage blocks, we'll end up with this incorrect
> + * mapping:
> + * d -> f
> + * e -> g
> + * f -> h
> + */
> + if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
> + return false;
> +
> + /*
> + * Next, let's make sure we're not trying to map anything not covered
> + * by the memslot. This means we have to prohibit block size mappings
> + * for the beginning and end of a non-block aligned and non-block sized
> + * memory slot (illustrated by the head and tail parts of the
> + * userspace view above containing pages 'abcde' and 'xyz',
> + * respectively).
> + *
> + * Note that it doesn't matter if we do the check using the
> + * userspace_addr or the base_gfn, as both are equally aligned (per
> + * the check above) and equally sized.
> + */
> + return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
> +}
> +
> +long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
> + unsigned long hva, kvm_pfn_t *hfnp, gpa_t *gpa)
> +{
> + kvm_pfn_t hfn = *hfnp;
> +
> + /*
> + * Make sure the adjustment is done only for THP pages. Also make
> + * sure that the HVA and GPA are sufficiently aligned and that the
> + * block map is contained within the memslot.
> + */
> + if (gstage_supports_huge_mapping(memslot, hva)) {
> + int sz = gstage_get_user_mapping_size(kvm, hva);
> +
> + if (sz < 0)
> + return sz;
> +
> + if (sz < PMD_SIZE)
> + return PAGE_SIZE;
> +
> + *gpa &= PMD_MASK;
> + hfn &= ~(PTRS_PER_PMD - 1);
> + *hfnp = hfn;
> +
> + return PMD_SIZE;
> + }
> +
> + return PAGE_SIZE;
> +}
> +
The gstage.c source is for common page table management, which will be
shared by nested virtualization and pKVM, whereas mmu.c is for
host/hypervisor mappings.
All of the above functions except gstage_get_user_mapping_size() must
be moved to mmu.c.
Also, change the prototype of gstage_get_user_mapping_size() to:
int kvm_riscv_gstage_get_mapping_size(struct kvm_gstage *gstage, gpa_t addr);
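For illustration only, a rough sketch of how this split could look; the
renamed helper stays in gstage.c (where gstage_level_to_page_size() and
kvm_riscv_gstage_get_leaf() live), and the mmu.c side builds the
kvm_gstage for the host mm page table before calling it. The caller name
mmu_get_user_mapping_size() is hypothetical, and the struct
initialization just mirrors the quoted patch; this is not the final code:

/* gstage.c: return the leaf mapping size in bytes for addr in the given
 * page table, or -EFAULT if there is no leaf mapping.
 */
int kvm_riscv_gstage_get_mapping_size(struct kvm_gstage *gstage, gpa_t addr)
{
	pte_t *ptepp;
	u32 ptep_level;
	unsigned long pgsize;

	if (!kvm_riscv_gstage_get_leaf(gstage, addr, &ptepp, &ptep_level))
		return -EFAULT;

	if (gstage_level_to_page_size(ptep_level, &pgsize))
		return -EFAULT;

	return pgsize;
}

/* mmu.c: hypothetical caller in the THP adjustment path, pointing a
 * kvm_gstage at the host mm page table (as the quoted patch does) and
 * querying the mapping size backing the HVA.
 */
static int mmu_get_user_mapping_size(struct kvm *kvm, unsigned long hva)
{
	struct kvm_gstage gstage = {
		.pgd = kvm->mm->pgd,
	};

	return kvm_riscv_gstage_get_mapping_size(&gstage, hva);
}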
> bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
> pte_t **ptepp, u32 *ptep_level)
> {
> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> index 525fb5a330c0..f70cf721ebb8 100644
> --- a/arch/riscv/kvm/mmu.c
> +++ b/arch/riscv/kvm/mmu.c
> @@ -337,7 +337,8 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
> struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
> bool logging = (memslot->dirty_bitmap &&
> !(memslot->flags & KVM_MEM_READONLY)) ? true : false;
> - unsigned long vma_pagesize, mmu_seq;
> + unsigned long mmu_seq;
> + long vma_pagesize;
> struct kvm_gstage gstage;
> struct page *page;
>
> @@ -416,6 +417,15 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
> if (mmu_invalidate_retry(kvm, mmu_seq))
> goto out_unlock;
>
> + /* check if we are backed by a THP and thus use block mapping if possible */
> + if (vma_pagesize == PAGE_SIZE) {
> + vma_pagesize = kvm_riscv_gstage_thp_adjust(kvm, memslot, hva, &hfn, &gpa);
> + if (vma_pagesize < 0) {
> + ret = vma_pagesize;
> + goto out_unlock;
> + }
> + }
> +
> if (writable) {
> mark_page_dirty_in_slot(kvm, memslot, gfn);
> ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
> --
> 2.27.0
>
Regards,
Anup