[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <814f7ef7-48a2-a025-dcf3-5c4719cff811@quicinc.com>
Date: Thu, 27 Jul 2023 10:28:58 -0700
From: Elliot Berman <quic_eberman@...cinc.com>
To: Yi-De Wu <yi-de.wu@...iatek.com>,
Yingshiuan Pan <yingshiuan.pan@...iatek.com>,
Ze-Yu Wang <ze-yu.wang@...iatek.com>,
"Rob Herring" <robh+dt@...nel.org>,
Krzysztof Kozlowski <krzysztof.kozlowski+dt@...aro.org>,
Conor Dooley <conor+dt@...nel.org>,
Jonathan Corbet <corbet@....net>,
Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>, Arnd Bergmann <arnd@...db.de>,
"Matthias Brugger" <matthias.bgg@...il.com>,
AngeloGioacchino Del Regno
<angelogioacchino.delregno@...labora.com>
CC: <devicetree@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<linux-doc@...r.kernel.org>,
<linux-arm-kernel@...ts.infradead.org>,
<linux-arch@...r.kernel.org>, <linux-mediatek@...ts.infradead.org>,
"David Bradil" <dbrazdil@...gle.com>,
Trilok Soni <quic_tsoni@...cinc.com>,
"Ivan Tseng" <ivan.tseng@...iatek.com>,
Jade Shih <jades.shih@...iatek.com>,
"My Chuang" <my.chuang@...iatek.com>,
Shawn Hsiao <shawn.hsiao@...iatek.com>,
PeiLun Suei <peilun.suei@...iatek.com>,
Liju Chen <liju-clr.chen@...iatek.com>,
Willix Yeh <chi-shen.yeh@...iatek.com>
Subject: Re: [PATCH v5 12/12] virt: geniezone: Add memory pin/unpin support
Hi Jerry,
On 7/27/2023 1:00 AM, Yi-De Wu wrote:
> From: "Jerry Wang" <ze-yu.wang@...iatek.com>
>
> Implement memory pin/unpin.
> - use rb_tree to store pinned memory page
> - pin page when handling page fault
> - unpin page when handling relinquish or destroy
>
> Signed-off-by: Jerry Wang <ze-yu.wang@...iatek.com>
> Signed-off-by: Liju Chen <liju-clr.chen@...iatek.com>
> Signed-off-by: Yi-De Wu <yi-de.wu@...iatek.com>
> ---
> drivers/virt/geniezone/Makefile | 3 +-
> drivers/virt/geniezone/gzvm_exception.c | 32 ----
> drivers/virt/geniezone/gzvm_hvc.c | 34 ++++
> drivers/virt/geniezone/gzvm_mmu.c | 210 ++++++++++++++++++++++++
> drivers/virt/geniezone/gzvm_vcpu.c | 4 +-
> drivers/virt/geniezone/gzvm_vm.c | 113 +++----------
> include/linux/gzvm_drv.h | 19 ++-
> include/uapi/linux/gzvm.h | 5 +
> 8 files changed, 291 insertions(+), 129 deletions(-)
> create mode 100644 drivers/virt/geniezone/gzvm_hvc.c
> create mode 100644 drivers/virt/geniezone/gzvm_mmu.c
>
> diff --git a/drivers/virt/geniezone/Makefile b/drivers/virt/geniezone/Makefile
> index e1299f99df76..ca4b84630573 100644
> --- a/drivers/virt/geniezone/Makefile
> +++ b/drivers/virt/geniezone/Makefile
> @@ -8,4 +8,5 @@ GZVM_DIR ?= ../../../drivers/virt/geniezone
>
> gzvm-y := $(GZVM_DIR)/gzvm_main.o $(GZVM_DIR)/gzvm_vm.o \
> $(GZVM_DIR)/gzvm_vcpu.o $(GZVM_DIR)/gzvm_irqfd.o \
> - $(GZVM_DIR)/gzvm_ioeventfd.o $(GZVM_DIR)/gzvm_exception.o
> + $(GZVM_DIR)/gzvm_ioeventfd.o $(GZVM_DIR)/gzvm_exception.o \
> + $(GZVM_DIR)/gzvm_hvc.o $(GZVM_DIR)/gzvm_mmu.o
> diff --git a/drivers/virt/geniezone/gzvm_exception.c b/drivers/virt/geniezone/gzvm_exception.c
> index c2cab1472d2f..8c413815369b 100644
> --- a/drivers/virt/geniezone/gzvm_exception.c
> +++ b/drivers/virt/geniezone/gzvm_exception.c
> @@ -6,38 +6,6 @@
> #include <linux/device.h>
> #include <linux/gzvm_drv.h>
>
> -/**
> - * gzvm_handle_page_fault() - Handle guest page fault, find corresponding page
> - * for the faulting gpa
> - * @vcpu: Pointer to struct gzvm_vcpu_run of the faulting vcpu
> - *
> - * Return:
> - * * 0 - Success to handle guest page fault
> - * * -EFAULT - Failed to map phys addr to guest's GPA
> - */
> -static int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
> -{
> - struct gzvm *vm = vcpu->gzvm;
> - int memslot_id;
> - u64 pfn, gfn;
> - int ret;
> -
> - gfn = PHYS_PFN(vcpu->run->exception.fault_gpa);
> - memslot_id = gzvm_find_memslot(vm, gfn);
> - if (unlikely(memslot_id < 0))
> - return -EFAULT;
> -
> - ret = gzvm_gfn_to_pfn_memslot(&vm->memslot[memslot_id], gfn, &pfn);
> - if (unlikely(ret))
> - return -EFAULT;
> -
> - ret = gzvm_arch_map_guest(vm->vm_id, memslot_id, pfn, gfn, 1);
> - if (unlikely(ret))
> - return -EFAULT;
> -
> - return 0;
> -}
> -
> /**
> * gzvm_handle_guest_exception() - Handle guest exception
> * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
> diff --git a/drivers/virt/geniezone/gzvm_hvc.c b/drivers/virt/geniezone/gzvm_hvc.c
> new file mode 100644
> index 000000000000..d8854eeec04e
> --- /dev/null
> +++ b/drivers/virt/geniezone/gzvm_hvc.c
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2023 MediaTek Inc.
> + */
> +
> +#include <linux/gzvm_drv.h>
> +
> +/**
> + * gzvm_handle_guest_hvc() - Handle guest hvc
> + * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
> + * Return:
> + * * true - This hvc has been processed, no need to back to VMM.
> + * * false - This hvc has not been processed, require userspace.
> + */
> +bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu)
> +{
> + int ret;
> + unsigned long ipa;
> +
> + switch (vcpu->run->hypercall.args[0]) {
> + case GZVM_HVC_MEM_RELINQUISH:
> + ipa = vcpu->run->hypercall.args[1];
> + ret = gzvm_handle_relinquish(vcpu, ipa);
> + break;
> + default:
> + ret = false;
> + break;
> + }
> +
> + if (!ret)
> + return true;
> + else
> + return false;
> +}
> diff --git a/drivers/virt/geniezone/gzvm_mmu.c b/drivers/virt/geniezone/gzvm_mmu.c
> new file mode 100644
> index 000000000000..4c2c7da7f929
> --- /dev/null
> +++ b/drivers/virt/geniezone/gzvm_mmu.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2023 MediaTek Inc.
> + */
> +
> +#include <linux/gzvm_drv.h>
> +
> +/**
> + * hva_to_pa_fast() - converts hva to pa in generic fast way
> + * @hva: Host virtual address.
> + *
> + * Return: 0 if translation error
> + */
> +u64 hva_to_pa_fast(u64 hva)
> +{
> + struct page *page[1];
> +
> + u64 pfn;
> +
> + if (get_user_page_fast_only(hva, 0, page)) {
> + pfn = page_to_phys(page[0]);
> + put_page((struct page *)page);
> + return pfn;
> + } else {
> + return 0;
> + }
> +}
> +
> +/**
> + * hva_to_pa_slow() - note that this function may sleep
> + * @hva: Host virtual address.
> + *
> + * Return: 0 if translation error
> + */
> +u64 hva_to_pa_slow(u64 hva)
> +{
> + struct page *page = NULL;
> + u64 pfn = 0;
> + int npages;
> +
> + npages = get_user_pages_unlocked(hva, 1, &page, 0);
> + if (npages != 1)
> + return 0;
> +
> + if (page) {
> + pfn = page_to_phys(page);
> + put_page(page);
> + }
> +
> + return pfn;
> +}
> +
> +static u64 __gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn)
> +{
> + u64 hva, pa;
> +
> + hva = gzvm_gfn_to_hva_memslot(memslot, gfn);
> +
> + pa = gzvm_hva_to_pa_arch(hva);
> + if (pa != 0)
> + return PHYS_PFN(pa);
> +
> + pa = hva_to_pa_fast(hva);
> + if (pa)
> + return PHYS_PFN(pa);
> +
> + pa = hva_to_pa_slow(hva);
> + if (pa)
> + return PHYS_PFN(pa);
> +
> + return 0;
> +}
> +
> +/**
> + * gzvm_gfn_to_pfn_memslot() - Translate gfn (guest ipa) to pfn (host pa),
> + * result is in @pfn
> + * @memslot: Pointer to struct gzvm_memslot.
> + * @gfn: Guest frame number.
> + * @pfn: Host page frame number.
> + *
> + * Return:
> + * * 0 - Succeed
> + * * -EFAULT - Failed to convert
> + */
> +int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn)
> +{
> + u64 __pfn;
> +
> + if (!memslot)
> + return -EFAULT;
> +
> + __pfn = __gzvm_gfn_to_pfn_memslot(memslot, gfn);
> + if (__pfn == 0) {
> + *pfn = 0;
> + return -EFAULT;
> + }
> +
> + *pfn = __pfn;
> +
> + return 0;
> +}
> +
> +static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
> +{
> + struct gzvm_pinned_page *a = container_of(node, struct gzvm_pinned_page, node);
> + struct gzvm_pinned_page *b = container_of(parent, struct gzvm_pinned_page, node);
> +
> + if (a->ipa < b->ipa)
> + return -1;
> + if (a->ipa > b->ipa)
> + return 1;
> + return 0;
> +}
> +
> +static int rb_ppage_cmp(const void *key, const struct rb_node *node)
> +{
> + struct gzvm_pinned_page *p = container_of(node, struct gzvm_pinned_page, node);
> + phys_addr_t ipa = (phys_addr_t)key;
> +
> + return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
> +}
> +
> +static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
> +{
> + if (rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages))
> + return -EEXIST;
> + return 0;
> +}
> +
> +static int pin_one_page(unsigned long hva, struct page **page)
> +{
> + struct mm_struct *mm = current->mm;
> + unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
Recently received this feedback you might also find helpful:
https://lore.kernel.org/all/20230714121321.GB5597@willie-the-truck/
> +
> + mmap_read_lock(mm);
> + pin_user_pages(hva, 1, flags, page);
> + mmap_read_unlock(mm);
> +
> + return 0;
> +}
> +
> +/**
> + * gzvm_handle_relinquish() - Handle memory relinquish request from hypervisor
> + *
> + * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
> + * @ipa: Start address(gpa) of a reclaimed page
> + */
> +int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa)
> +{
> + struct gzvm_pinned_page *ppage;
> + struct rb_node *node;
> + struct gzvm *vm = vcpu->gzvm;
> +
> + node = rb_find((void *)ipa, &vm->pinned_pages, rb_ppage_cmp);
> +
> + if (node)
> + rb_erase(node, &vm->pinned_pages);
> + else
> + return 0;
> +
> + ppage = container_of(node, struct gzvm_pinned_page, node);
> + unpin_user_pages_dirty_lock(&ppage->page, 1, true);
> + kfree(ppage);
> + return 0;
> +}
> +
> +/**
> + * gzvm_handle_page_fault() - Handle guest page fault, find corresponding page
> + * for the faulting gpa
> + * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
> + */
> +int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
> +{
> + struct gzvm *vm = vcpu->gzvm;
> + int memslot_id;
> + u64 pfn, gfn;
> + unsigned long hva;
> + struct gzvm_pinned_page *ppage = NULL;
> + struct page *page = NULL;
> + int ret;
> +
> + gfn = PHYS_PFN(vcpu->run->exception.fault_gpa);
> + memslot_id = gzvm_find_memslot(vm, gfn);
> + if (unlikely(memslot_id < 0))
> + return -EFAULT;
> +
> + ret = gzvm_gfn_to_pfn_memslot(&vm->memslot[memslot_id], gfn, &pfn);
> + if (unlikely(ret))
> + return -EFAULT;
> +
> + ret = gzvm_arch_map_guest(vm->vm_id, memslot_id, pfn, gfn, 1);
> + if (unlikely(ret))
> + return -EFAULT;
> +
> + hva = gzvm_gfn_to_hva_memslot(&vm->memslot[memslot_id], gfn);
> + pin_one_page(hva, &page);
> +
> + if (!page)
> + return -EFAULT;
> +
> + ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
> + if (!ppage)
> + return -ENOMEM;
> +
> + ppage->page = page;
> + ppage->ipa = vcpu->run->exception.fault_gpa;
> + gzvm_insert_ppage(vm, ppage);
> +
> + return 0;
> +}
> diff --git a/drivers/virt/geniezone/gzvm_vcpu.c b/drivers/virt/geniezone/gzvm_vcpu.c
> index 794b24da40e2..964e18ff0d2a 100644
> --- a/drivers/virt/geniezone/gzvm_vcpu.c
> +++ b/drivers/virt/geniezone/gzvm_vcpu.c
> @@ -117,7 +117,9 @@ static long gzvm_vcpu_run(struct gzvm_vcpu *vcpu, void * __user argp)
> need_userspace = true;
> break;
> case GZVM_EXIT_HYPERCALL:
> - fallthrough;
> + if (!gzvm_handle_guest_hvc(vcpu))
> + need_userspace = true;
> + break;
> case GZVM_EXIT_DEBUG:
> fallthrough;
> case GZVM_EXIT_FAIL_ENTRY:
> diff --git a/drivers/virt/geniezone/gzvm_vm.c b/drivers/virt/geniezone/gzvm_vm.c
> index 3da5fdc141b6..6fae6bf976c8 100644
> --- a/drivers/virt/geniezone/gzvm_vm.c
> +++ b/drivers/virt/geniezone/gzvm_vm.c
> @@ -16,106 +16,13 @@
> static DEFINE_MUTEX(gzvm_list_lock);
> static LIST_HEAD(gzvm_list);
>
> -/**
> - * hva_to_pa_fast() - converts hva to pa in generic fast way
> - * @hva: Host virtual address.
> - *
> - * Return: 0 if translation error
> - */
> -static u64 hva_to_pa_fast(u64 hva)
> -{
> - struct page *page[1];
> -
> - u64 pfn;
> -
> - if (get_user_page_fast_only(hva, 0, page)) {
> - pfn = page_to_phys(page[0]);
> - put_page((struct page *)page);
> - return pfn;
> - } else {
> - return 0;
> - }
> -}
> -
> -/**
> - * hva_to_pa_slow() - note that this function may sleep
> - * @hva: Host virtual address.
> - *
> - * Return: 0 if translation error
> - */
> -static u64 hva_to_pa_slow(u64 hva)
> -{
> - struct page *page;
> - int npages;
> - u64 pfn;
> -
> - npages = get_user_pages_unlocked(hva, 1, &page, 0);
> - if (npages != 1)
> - return 0;
> -
> - pfn = page_to_phys(page);
> - put_page(page);
> -
> - return pfn;
> -}
> -
> -static u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn)
> +u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn)
> {
> u64 offset = gfn - memslot->base_gfn;
>
> return memslot->userspace_addr + offset * PAGE_SIZE;
> }
>
> -static u64 __gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn)
> -{
> - u64 hva, pa;
> -
> - hva = gzvm_gfn_to_hva_memslot(memslot, gfn);
> -
> - pa = gzvm_hva_to_pa_arch(hva);
> - if (pa != 0)
> - return PHYS_PFN(pa);
> -
> - pa = hva_to_pa_fast(hva);
> - if (pa)
> - return PHYS_PFN(pa);
> -
> - pa = hva_to_pa_slow(hva);
> - if (pa)
> - return PHYS_PFN(pa);
> -
> - return 0;
> -}
> -
> -/**
> - * gzvm_gfn_to_pfn_memslot() - Translate gfn (guest ipa) to pfn (host pa),
> - * result is in @pfn
> - * @memslot: Pointer to struct gzvm_memslot.
> - * @gfn: Guest frame number.
> - * @pfn: Host page frame number.
> - *
> - * Return:
> - * * 0 - Succeed
> - * * -EFAULT - Failed to convert
> - */
> -int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn)
> -{
> - u64 __pfn;
> -
> - if (!memslot)
> - return -EFAULT;
> -
> - __pfn = __gzvm_gfn_to_pfn_memslot(memslot, gfn);
> - if (__pfn == 0) {
> - *pfn = 0;
> - return -EFAULT;
> - }
> -
> - *pfn = __pfn;
> -
> - return 0;
> -}
> -
> /**
> * gzvm_find_memslot() - Find memslot containing this @gpa
> * @vm: Pointer to struct gzvm
> @@ -454,6 +361,21 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
> return ret;
> }
>
> +static void gzvm_destroy_ppage(struct gzvm *gzvm)
> +{
> + struct gzvm_pinned_page *ppage;
> + struct rb_node *node;
> +
> + node = rb_first(&gzvm->pinned_pages);
> + while (node) {
> + ppage = rb_entry(node, struct gzvm_pinned_page, node);
> + unpin_user_pages_dirty_lock(&ppage->page, 1, true);
> + node = rb_next(node);
> + rb_erase(&ppage->node, &gzvm->pinned_pages);
> + kfree(ppage);
> + }
> +}
> +
> static void gzvm_destroy_vm(struct gzvm *gzvm)
> {
> pr_debug("VM-%u is going to be destroyed\n", gzvm->vm_id);
> @@ -470,6 +392,8 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)
>
> mutex_unlock(&gzvm->lock);
>
> + gzvm_destroy_ppage(gzvm);
> +
> kfree(gzvm);
> }
>
> @@ -505,6 +429,7 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
> gzvm->vm_id = ret;
> gzvm->mm = current->mm;
> mutex_init(&gzvm->lock);
> + gzvm->pinned_pages = RB_ROOT;
>
> ret = gzvm_vm_irqfd_init(gzvm);
> if (ret) {
> diff --git a/include/linux/gzvm_drv.h b/include/linux/gzvm_drv.h
> index d7838679c700..a0bc3b4244cc 100644
> --- a/include/linux/gzvm_drv.h
> +++ b/include/linux/gzvm_drv.h
> @@ -12,6 +12,8 @@
> #include <linux/mutex.h>
> #include <linux/gzvm.h>
> #include <linux/srcu.h>
> +#include <linux/rbtree.h>
> +#include <linux/mm.h>
>
> #define GZVM_VCPU_MMAP_SIZE PAGE_SIZE
> #define INVALID_VM_ID 0xffff
> @@ -76,6 +78,12 @@ struct gzvm_vcpu {
> struct hrtimer gzvm_mtimer;
> };
>
> +struct gzvm_pinned_page {
> + struct rb_node node;
> + struct page *page;
> + u64 ipa;
> +};
> +
> struct gzvm {
> struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
> /* userspace tied to this vm */
> @@ -102,6 +110,9 @@ struct gzvm {
> struct srcu_struct irq_srcu;
> /* lock for irq injection */
> struct mutex irq_lock;
> +
> + /* Use rb-tree to record pin/unpin page */
> + struct rb_root pinned_pages;
> };
>
> long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
> @@ -137,9 +148,15 @@ int gzvm_arch_inform_exit(u16 vm_id);
> int gzvm_arch_drv_init(void);
> void gzvm_arch_drv_exit(void);
>
> -int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn);
> int gzvm_find_memslot(struct gzvm *vm, u64 gpa);
> +int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn);
> +u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn);
> +u64 hva_to_pa_fast(u64 hva);
> +u64 hva_to_pa_slow(u64 hva);
> +int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu);
> +int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa);
> bool gzvm_handle_guest_exception(struct gzvm_vcpu *vcpu);
> +bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu);
>
> int gzvm_arch_create_device(u16 vm_id, struct gzvm_create_device *gzvm_dev);
> int gzvm_arch_inject_irq(struct gzvm *gzvm, unsigned int vcpu_idx,
> diff --git a/include/uapi/linux/gzvm.h b/include/uapi/linux/gzvm.h
> index a3329b713089..74b959994c4a 100644
> --- a/include/uapi/linux/gzvm.h
> +++ b/include/uapi/linux/gzvm.h
> @@ -156,6 +156,11 @@ enum {
> GZVM_EXCEPTION_PAGE_FAULT = 0x1,
> };
>
> +/* hypercall definitions of GZVM_EXIT_HYPERCALL */
> +enum {
> + GZVM_HVC_MEM_RELINQUISH = 0xc6000009,
> +};
> +
> /**
> * struct gzvm_vcpu_run: Same purpose as kvm_run, this struct is
> * shared between userspace, kernel and
Powered by blists - more mailing lists