[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240826204353.2228736-20-peterx@redhat.com>
Date: Mon, 26 Aug 2024 16:43:53 -0400
From: Peter Xu <peterx@...hat.com>
To: linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Cc: Gavin Shan <gshan@...hat.com>,
Catalin Marinas <catalin.marinas@....com>,
x86@...nel.org,
Ingo Molnar <mingo@...hat.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Paolo Bonzini <pbonzini@...hat.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Thomas Gleixner <tglx@...utronix.de>,
Alistair Popple <apopple@...dia.com>,
kvm@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org,
Sean Christopherson <seanjc@...gle.com>,
peterx@...hat.com,
Oscar Salvador <osalvador@...e.de>,
Jason Gunthorpe <jgg@...dia.com>,
Borislav Petkov <bp@...en8.de>,
Zi Yan <ziy@...dia.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
David Hildenbrand <david@...hat.com>,
Yan Zhao <yan.y.zhao@...el.com>,
Will Deacon <will@...nel.org>,
Kefeng Wang <wangkefeng.wang@...wei.com>,
Alex Williamson <alex.williamson@...hat.com>
Subject: [PATCH v2 19/19] vfio/pci: Implement huge_fault support
From: Alex Williamson <alex.williamson@...hat.com>
With the addition of pfnmap support in vmf_insert_pfn_{pmd,pud}() we
can take advantage of PMD and PUD faults to PCI BAR mmaps and create
more efficient mappings. PCI BARs are always a power of two and will
typically get at least PMD alignment without userspace even trying.
Userspace alignment for PUD mappings is also not too difficult.
Consolidate faults through a single handler with a new wrapper for
standard single page faults. The pre-faulting behavior of commit
d71a989cf5d9 ("vfio/pci: Insert full vma on mmap'd MMIO fault") is
removed in this refactoring since huge_fault will cover the bulk of
the faults and results in more efficient page table usage. We also
want to avoid that pre-faulted single page mappings preempt huge page
mappings.
Cc: kvm@...r.kernel.org
Signed-off-by: Alex Williamson <alex.williamson@...hat.com>
Signed-off-by: Peter Xu <peterx@...hat.com>
---
drivers/vfio/pci/vfio_pci_core.c | 60 +++++++++++++++++++++++---------
1 file changed, 43 insertions(+), 17 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index ba0ce0075b2f..2d7478e9a62d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -20,6 +20,7 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pci.h>
+#include <linux/pfn_t.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -1657,14 +1658,20 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
}
-static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+ unsigned int order)
{
struct vm_area_struct *vma = vmf->vma;
struct vfio_pci_core_device *vdev = vma->vm_private_data;
unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
- unsigned long addr = vma->vm_start;
vm_fault_t ret = VM_FAULT_SIGBUS;
+ if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
+ vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
+ ret = VM_FAULT_FALLBACK;
+ goto out;
+ }
+
pfn = vma_to_pfn(vma);
down_read(&vdev->memory_lock);
@@ -1672,30 +1679,49 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
goto out_unlock;
- ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
- if (ret & VM_FAULT_ERROR)
- goto out_unlock;
-
- /*
- * Pre-fault the remainder of the vma, abort further insertions and
- * supress error if fault is encountered during pre-fault.
- */
- for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
- if (addr == vmf->address)
- continue;
-
- if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
- break;
+ switch (order) {
+ case 0:
+ ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+ break;
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+ case PMD_ORDER:
+ ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
+ PFN_DEV), false);
+ break;
+#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+ case PUD_ORDER:
+ ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
+ PFN_DEV), false);
+ break;
+#endif
+ default:
+ ret = VM_FAULT_FALLBACK;
}
out_unlock:
up_read(&vdev->memory_lock);
+out:
+ dev_dbg_ratelimited(&vdev->pdev->dev,
+ "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
+ __func__, order,
+ vma->vm_pgoff >>
+ (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT),
+ pgoff, (unsigned int)ret);
return ret;
}
+static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
+{
+ return vfio_pci_mmap_huge_fault(vmf, 0);
+}
+
static const struct vm_operations_struct vfio_pci_mmap_ops = {
- .fault = vfio_pci_mmap_fault,
+ .fault = vfio_pci_mmap_page_fault,
+#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
+ .huge_fault = vfio_pci_mmap_huge_fault,
+#endif
};
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
--
2.45.0
Powered by blists - more mailing lists