Message-Id: <20251220040446.274991-11-houtao@huaweicloud.com>
Date: Sat, 20 Dec 2025 12:04:43 +0800
From: Hou Tao <houtao@...weicloud.com>
To: linux-kernel@...r.kernel.org
Cc: linux-pci@...r.kernel.org,
linux-mm@...ck.org,
linux-nvme@...ts.infradead.org,
Bjorn Helgaas <bhelgaas@...gle.com>,
Logan Gunthorpe <logang@...tatee.com>,
Alistair Popple <apopple@...dia.com>,
Leon Romanovsky <leonro@...dia.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Tejun Heo <tj@...nel.org>,
"Rafael J . Wysocki" <rafael@...nel.org>,
Danilo Krummrich <dakr@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
David Hildenbrand <david@...nel.org>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Keith Busch <kbusch@...nel.org>,
Jens Axboe <axboe@...nel.dk>,
Christoph Hellwig <hch@....de>,
Sagi Grimberg <sagi@...mberg.me>,
houtao1@...wei.com
Subject: [PATCH 10/13] PCI/P2PDMA: support compound page in p2pmem_alloc_mmap()
From: Hou Tao <houtao1@...wei.com>

P2PDMA memory already supports compound pages, and the helpers for
inserting compound pages into a VMA are also ready; therefore, add
support for compound pages in p2pmem_alloc_mmap() as well. This
greatly reduces the overhead of mmap() and get_user_pages() when
compound pages are enabled for p2pdma memory.

The use of vm_private_data to save the alignment of the p2pdma memory
needs some explanation. The normal way to get the alignment is through
the pci_dev, which could be reached either by invoking kernfs_of() and
sysfs_file_kobj(), or by defining a new struct kernfs_vm_ops that
passes the kobject to the may_split() and ->pagesize() callbacks. The
former approach depends too much on kernfs implementation details, and
the latter would lead to excessive churn, so take the simpler route of
saving the alignment in vm_private_data instead.
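
Continuing the sketch above (still assuming a 2 MiB alignment), an
unaligned partial munmap() has to split the VMA, so p2pmem_may_split()
reads the alignment back from vm_private_data and rejects the split,
keeping the huge mapping intact:

/* p + 4096 is not 2 MiB aligned, so the kernel's VMA split path
 * gets -EINVAL from p2pmem_may_split() and the mapping is left
 * untouched.
 */
if (munmap((char *)p + 4096, 4096))
	perror("munmap");	/* expected to fail with EINVAL */

For a larger mapping, a split at a 2 MiB boundary would still pass the
IS_ALIGNED() check in p2pmem_may_split().
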
Signed-off-by: Hou Tao <houtao1@...wei.com>
---
 drivers/pci/p2pdma.c | 48 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index e97f5da73458..4a133219ac43 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -128,6 +128,25 @@ static unsigned long p2pmem_get_unmapped_area(struct file *filp, struct kobject
 	return mm_get_unmapped_area(filp, uaddr, len, pgoff, flags);
 }
 
+static int p2pmem_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	size_t align = (uintptr_t)vma->vm_private_data;
+
+	if (!IS_ALIGNED(addr, align))
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned long p2pmem_pagesize(struct vm_area_struct *vma)
+{
+	return (uintptr_t)vma->vm_private_data;
+}
+
+static const struct vm_operations_struct p2pmem_vm_ops = {
+	.may_split = p2pmem_may_split,
+	.pagesize = p2pmem_pagesize,
+};
+
 static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 			     const struct bin_attribute *attr, struct vm_area_struct *vma)
 {
@@ -136,6 +155,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 	struct pci_p2pdma *p2pdma;
 	struct percpu_ref *ref;
 	unsigned long vaddr;
+	size_t align;
 	void *kaddr;
 	int ret;
 
@@ -161,6 +181,16 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		goto out;
 	}
 
+	align = p2pdma->align;
+	if (vma->vm_start & (align - 1) || vma->vm_end & (align - 1)) {
+		pci_info_ratelimited(pdev,
+				     "%s: unaligned vma (%#lx~%#lx, %#lx)\n",
+				     current->comm, vma->vm_start, vma->vm_end,
+				     align);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
 	if (!kaddr) {
 		ret = -ENOMEM;
@@ -178,7 +208,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 	}
 	rcu_read_unlock();
 
-	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += align) {
 		struct page *page = virt_to_page(kaddr);
 
 		/*
@@ -188,7 +218,12 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		 */
 		VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
 		set_page_count(page, 1);
-		ret = vm_insert_page(vma, vaddr, page);
+		if (align == PUD_SIZE)
+			ret = vm_insert_folio_pud(vma, vaddr, page_folio(page));
+		else if (align == PMD_SIZE)
+			ret = vm_insert_folio_pmd(vma, vaddr, page_folio(page));
+		else
+			ret = vm_insert_page(vma, vaddr, page);
 		if (ret) {
 			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
 			percpu_ref_put(ref);
@@ -196,10 +231,15 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		}
 		percpu_ref_get(ref);
 		put_page(page);
-		kaddr += PAGE_SIZE;
-		len -= PAGE_SIZE;
+		kaddr += align;
+		len -= align;
 	}
 
+	/* Disable unaligned splitting due to vma merge */
+	vm_flags_set(vma, VM_DONTEXPAND);
+	vma->vm_ops = &p2pmem_vm_ops;
+	vma->vm_private_data = (void *)(uintptr_t)align;
+
 	percpu_ref_put(ref);
 
 	return 0;

--
2.29.2