Message-Id: <20251220040446.274991-11-houtao@huaweicloud.com>
Date: Sat, 20 Dec 2025 12:04:43 +0800
From: Hou Tao <houtao@...weicloud.com>
To: linux-kernel@...r.kernel.org
Cc: linux-pci@...r.kernel.org,
	linux-mm@...ck.org,
	linux-nvme@...ts.infradead.org,
	Bjorn Helgaas <bhelgaas@...gle.com>,
	Logan Gunthorpe <logang@...tatee.com>,
	Alistair Popple <apopple@...dia.com>,
	Leon Romanovsky <leonro@...dia.com>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Tejun Heo <tj@...nel.org>,
	"Rafael J . Wysocki" <rafael@...nel.org>,
	Danilo Krummrich <dakr@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	David Hildenbrand <david@...nel.org>,
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	Keith Busch <kbusch@...nel.org>,
	Jens Axboe <axboe@...nel.dk>,
	Christoph Hellwig <hch@....de>,
	Sagi Grimberg <sagi@...mberg.me>,
	houtao1@...wei.com
Subject: [PATCH 10/13] PCI/P2PDMA: Support compound pages in p2pmem_alloc_mmap()

From: Hou Tao <houtao1@...wei.com>

P2PDMA memory already supports compound pages, and the helpers for
inserting compound pages into a VMA are also in place, so add support
for compound pages in p2pmem_alloc_mmap() as well. This greatly reduces
the overhead of mmap() and get_user_pages() when compound pages are
enabled for p2pdma memory.
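
For illustration, here is a minimal userspace sketch of the mmap() path
whose overhead shrinks. The device address and mapping size are
hypothetical, and it assumes the p2pmem "allocate" sysfs file that
p2pmem_alloc_mmap() serves:

  #include <fcntl.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
  	/* Hypothetical device address; pick a real p2pdma-capable device. */
  	int fd = open("/sys/bus/pci/devices/0000:01:00.0/p2pmem/allocate",
  		      O_RDWR);
  	if (fd < 0)
  		return 1;

  	/*
  	 * A 2MB request can now be backed by a single PMD-sized compound
  	 * page instead of 512 base pages, so both the mmap() here and any
  	 * later get_user_pages() on the buffer touch far fewer pages.
  	 */
  	void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
  		       MAP_SHARED, fd, 0);
  	if (p == MAP_FAILED) {
  		close(fd);
  		return 1;
  	}

  	/* ... hand the buffer to a driver doing P2P DMA ... */

  	munmap(p, 2UL << 20);
  	close(fd);
  	return 0;
  }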

The use of vm_private_data to store the alignment of the p2pdma memory
needs some explanation. The normal way to get the alignment is through
the pci_dev. That could be done either by invoking kernfs_of() and
sysfs_file_kobj(), or by defining a new struct kernfs_vm_ops to pass
the kobject to the ->may_split() and ->pagesize() callbacks. The former
approach depends too much on kernfs implementation details, and the
latter would lead to excessive churn, so choose the simpler option of
saving the alignment in vm_private_data instead.

Signed-off-by: Hou Tao <houtao1@...wei.com>
---
 drivers/pci/p2pdma.c | 48 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index e97f5da73458..4a133219ac43 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -128,6 +128,25 @@ static unsigned long p2pmem_get_unmapped_area(struct file *filp, struct kobject
 	return mm_get_unmapped_area(filp, uaddr, len, pgoff, flags);
 }
 
+static int p2pmem_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	size_t align = (uintptr_t)vma->vm_private_data;
+
+	if (!IS_ALIGNED(addr, align))
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned long p2pmem_pagesize(struct vm_area_struct *vma)
+{
+	return (uintptr_t)vma->vm_private_data;
+}
+
+static const struct vm_operations_struct p2pmem_vm_ops = {
+	.may_split = p2pmem_may_split,
+	.pagesize = p2pmem_pagesize,
+};
+
 static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		const struct bin_attribute *attr, struct vm_area_struct *vma)
 {
@@ -136,6 +155,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 	struct pci_p2pdma *p2pdma;
 	struct percpu_ref *ref;
 	unsigned long vaddr;
+	size_t align;
 	void *kaddr;
 	int ret;
 
@@ -161,6 +181,16 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		goto out;
 	}
 
+	align = p2pdma->align;
+	if (vma->vm_start & (align - 1) || vma->vm_end & (align - 1)) {
+		pci_info_ratelimited(pdev,
+				     "%s: unaligned vma (%#lx~%#lx, %#lx)\n",
+				     current->comm, vma->vm_start, vma->vm_end,
+				     align);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
 	if (!kaddr) {
 		ret = -ENOMEM;
@@ -178,7 +208,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 	}
 	rcu_read_unlock();
 
-	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += align) {
 		struct page *page = virt_to_page(kaddr);
 
 		/*
@@ -188,7 +218,12 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		 */
 		VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
 		set_page_count(page, 1);
-		ret = vm_insert_page(vma, vaddr, page);
+		if (align == PUD_SIZE)
+			ret = vm_insert_folio_pud(vma, vaddr, page_folio(page));
+		else if (align == PMD_SIZE)
+			ret = vm_insert_folio_pmd(vma, vaddr, page_folio(page));
+		else
+			ret = vm_insert_page(vma, vaddr, page);
 		if (ret) {
 			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
 			percpu_ref_put(ref);
@@ -196,10 +231,15 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		}
 		percpu_ref_get(ref);
 		put_page(page);
-		kaddr += PAGE_SIZE;
-		len -= PAGE_SIZE;
+		kaddr += align;
+		len -= align;
 	}
 
+	/* Disable unaligned splitting due to vma merge */
+	vm_flags_set(vma, VM_DONTEXPAND);
+	vma->vm_ops = &p2pmem_vm_ops;
+	vma->vm_private_data = (void *)(uintptr_t)align;
+
 	percpu_ref_put(ref);
 
 	return 0;
-- 
2.29.2

