Message-Id: <ed7bd51468d10fcbd4c0110ad1b81a2b07ebd880.1607332046.git.yuleixzhang@tencent.com>
Date: Mon, 7 Dec 2020 19:31:17 +0800
From: yulei.kernel@...il.com
To: linux-mm@...ck.org, akpm@...ux-foundation.org,
linux-fsdevel@...r.kernel.org, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org, naoya.horiguchi@....com,
viro@...iv.linux.org.uk, pbonzini@...hat.com
Cc: joao.m.martins@...cle.com, rdunlap@...radead.org,
sean.j.christopherson@...el.com, xiaoguangrong.eric@...il.com,
kernellwp@...il.com, lihaiwei.kernel@...il.com,
Yulei Zhang <yuleixzhang@...cent.com>,
Chen Zhuo <sagazchen@...cent.com>
Subject: [RFC V2 24/37] dmemfs: support hugepage for dmemfs
From: Yulei Zhang <yuleixzhang@...cent.com>
It adds hugepage support for dmemfs. We use PFN_DMEM to notify
vmf_insert_pfn_pmd(), and a dmem huge pmd will be marked with
_PAGE_SPECIAL and _PAGE_DMEM, so that GUP-fast can distinguish
dmemfs pages from other page types and handle them correctly.
Signed-off-by: Chen Zhuo <sagazchen@...cent.com>
Signed-off-by: Yulei Zhang <yuleixzhang@...cent.com>
---
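Reviewer note (illustrative only, not part of the patch): a rough sketch of
what the _PAGE_SPECIAL | _PAGE_DMEM marking on the inserted huge pmd amounts
to. pmd_mkdmem() and pmd_dmem() stand in for the helpers introduced earlier
in this series, so treat the names as assumptions:

/*
 * Illustrative sketch only, not part of this patch.  pmd_mkdmem() and
 * pmd_dmem() are assumed helpers (added elsewhere in this series) that
 * set/test _PAGE_SPECIAL | _PAGE_DMEM on a huge pmd.
 */
static inline pmd_t dmem_mk_huge_pmd(pfn_t pfn, pgprot_t prot)
{
        /* build the huge pmd the same way insert_pfn_pmd() would */
        pmd_t entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));

        /* mark it so page-table walkers know this is a dmem mapping */
        return pmd_mkdmem(entry);
}

static inline bool dmem_is_huge_pmd(pmd_t pmd)
{
        /* the single test GUP-fast needs to tell a dmem pmd apart */
        return pmd_dmem(pmd);
}

With that bit in place, telling a dmemfs mapping from a regular THP in the
fast GUP walk comes down to a single pmd test.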
fs/dmemfs/inode.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 111 insertions(+), 2 deletions(-)
diff --git a/fs/dmemfs/inode.c b/fs/dmemfs/inode.c
index b165bd3..17a518c 100644
--- a/fs/dmemfs/inode.c
+++ b/fs/dmemfs/inode.c
@@ -457,7 +457,7 @@ static int dmemfs_split(struct vm_area_struct *vma, unsigned long addr)
return 0;
}
-static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
+static vm_fault_t __dmemfs_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
@@ -485,6 +485,63 @@ static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
return ret;
}
+static vm_fault_t __dmemfs_pmd_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ unsigned long page_addr;
+ struct inode *inode = file_inode(vma->vm_file);
+ void *entry;
+ phys_addr_t phys;
+ pfn_t pfn;
+ int ret;
+
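+ /* A dmem pagesize below PMD_SIZE cannot back a huge pmd; fall back. */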
+ if (dmem_page_size(inode) < PMD_SIZE)
+ return VM_FAULT_FALLBACK;
+
+ WARN_ON(pmd_addr < vma->vm_start ||
+ vma->vm_end < pmd_addr + PMD_SIZE);
+
+ page_addr = vmf->address & ~(dmem_page_size(inode) - 1);
+ entry = radix_get_create_entry(vma, page_addr, inode,
+ linear_page_index(vma, page_addr));
+ if (IS_ERR(entry))
+ return (PTR_ERR(entry) == -ENOMEM) ?
+ VM_FAULT_OOM : VM_FAULT_SIGBUS;
+
+ phys = dmem_addr_to_pfn(inode, dmem_entry_to_addr(inode, entry),
+ linear_page_index(vma, pmd_addr), PMD_SHIFT);
+ phys <<= PAGE_SHIFT;
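+ /* PFN_DMEM makes vmf_insert_pfn_pmd() set _PAGE_SPECIAL | _PAGE_DMEM */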
+ pfn = phys_to_pfn_t(phys, PFN_DMEM);
+ ret = vmf_insert_pfn_pmd(vmf, pfn, !!(vma->vm_flags & VM_WRITE));
+
+ radix_put_entry();
+ return ret;
+}
+
+static vm_fault_t dmemfs_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
+{
+ int ret;
+
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ ret = __dmemfs_fault(vmf);
+ break;
+ case PE_SIZE_PMD:
+ ret = __dmemfs_pmd_fault(vmf);
+ break;
+ default:
+ ret = VM_FAULT_SIGBUS;
+ }
+
+ return ret;
+}
+
+static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
+{
+ return dmemfs_huge_fault(vmf, PE_SIZE_PTE);
+}
+
static unsigned long dmemfs_pagesize(struct vm_area_struct *vma)
{
return dmem_page_size(file_inode(vma->vm_file));
@@ -495,6 +552,7 @@ static unsigned long dmemfs_pagesize(struct vm_area_struct *vma)
.fault = dmemfs_fault,
.pagesize = dmemfs_pagesize,
.access = dmemfs_access_dmem,
+ .huge_fault = dmemfs_huge_fault,
};
int dmemfs_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -507,15 +565,66 @@ int dmemfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_PFNMAP | VM_DMEM | VM_IO;
+ vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY | VM_DMEM | VM_IO;
+
+ if (dmem_page_size(inode) != PAGE_SIZE)
+ vma->vm_flags |= VM_HUGEPAGE;
file_accessed(file);
vma->vm_ops = &dmemfs_vm_ops;
return 0;
}
+/*
+ * If the size of the area returned by mm->get_unmapped_area() is one
+ * dmem pagesize larger than 'len', the returned addr can be rounded
+ * up to the dmem pagesize to meet the alignment requirement.
+ */
+static unsigned long
+dmemfs_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ unsigned long len_pad;
+ unsigned long off = pgoff << PAGE_SHIFT;
+ unsigned long align;
+
+ align = dmem_page_size(file_inode(file));
+
+ /* For pud or pmd pagesizes, fault fallback is not supported. */
+ if (len & (align - 1))
+ return -EINVAL;
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (addr & (align - 1))
+ return -EINVAL;
+ return addr;
+ }
+
+ /*
+ * Pad an extra 'align' worth of space onto 'len', as we want to find
+ * an unmapped area large enough to be aligned to the dmemfs pagesize
+ * when the dmem pagesize is larger than 4K.
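+ * E.g. with a 2MB dmem pagesize and len = 4MB we ask for 6MB, so that
+ * rounding the returned addr up to 2MB still leaves 4MB of room.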
+ */
+ len_pad = (align == PAGE_SIZE) ? len : len + align;
+
+ /* 'len' or 'off' is too large to pad. */
+ if (len_pad < len || (off + len_pad) < off)
+ return -EINVAL;
+
+ addr = current->mm->get_unmapped_area(file, addr, len_pad,
+ pgoff, flags);
+
+ /* 'addr' can now safely be rounded up to the dmem pagesize boundary. */
+ return IS_ERR_VALUE(addr) ? addr : round_up(addr, align);
+}
+
static const struct file_operations dmemfs_file_operations = {
.mmap = dmemfs_file_mmap,
+ .get_unmapped_area = dmemfs_get_unmapped_area,
};
static int dmemfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
--
1.8.3.1