Message-Id: <20180920200358.31680-8-keith.busch@intel.com>
Date: Thu, 20 Sep 2018 14:03:58 -0600
From: Keith Busch <keith.busch@...el.com>
To: linux-kernel@...r.kernel.org
Cc: Kirill Shutemov <kirill.shutemov@...ux.intel.com>,
Dave Hansen <dave.hansen@...el.com>,
Dan Williams <dan.j.williams@...el.com>,
Keith Busch <keith.busch@...el.com>
Subject: [PATCHv2 7/7] mm/gup: Cache dev_pagemap while pinning pages

Pinning pages from ZONE_DEVICE memory needs to check the backing
device's liveness, which is tracked in the device's dev_pagemap
metadata. This metadata is stored in a radix tree, and looking it up
adds measurable software overhead.
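
For reference, the lookup this series avoids repeating behaves roughly
as sketched below. This is a simplified paraphrase of get_dev_pagemap()
from kernel/memremap.c in this era, not the verbatim source: when the
caller hands back the pgmap from a previous call and the new pfn still
falls inside that pgmap's resource range, the live reference is reused
and the radix tree is never consulted.

/*
 * Simplified sketch of get_dev_pagemap()'s caching contract;
 * details elided.
 */
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	resource_size_t phys = PFN_PHYS(pfn);

	/* Fast path: the cached pgmap already holds a live reference. */
	if (pgmap) {
		if (phys >= pgmap->res.start && phys <= pgmap->res.end)
			return pgmap;
		/* The pfn left the cached range; drop the old reference. */
		put_dev_pagemap(pgmap);
	}

	/* Slow path: radix tree lookup plus a tryget on the percpu ref. */
	rcu_read_lock();
	pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
		pgmap = NULL;
	rcu_read_unlock();

	return pgmap;
}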

This patch avoids repeating that relatively costly lookup by caching
the last dev_pagemap while getting user pages: the cached entry is
passed back into get_dev_pagemap() on each step, and the reference it
holds is dropped once at the end. gup_benchmark reports this reduces
the time to get user pages to as low as 1/3 of the previous time.
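
Illustratively, a caller following this pattern looks like the sketch
below. This is a hypothetical pinning loop, not code from the patch
(first_pfn and last_pfn are made-up names); in the patch itself the
cached pgmap rides along in struct follow_page_context, and the final
put happens in follow_page() and __get_user_pages().

	struct dev_pagemap *pgmap = NULL;
	unsigned long pfn;

	for (pfn = first_pfn; pfn <= last_pfn; pfn++) {
		/* Reuses the cached pgmap while pfn stays in its range. */
		pgmap = get_dev_pagemap(pfn, pgmap);
		if (!pgmap)
			break;	/* backing device has gone away */
		get_page(pfn_to_page(pfn));
	}
	if (pgmap)
		put_dev_pagemap(pgmap);	/* one put for the cached reference */
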
Cc: Kirill Shutemov <kirill.shutemov@...ux.intel.com>
Cc: Dave Hansen <dave.hansen@...el.com>
Cc: Dan Williams <dan.j.williams@...el.com>
Signed-off-by: Keith Busch <keith.busch@...el.com>
---
 include/linux/mm.h |  8 +++++++-
 mm/gup.c           | 41 ++++++++++++++++++++++++-----------------
 mm/huge_memory.c   | 35 +++++++++++++++--------------------
 3 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1fd241c9071..d688e18a19c4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -380,6 +380,7 @@ struct vm_fault {
 struct follow_page_context {
 	struct vm_area_struct *vma;
+	struct dev_pagemap *pgmap;
 	unsigned long address;
 	unsigned int page_mask;
 	unsigned int flags;
 };
@@ -2546,14 +2547,19 @@ struct page *follow_page_mask(struct follow_page_context *ctx);
 
 static inline struct page *follow_page(struct vm_area_struct *vma,
 	unsigned long address, unsigned int foll_flags)
 {
+	struct page *page;
 	struct follow_page_context ctx = {
 		.vma = vma,
+		.pgmap = NULL,
 		.address = address,
 		.page_mask = 0,
 		.flags = foll_flags,
 	};
-	return follow_page_mask(&ctx);
+	page = follow_page_mask(&ctx);
+	if (ctx.pgmap)
+		put_dev_pagemap(ctx.pgmap);
+	return page;
 }
 
 #define FOLL_WRITE	0x01	/* check pte is writable */
diff --git a/mm/gup.c b/mm/gup.c
index 4c4da54f8dbe..c98ea05eaa59 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -72,7 +72,6 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
 {
 	struct mm_struct *mm = ctx->vma->vm_mm;
-	struct dev_pagemap *pgmap = NULL;
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t *ptep, pte;
@@ -114,8 +113,8 @@ static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
 		 * Only return device mapping pages in the FOLL_GET case since
 		 * they are only valid while holding the pgmap reference.
 		 */
-		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
-		if (pgmap)
+		ctx->pgmap = get_dev_pagemap(pte_pfn(pte), ctx->pgmap);
+		if (ctx->pgmap)
 			page = pte_page(pte);
 		else
 			goto no_page;
@@ -154,9 +153,9 @@ static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
 		get_page(page);
 
 		/* drop the pgmap reference now that we hold the page */
-		if (pgmap) {
-			put_dev_pagemap(pgmap);
-			pgmap = NULL;
+		if (ctx->pgmap) {
+			put_dev_pagemap(ctx->pgmap);
+			ctx->pgmap = NULL;
 		}
 	}
 	if (ctx->flags & FOLL_TOUCH) {
@@ -645,7 +644,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas, int *nonblocking)
 {
-	long i = 0;
+	long ret = 0, i = 0;
 	struct vm_area_struct *vma = NULL;
 	struct follow_page_context ctx = {};
 
@@ -681,8 +680,10 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				goto next_page;
 			}
 
-			if (!vma || check_vma_flags(vma, gup_flags))
-				return i ? : -EFAULT;
+			if (!vma || check_vma_flags(vma, gup_flags)) {
+				ret = -EFAULT;
+				goto out;
+			}
 			if (is_vm_hugetlb_page(vma)) {
 				i = follow_hugetlb_page(mm, vma, pages, vmas,
 						&start, &nr_pages, i,
@@ -697,23 +698,25 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		 * If we have a pending SIGKILL, don't keep faulting pages and
 		 * potentially allocating memory.
 		 */
-		if (unlikely(fatal_signal_pending(current)))
-			return i ? i : -ERESTARTSYS;
+		if (unlikely(fatal_signal_pending(current))) {
+			ret = -ERESTARTSYS;
+			goto out;
+		}
 		cond_resched();
 
 		page = follow_page_mask(&ctx);
 		if (!page) {
-			int ret;
 			ret = faultin_page(tsk, &ctx, nonblocking);
 			switch (ret) {
 			case 0:
 				goto retry;
+			case -EBUSY:
+				ret = 0;
+				/* FALLTHRU */
 			case -EFAULT:
 			case -ENOMEM:
 			case -EHWPOISON:
-				return i ? i : ret;
-			case -EBUSY:
-				return i;
+				goto out;
 			case -ENOENT:
 				goto next_page;
 			}
@@ -725,7 +728,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			 */
 			goto next_page;
 		} else if (IS_ERR(page)) {
-			return i ? i : PTR_ERR(page);
+			ret = PTR_ERR(page);
+			goto out;
 		}
 		if (pages) {
 			pages[i] = page;
@@ -745,7 +749,10 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		start += page_increm * PAGE_SIZE;
 		nr_pages -= page_increm;
 	} while (nr_pages);
-	return i;
+out:
+	if (ctx.pgmap)
+		put_dev_pagemap(ctx.pgmap);
+	return i ? i : ret;
 }
 
 static bool vma_permits_fault(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index abd36e6afe2c..6787011385ce 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -851,12 +851,23 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 	update_mmu_cache_pmd(vma, addr, pmd);
 }
 
+static struct page *pagemap_page(struct follow_page_context *ctx,
+		unsigned long pfn)
+{
+	struct page *page;
+
+	ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
+	if (!ctx->pgmap)
+		return ERR_PTR(-EFAULT);
+	page = pfn_to_page(pfn);
+	get_page(page);
+	return page;
+}
+
 struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
 {
 	unsigned long pfn = pmd_pfn(*pmd);
 	struct mm_struct *mm = ctx->vma->vm_mm;
-	struct dev_pagemap *pgmap;
-	struct page *page;
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
@@ -885,14 +896,7 @@ struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
 		return ERR_PTR(-EEXIST);
 
 	pfn += (ctx->address & ~PMD_MASK) >> PAGE_SHIFT;
-	pgmap = get_dev_pagemap(pfn, NULL);
-	if (!pgmap)
-		return ERR_PTR(-EFAULT);
-	page = pfn_to_page(pfn);
-	get_page(page);
-	put_dev_pagemap(pgmap);
-
-	return page;
+	return pagemap_page(ctx, pfn);
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1002,8 +1006,6 @@ struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud)
 {
 	unsigned long pfn = pud_pfn(*pud);
 	struct mm_struct *mm = ctx->vma->vm_mm;
-	struct dev_pagemap *pgmap;
-	struct page *page;
 
 	assert_spin_locked(pud_lockptr(mm, pud));
 
@@ -1026,14 +1028,7 @@ struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud)
 		return ERR_PTR(-EEXIST);
 
 	pfn += (ctx->address & ~PUD_MASK) >> PAGE_SHIFT;
-	pgmap = get_dev_pagemap(pfn, NULL);
-	if (!pgmap)
-		return ERR_PTR(-EFAULT);
-	page = pfn_to_page(pfn);
-	get_page(page);
-	put_dev_pagemap(pgmap);
-
-	return page;
+	return pagemap_page(ctx, pfn);
 }
 
 int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
--
2.14.4