[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170829235447.10050-2-jglisse@redhat.com>
Date: Tue, 29 Aug 2017 19:54:35 -0400
From: Jérôme Glisse <jglisse@...hat.com>
To: linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc: Jérôme Glisse <jglisse@...hat.com>,
Dan Williams <dan.j.williams@...el.com>,
Ross Zwisler <ross.zwisler@...ux.intel.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Bernhard Held <berny156@....de>,
Adam Borowski <kilobyte@...band.pl>,
Andrea Arcangeli <aarcange@...hat.com>,
Radim Krčmář <rkrcmar@...hat.com>,
Wanpeng Li <kernellwp@...il.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Takashi Iwai <tiwai@...e.de>,
Nadav Amit <nadav.amit@...il.com>,
Mike Galbraith <efault@....de>,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
axie <axie@....com>, Andrew Morton <akpm@...ux-foundation.org>
Subject: [PATCH 01/13] dax: update to new mmu_notifier semantic
Replace all mmu_notifier_invalidate_page() calls with mmu_notifier_invalidate_range()
and make sure each one is bracketed by calls to
mmu_notifier_invalidate_range_start()/end().
Note that because we cannot presume the pmd value or pte value, we have to
assume the worst and unconditionally report an invalidation as happening.
Signed-off-by: Jérôme Glisse <jglisse@...hat.com>
Cc: Dan Williams <dan.j.williams@...el.com>
Cc: Ross Zwisler <ross.zwisler@...ux.intel.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Bernhard Held <berny156@....de>
Cc: Adam Borowski <kilobyte@...band.pl>
Cc: Andrea Arcangeli <aarcange@...hat.com>
Cc: Radim Krčmář <rkrcmar@...hat.com>
Cc: Wanpeng Li <kernellwp@...il.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>
Cc: Takashi Iwai <tiwai@...e.de>
Cc: Nadav Amit <nadav.amit@...il.com>
Cc: Mike Galbraith <efault@....de>
Cc: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Cc: axie <axie@....com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
---
fs/dax.c | 19 +++++++++++--------
include/linux/mm.h | 1 +
mm/memory.c | 26 +++++++++++++++++++++-----
3 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 865d42c63e23..ab925dc6647a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pte_t pte, *ptep = NULL;
pmd_t *pmdp = NULL;
spinlock_t *ptl;
- bool changed;
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
- unsigned long address;
+ unsigned long address, start, end;
cond_resched();
@@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
continue;
address = pgoff_address(index, vma);
- changed = false;
- if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
+
+ /*
+ * Note because we provide start/end to follow_pte_pmd it will
+ * call mmu_notifier_invalidate_range_start() on our behalf
+ * before taking any lock.
+ */
+ if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
continue;
if (pmdp) {
@@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
- changed = true;
+ mmu_notifier_invalidate_range(vma->vm_mm, start, end);
unlock_pmd:
spin_unlock(ptl);
#endif
@@ -691,13 +695,12 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pte = pte_wrprotect(pte);
pte = pte_mkclean(pte);
set_pte_at(vma->vm_mm, address, ptep, pte);
- changed = true;
+ mmu_notifier_invalidate_range(vma->vm_mm, start, end);
unlock_pte:
pte_unmap_unlock(ptep, ptl);
}
- if (changed)
- mmu_notifier_invalidate_page(vma->vm_mm, address);
+ mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
}
i_mmap_unlock_read(mapping);
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5e8569..c1f6c95f3496 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1260,6 +1260,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn);
diff --git a/mm/memory.c b/mm/memory.c
index fe2fba27ded2..56e48e4593cb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4008,7 +4008,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
#endif /* __PAGETABLE_PMD_FOLDED */
static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
- pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+ unsigned long *start, unsigned long *end,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -4035,17 +4036,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
if (!pmdpp)
goto out;
+ if (start && end) {
+ *start = address & PMD_MASK;
+ *end = *start + PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, *start, *end);
+ }
*ptlp = pmd_lock(mm, pmd);
if (pmd_huge(*pmd)) {
*pmdpp = pmd;
return 0;
}
spin_unlock(*ptlp);
+ if (start && end)
+ mmu_notifier_invalidate_range_end(mm, *start, *end);
}
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
goto out;
+ if (start && end) {
+ *start = address & PAGE_MASK;
+ *end = *start + PAGE_SIZE;
+ mmu_notifier_invalidate_range_start(mm, *start, *end);
+ }
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
if (!pte_present(*ptep))
goto unlock;
@@ -4053,6 +4066,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
return 0;
unlock:
pte_unmap_unlock(ptep, *ptlp);
+ if (start && end)
+ mmu_notifier_invalidate_range_end(mm, *start, *end);
out:
return -EINVAL;
}
@@ -4064,20 +4079,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
- ptlp)));
+ !(res = __follow_pte_pmd(mm, address, NULL, NULL,
+ ptepp, NULL, ptlp)));
return res;
}
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
int res;
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
- ptlp)));
+ !(res = __follow_pte_pmd(mm, address, start, end,
+ ptepp, pmdpp, ptlp)));
return res;
}
EXPORT_SYMBOL(follow_pte_pmd);
--
2.13.5
Powered by blists - more mailing lists