Message-Id: <1497635555-25679-7-git-send-email-ldufour@linux.vnet.ibm.com>
Date: Fri, 16 Jun 2017 19:52:30 +0200
From: Laurent Dufour <ldufour@...ux.vnet.ibm.com>
To: paulmck@...ux.vnet.ibm.com, peterz@...radead.org,
akpm@...ux-foundation.org, kirill@...temov.name,
ak@...ux.intel.com, mhocko@...nel.org, dave@...olabs.net,
jack@...e.cz, Matthew Wilcox <willy@...radead.org>
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org,
haren@...ux.vnet.ibm.com, khandual@...ux.vnet.ibm.com,
npiggin@...il.com, bsingharora@...il.com,
Tim Chen <tim.c.chen@...ux.intel.com>
Subject: [RFC v5 06/11] mm: Protect VMA modifications using VMA sequence count
The VMA sequence count has been introduced to allow fast detection of
VMA modification when running a page fault handler without holding
the mmap_sem.
This patch provides protection against the VMA modifications done in:
- madvise()
- mremap()
- mpol_rebind_policy()
- vma_replace_policy()
- change_prot_numa()
- mlock(), munlock()
- mprotect()
- mmap_region()
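The write-side pattern is the same at every site touched below: with mmap_sem
held for write, bump the VMA's sequence count around any update of fields a
lock-free page fault handler may read. For illustration only (the speculative
read side is not part of this patch), a reader might look roughly like the
sketch below; the exact retry/fallback policy is an assumption here, not what
this series necessarily implements:

	/* Writer side, as used throughout this patch (mmap_sem held for write): */
	write_seqcount_begin(&vma->vm_sequence);
	vma->vm_flags = new_flags;		/* or any other VMA field update */
	write_seqcount_end(&vma->vm_sequence);

	/*
	 * Reader-side sketch (illustration only, not introduced here):
	 * sample the count, bail out if a writer is in progress, and
	 * fall back if the VMA changed while its fields were being used.
	 */
	unsigned int seq = raw_read_seqcount(&vma->vm_sequence);

	if (seq & 1)
		return VM_FAULT_RETRY;		/* writer in progress */

	/* ... read vma->vm_flags, vma->vm_page_prot, etc. without mmap_sem ... */

	if (read_seqcount_retry(&vma->vm_sequence, seq))
		return VM_FAULT_RETRY;		/* VMA was modified, fall back */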
Signed-off-by: Laurent Dufour <ldufour@...ux.vnet.ibm.com>
---
fs/proc/task_mmu.c | 2 ++
mm/madvise.c | 4 ++++
mm/mempolicy.c | 10 +++++++++-
mm/mlock.c | 9 ++++++---
mm/mmap.c | 2 ++
mm/mprotect.c | 2 ++
mm/mremap.c | 7 +++++++
7 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..9bc40620ba39 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1069,8 +1069,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
goto out_mm;
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ write_seqcount_begin(&vma->vm_sequence);
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
+ write_seqcount_end(&vma->vm_sequence);
}
downgrade_write(&mm->mmap_sem);
break;
diff --git a/mm/madvise.c b/mm/madvise.c
index 25b78ee4fc2c..d1fa6a7ee604 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -172,7 +172,9 @@ static long madvise_behavior(struct vm_area_struct *vma,
/*
* vm_flags is protected by the mmap_sem held in write mode.
*/
+ write_seqcount_begin(&vma->vm_sequence);
vma->vm_flags = new_flags;
+ write_seqcount_end(&vma->vm_sequence);
out:
return error;
}
@@ -439,9 +441,11 @@ static void madvise_free_page_range(struct mmu_gather *tlb,
.private = tlb,
};
+ write_seqcount_begin(&vma->vm_sequence);
tlb_start_vma(tlb, vma);
walk_page_range(addr, end, &free_walk);
tlb_end_vma(tlb, vma);
+ write_seqcount_end(&vma->vm_sequence);
}
static int madvise_free_single_vma(struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 37d0b334bfe9..5e44b3e69a0d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -447,8 +447,11 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
struct vm_area_struct *vma;
down_write(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ write_seqcount_begin(&vma->vm_sequence);
mpol_rebind_policy(vma->vm_policy, new, MPOL_REBIND_ONCE);
+ write_seqcount_end(&vma->vm_sequence);
+ }
up_write(&mm->mmap_sem);
}
@@ -606,9 +609,11 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
{
int nr_updated;
+ write_seqcount_begin(&vma->vm_sequence);
nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
+ write_seqcount_end(&vma->vm_sequence);
return nr_updated;
}
@@ -709,6 +714,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
if (IS_ERR(new))
return PTR_ERR(new);
+ write_seqcount_begin(&vma->vm_sequence);
if (vma->vm_ops && vma->vm_ops->set_policy) {
err = vma->vm_ops->set_policy(vma, new);
if (err)
@@ -717,10 +723,12 @@ static int vma_replace_policy(struct vm_area_struct *vma,
old = vma->vm_policy;
vma->vm_policy = new; /* protected by mmap_sem */
+ write_seqcount_end(&vma->vm_sequence);
mpol_put(old);
return 0;
err_out:
+ write_seqcount_end(&vma->vm_sequence);
mpol_put(new);
return err;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index b562b5523a65..30d9bfc61929 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -438,7 +438,9 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
void munlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
+ write_seqcount_begin(&vma->vm_sequence);
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ write_seqcount_end(&vma->vm_sequence);
while (start < end) {
struct page *page;
@@ -563,10 +565,11 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
* It's okay if try_to_unmap_one unmaps a page just after we
* set VM_LOCKED, populate_vma_page_range will bring it back.
*/
-
- if (lock)
+ if (lock) {
+ write_seqcount_begin(&vma->vm_sequence);
vma->vm_flags = newflags;
- else
+ write_seqcount_end(&vma->vm_sequence);
+ } else
munlock_vma_pages_range(vma, start, end);
out:
diff --git a/mm/mmap.c b/mm/mmap.c
index ad85f210a92c..b48bbe6a49c6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1720,6 +1720,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
out:
perf_event_mmap(vma);
+ write_seqcount_begin(&vma->vm_sequence);
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
@@ -1742,6 +1743,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma->vm_flags |= VM_SOFTDIRTY;
vma_set_page_prot(vma);
+ write_seqcount_end(&vma->vm_sequence);
return addr;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8edd0d576254..1db5b0bf6952 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -345,6 +345,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* vm_flags and vm_page_prot are protected by the mmap_sem
* held in write mode.
*/
+ write_seqcount_begin(&vma->vm_sequence);
vma->vm_flags = newflags;
dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
vma_set_page_prot(vma);
@@ -360,6 +361,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
(newflags & VM_WRITE)) {
populate_vma_page_range(vma, start, end, NULL);
}
+ write_seqcount_end(&vma->vm_sequence);
vm_stat_account(mm, oldflags, -nrpages);
vm_stat_account(mm, newflags, nrpages);
diff --git a/mm/mremap.c b/mm/mremap.c
index cd8a1b199ef9..9c7f69c9e80f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -300,6 +300,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (!new_vma)
return -ENOMEM;
+ write_seqcount_begin(&vma->vm_sequence);
+ write_seqcount_begin_nested(&new_vma->vm_sequence,
+ SINGLE_DEPTH_NESTING);
+
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
need_rmap_locks);
if (moved_len < old_len) {
@@ -316,6 +320,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
true);
+ write_seqcount_end(&vma->vm_sequence);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
@@ -324,7 +329,9 @@ static unsigned long move_vma(struct vm_area_struct *vma,
mremap_userfaultfd_prep(new_vma, uf);
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
+ write_seqcount_end(&vma->vm_sequence);
}
+ write_seqcount_end(&new_vma->vm_sequence);
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
--
2.7.4