Message-ID: <20220906195949.7nln7y6urs6rfyyd@revolver>
Date: Tue, 6 Sep 2022 20:00:03 +0000
From: Liam Howlett <liam.howlett@...cle.com>
To: Suren Baghdasaryan <surenb@...gle.com>
CC: Laurent Dufour <ldufour@...ux.ibm.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Michel Lespinasse <michel@...pinasse.org>,
Jerome Glisse <jglisse@...gle.com>,
Michal Hocko <mhocko@...e.com>,
Vlastimil Babka <vbabka@...e.cz>,
Johannes Weiner <hannes@...xchg.org>,
Mel Gorman <mgorman@...e.de>,
Davidlohr Bueso <dave@...olabs.net>,
Matthew Wilcox <willy@...radead.org>,
Peter Zijlstra <peterz@...radead.org>,
Laurent Dufour <laurent.dufour@...ibm.com>,
"Paul E . McKenney" <paulmck@...nel.org>,
Andy Lutomirski <luto@...nel.org>,
Song Liu <songliubraving@...com>, Peter Xu <peterx@...hat.com>,
David Hildenbrand <david@...hat.com>,
"dhowells@...hat.com" <dhowells@...hat.com>,
Hugh Dickins <hughd@...gle.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Kent Overstreet <kent.overstreet@...ux.dev>,
David Rientjes <rientjes@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
Joel Fernandes <joelaf@...gle.com>,
Minchan Kim <minchan@...gle.com>,
kernel-team <kernel-team@...roid.com>,
linux-mm <linux-mm@...ck.org>,
"linux-arm-kernel@...ts.infradead.org"
<linux-arm-kernel@...ts.infradead.org>,
"linuxppc-dev@...ts.ozlabs.org" <linuxppc-dev@...ts.ozlabs.org>,
"x86@...nel.org" <x86@...nel.org>,
LKML <linux-kernel@...r.kernel.org>
Subject: Re: [RFC PATCH RESEND 06/28] mm: mark VMA as locked whenever
vma->vm_flags are modified
* Suren Baghdasaryan <surenb@...gle.com> [220906 15:01]:
> On Tue, Sep 6, 2022 at 7:27 AM Laurent Dufour <ldufour@...ux.ibm.com> wrote:
> >
> > On 01/09/2022 at 19:34, Suren Baghdasaryan wrote:
> > > VMA flag modifications should be done under VMA lock to prevent concurrent
> > > page fault handling in that area.
> > >
> > > Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
> > > ---
> > > fs/proc/task_mmu.c | 1 +
> > > fs/userfaultfd.c | 6 ++++++
> > > mm/madvise.c | 1 +
> > > mm/mlock.c | 2 ++
> > > mm/mmap.c | 1 +
> > > mm/mprotect.c | 1 +
> > > 6 files changed, 12 insertions(+)
> >
> > There are also a few changes needed in driver space, for instance:
> >
> > *** arch/x86/kernel/cpu/sgx/driver.c:
> > sgx_mmap[98] vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND |
> > VM_DONTDUMP | VM_IO;
> > *** arch/x86/kernel/cpu/sgx/virt.c:
> > sgx_vepc_mmap[108] vma->vm_flags |= VM_PFNMAP | VM_IO |
> > VM_DONTDUMP | VM_DONTCOPY;
> > *** drivers/dax/device.c:
> > dax_mmap[311] vma->vm_flags |= VM_HUGEPAGE;
> >
> > I guess these changes to vm_flags should be protected as well, or
> > checked one by one.
>
> Thanks for noting these! I'll add necessary locking here and will look
> for other places I might have missed.
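For reference, a minimal sketch of how one of the driver-side sites
Laurent found might look once covered, assuming the same
vma_mark_locked() convention this patch uses in the mm/ hunks below
(sketch only, not a tested change):

	/* arch/x86/kernel/cpu/sgx/driver.c: sgx_mmap(), sketch only */
	static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
	{
		...
		/*
		 * Mark the VMA locked before the flags are modified,
		 * matching the convention in the mm/ call sites.
		 */
		vma_mark_locked(vma);
		vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND |
				 VM_DONTDUMP | VM_IO;
		...
	}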
Would an inline set/clear bit function be worthwhile for vm_flags? If
so, renaming the vm_flags field would get the compiler to catch any
missed call sites. There don't seem to be many cases (12 insertions),
so maybe not.
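Something along these lines is what I have in mind; the helper names
and the __vm_flags rename are hypothetical, just to illustrate how the
compiler would flag any open-coded writes that were missed:

	/*
	 * Hypothetical sketch: rename the field so any open-coded write
	 * to vm_flags no longer compiles, and route all modifications
	 * through helpers that mark the VMA locked first.
	 */
	static inline void vm_flags_set(struct vm_area_struct *vma,
					unsigned long flags)
	{
		vma_mark_locked(vma);
		vma->__vm_flags |= flags;	/* renamed from vm_flags */
	}

	static inline void vm_flags_clear(struct vm_area_struct *vma,
					  unsigned long flags)
	{
		vma_mark_locked(vma);
		vma->__vm_flags &= ~flags;
	}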
>
> >
> > >
> > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > > index 4e0023643f8b..ceffa5c2c650 100644
> > > --- a/fs/proc/task_mmu.c
> > > +++ b/fs/proc/task_mmu.c
> > > @@ -1285,6 +1285,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
> > > for (vma = mm->mmap; vma; vma = vma->vm_next) {
> > > if (!(vma->vm_flags & VM_SOFTDIRTY))
> > > continue;
> > > + vma_mark_locked(vma);
> > > vma->vm_flags &= ~VM_SOFTDIRTY;
> > > vma_set_page_prot(vma);
> > > }
> > > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > > index 175de70e3adf..fe557b3d1c07 100644
> > > --- a/fs/userfaultfd.c
> > > +++ b/fs/userfaultfd.c
> > > @@ -620,6 +620,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
> > > mmap_write_lock(mm);
> > > for (vma = mm->mmap; vma; vma = vma->vm_next)
> > > if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
> > > + vma_mark_locked(vma);
> > > vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
> > > vma->vm_flags &= ~__VM_UFFD_FLAGS;
> > > }
> > > @@ -653,6 +654,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
> > >
> > > octx = vma->vm_userfaultfd_ctx.ctx;
> > > if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
> > > + vma_mark_locked(vma);
> > > vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
> > > vma->vm_flags &= ~__VM_UFFD_FLAGS;
> > > return 0;
> > > @@ -734,6 +736,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
> > > atomic_inc(&ctx->mmap_changing);
> > > } else {
> > > /* Drop uffd context if remap feature not enabled */
> > > + vma_mark_locked(vma);
> > > vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
> > > vma->vm_flags &= ~__VM_UFFD_FLAGS;
> > > }
> > > @@ -891,6 +894,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
> > > vma = prev;
> > > else
> > > prev = vma;
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = new_flags;
> > > vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
> > > }
> > > @@ -1449,6 +1453,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
> > > * the next vma was merged into the current one and
> > > * the current one has not been updated yet.
> > > */
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = new_flags;
> > > vma->vm_userfaultfd_ctx.ctx = ctx;
> > >
> > > @@ -1630,6 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
> > > * the next vma was merged into the current one and
> > > * the current one has not been updated yet.
> > > */
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = new_flags;
> > > vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
> > >
> > > diff --git a/mm/madvise.c b/mm/madvise.c
> > > index 5f0f0948a50e..a173f0025abd 100644
> > > --- a/mm/madvise.c
> > > +++ b/mm/madvise.c
> > > @@ -181,6 +181,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
> > > /*
> > > * vm_flags is protected by the mmap_lock held in write mode.
> > > */
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = new_flags;
> > > if (!vma->vm_file) {
> > > error = replace_anon_vma_name(vma, anon_name);
> > > diff --git a/mm/mlock.c b/mm/mlock.c
> > > index b14e929084cc..f62e1a4d05f2 100644
> > > --- a/mm/mlock.c
> > > +++ b/mm/mlock.c
> > > @@ -380,6 +380,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
> > > */
> > > if (newflags & VM_LOCKED)
> > > newflags |= VM_IO;
> > > + vma_mark_locked(vma);
> > > WRITE_ONCE(vma->vm_flags, newflags);
> > >
> > > lru_add_drain();
> > > @@ -456,6 +457,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
> > >
> > > if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
> > > /* No work to do, and mlocking twice would be wrong */
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = newflags;
> > > } else {
> > > mlock_vma_pages_range(vma, start, end, newflags);
> > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > index 693e6776be39..f89c9b058105 100644
> > > --- a/mm/mmap.c
> > > +++ b/mm/mmap.c
> > > @@ -1818,6 +1818,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > out:
> > > perf_event_mmap(vma);
> > >
> > > + vma_mark_locked(vma);
> > > vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
> > > if (vm_flags & VM_LOCKED) {
> > > if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
> >
> > I guess this doesn't really have an impact, but the call to
> > vma_mark_locked(vma) could be done only in the case where the vm_flags
> > field is actually touched.
> > Something like this:
> >
> > vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
> > if (vm_flags & VM_LOCKED) {
> > if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
> > is_vm_hugetlb_page(vma) ||
> > - vma == get_gate_vma(current->mm))
> > + vma == get_gate_vma(current->mm)) {
> > + vma_mark_locked(vma);
> > vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
> > - else
> > + } else
> > mm->locked_vm += (len >> PAGE_SHIFT);
> > }
> >
> >
> > > diff --git a/mm/mprotect.c b/mm/mprotect.c
> > > index bc6bddd156ca..df47fc21b0e4 100644
> > > --- a/mm/mprotect.c
> > > +++ b/mm/mprotect.c
> > > @@ -621,6 +621,7 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > > * vm_flags and vm_page_prot are protected by the mmap_lock
> > > * held in write mode.
> > > */
> > > + vma_mark_locked(vma);
> > > vma->vm_flags = newflags;
> > > /*
> > > * We want to check manually if we can change individual PTEs writable
> >