[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0567268d-edea-4229-8bf4-bfbfc2af1fd4@suse.cz>
Date: Fri, 23 Jan 2026 10:16:22 +0100
From: Vlastimil Babka <vbabka@...e.cz>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Andrew Morton <akpm@...ux-foundation.org>
Cc: David Hildenbrand <david@...nel.org>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>, Mike Rapoport
<rppt@...nel.org>, Suren Baghdasaryan <surenb@...gle.com>,
Michal Hocko <mhocko@...e.com>, Shakeel Butt <shakeel.butt@...ux.dev>,
Jann Horn <jannh@...gle.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, linux-rt-devel@...ts.linux.dev,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Will Deacon <will@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Waiman Long <longman@...hat.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Clark Williams <clrkwllms@...nel.org>, Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [PATCH v3 06/10] mm/vma: clean up __vma_enter/exit_locked()
On 1/22/26 14:01, Lorenzo Stoakes wrote:
> These functions are very confusing indeed. 'Entering' a lock could be
> interpreted as acquiring it, but this is not what these functions are
> interacting with.
>
> Equally they don't indicate at all what kind of lock we are 'entering' or
> 'exiting'. Finally they are misleading as we invoke these functions when we
> already hold a write lock to detach a VMA.
>
> These functions are explicitly simply 'entering' and 'exiting' a state in
> which we hold the EXCLUSIVE lock in order that we can either mark the VMA
> as being write-locked, or mark the VMA detached.
If we hold a write lock (i.e. in vma_mark_detached()), that normally means
it's also exclusive?
And if we talk about the state between __vma_enter_exclusive_locked() and
__vma_exit_exclusive_locked() as "holding an EXCLUSIVE lock", it's not
exactly the same lock as what we call the "VMA write lock", right? So what
lock is it?
Maybe it would help if we stopped calling this internal thing a "lock"?
Except we use it for lockdep's lock_acquire_exclusive(). Sigh, sorry, I don't
have any great suggestions.
Maybe call those functions __vma_exclude_readers_start() and
__vma_exclude_readers_end() instead, or something?
> Rename the functions accordingly, and also update
> __vma_exit_exclusive_locked() to return detached state with a __must_check
> directive, as it is simply clumsy to pass an output pointer here to
> detached state and inconsistent vs. __vma_enter_exclusive_locked().
>
> Finally, remove the unnecessary 'inline' directives.
>
> No functional change intended.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> include/linux/mmap_lock.h | 4 +--
> mm/mmap_lock.c | 60 +++++++++++++++++++++++++--------------
> 2 files changed, 41 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index da63b1be6ec0..873bc5f3c97c 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -209,8 +209,8 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
> __vma_lockdep_release_read(vma);
> detached = __vma_refcount_put(vma, &refcnt);
> /*
> - * __vma_enter_locked() may be sleeping waiting for readers to drop
> - * their reference count, so wake it up if we were the last reader
> + * __vma_enter_exclusive_locked() may be sleeping waiting for readers to
> + * drop their reference count, so wake it up if we were the last reader
> * blocking it from being acquired.
> */
> if (!detached && are_readers_excluded(refcnt))
> diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> index 7a0361cff6db..f73221174a8b 100644
> --- a/mm/mmap_lock.c
> +++ b/mm/mmap_lock.c
> @@ -46,19 +46,43 @@ EXPORT_SYMBOL(__mmap_lock_do_trace_released);
> #ifdef CONFIG_MMU
> #ifdef CONFIG_PER_VMA_LOCK
>
> -static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
> +/*
> + * Now that all readers have been evicted, mark the VMA as being out of the
> + * 'exclude readers' state.
> + *
> + * Returns true if the VMA is now detached, otherwise false.
> + */
> +static bool __must_check __vma_exit_exclusive_locked(struct vm_area_struct *vma)
> {
> - *detached = refcount_sub_and_test(VM_REFCNT_EXCLUDE_READERS_FLAG,
> - &vma->vm_refcnt);
> + bool detached;
> +
> + detached = refcount_sub_and_test(VM_REFCNT_EXCLUDE_READERS_FLAG,
> + &vma->vm_refcnt);
> __vma_lockdep_release_exclusive(vma);
> + return detached;
> }
>
> /*
> - * __vma_enter_locked() returns 0 immediately if the vma is not
> - * attached, otherwise it waits for any current readers to finish and
> - * returns 1. Returns -EINTR if a signal is received while waiting.
> + * Mark the VMA as being in a state of excluding readers, check to see if any
> + * VMA read locks are indeed held, and if so wait for them to be released.
> + *
> + * Note that this function pairs with vma_refcount_put() which will wake up this
> + * thread when it detects that the last reader has released its lock.
> + *
> + * The state parameter ought to be set to TASK_UNINTERRUPTIBLE in cases where we
> + * wish the thread to sleep uninterruptibly or TASK_KILLABLE if a fatal signal
> + * is permitted to kill it.
> + *
> + * The function will return 0 immediately if the VMA is detached, and 1 once the
> + * VMA has evicted all readers, leaving the VMA exclusively locked.
> + *
> + * If the function returns 1, the caller is required to invoke
> + * __vma_exit_exclusive_locked() once the exclusive state is no longer required.
> + *
> + * If state is set to something other than TASK_UNINTERRUPTIBLE, the function
> + * may also return -EINTR to indicate a fatal signal was received while waiting.
> */
> -static inline int __vma_enter_locked(struct vm_area_struct *vma,
> +static int __vma_enter_exclusive_locked(struct vm_area_struct *vma,
> bool detaching, int state)
> {
> int err;
> @@ -85,13 +109,10 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma,
> refcount_read(&vma->vm_refcnt) == tgt_refcnt,
> state);
> if (err) {
> - bool detached;
> -
> - __vma_exit_locked(vma, &detached);
> - if (detached) {
> + if (__vma_exit_exclusive_locked(vma)) {
> /*
> * The wait failed, but the last reader went away
> - * as well. Tell the caller the VMA is detached.
> + * as well. Tell the caller the VMA is detached.
> */
> WARN_ON_ONCE(!detaching);
> err = 0;
> @@ -108,7 +129,7 @@ int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
> {
> int locked;
>
> - locked = __vma_enter_locked(vma, false, state);
> + locked = __vma_enter_exclusive_locked(vma, false, state);
> if (locked < 0)
> return locked;
>
> @@ -120,12 +141,9 @@ int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
> */
> WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
>
> - if (locked) {
> - bool detached;
> -
> - __vma_exit_locked(vma, &detached);
> - WARN_ON_ONCE(detached); /* vma should remain attached */
> - }
> + /* vma should remain attached. */
> + if (locked)
> + WARN_ON_ONCE(__vma_exit_exclusive_locked(vma));
>
> return 0;
> }
> @@ -145,12 +163,12 @@ void vma_mark_detached(struct vm_area_struct *vma)
> detached = __vma_refcount_put(vma, NULL);
> if (unlikely(!detached)) {
> /* Wait until vma is detached with no readers. */
> - if (__vma_enter_locked(vma, true, TASK_UNINTERRUPTIBLE)) {
> + if (__vma_enter_exclusive_locked(vma, true, TASK_UNINTERRUPTIBLE)) {
> /*
> * Once this is complete, no readers can increment the
> * reference count, and the VMA is marked detached.
> */
> - __vma_exit_locked(vma, &detached);
> + detached = __vma_exit_exclusive_locked(vma);
> WARN_ON_ONCE(!detached);
> }
> }
Powered by blists - more mailing lists