[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dfb9e654-620e-4658-9ebf-96f531f3bc90@lucifer.local>
Date: Thu, 31 Jul 2025 12:12:48 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Suren Baghdasaryan <surenb@...gle.com>
Cc: akpm@...ux-foundation.org, jannh@...gle.com, Liam.Howlett@...cle.com,
vbabka@...e.cz, pfalcato@...e.de, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] mm: limit the scope of vma_start_read()
One small nit, or maybe just _my_ personal preference :P Could you use a
cover letter for these? I find the 2/2 replying to the 1/2 a bit
weird.
Possibly a 'Lorenzo' thing though ;)
On Wed, Jul 30, 2025 at 06:34:03PM -0700, Suren Baghdasaryan wrote:
> Limit the scope of vma_start_read() as it is used only as a helper for
> higher-level locking functions implemented inside mmap_lock.c and we are
> about to introduce more complex RCU rules for this function.
> The change is pure code refactoring and has no functional changes.
>
> Suggested-by: Vlastimil Babka <vbabka@...e.cz>
> Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
I've checked this carefully: it compiles locally and all is fine, with no
perceivable delta other than the move, so:
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> include/linux/mmap_lock.h | 85 ---------------------------------------
> mm/mmap_lock.c | 85 +++++++++++++++++++++++++++++++++++++++
> 2 files changed, 85 insertions(+), 85 deletions(-)
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 11a078de9150..2c9fffa58714 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
> }
> }
>
> -/*
> - * Try to read-lock a vma. The function is allowed to occasionally yield false
> - * locked result to avoid performance overhead, in which case we fall back to
> - * using mmap_lock. The function should never yield false unlocked result.
> - * False locked result is possible if mm_lock_seq overflows or if vma gets
> - * reused and attached to a different mm before we lock it.
> - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
> - * detached.
> - *
> - * WARNING! The vma passed to this function cannot be used if the function
> - * fails to lock it because in certain cases RCU lock is dropped and then
> - * reacquired. Once RCU lock is dropped the vma can be concurently freed.
> - */
> -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
> - struct vm_area_struct *vma)
> -{
> - int oldcnt;
> -
> - /*
> - * Check before locking. A race might cause false locked result.
> - * We can use READ_ONCE() for the mm_lock_seq here, and don't need
> - * ACQUIRE semantics, because this is just a lockless check whose result
> - * we don't rely on for anything - the mm_lock_seq read against which we
> - * need ordering is below.
> - */
> - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
> - return NULL;
> -
> - /*
> - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
> - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
> - * Acquire fence is required here to avoid reordering against later
> - * vm_lock_seq check and checks inside lock_vma_under_rcu().
> - */
> - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
> - VMA_REF_LIMIT))) {
> - /* return EAGAIN if vma got detached from under us */
> - return oldcnt ? NULL : ERR_PTR(-EAGAIN);
> - }
> -
> - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
> -
> - /*
> - * If vma got attached to another mm from under us, that mm is not
> - * stable and can be freed in the narrow window after vma->vm_refcnt
> - * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
> - * releasing vma->vm_refcnt.
> - */
> - if (unlikely(vma->vm_mm != mm)) {
> - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
> - struct mm_struct *other_mm = vma->vm_mm;
> -
> - /*
> - * __mmdrop() is a heavy operation and we don't need RCU
> - * protection here. Release RCU lock during these operations.
> - * We reinstate the RCU read lock as the caller expects it to
> - * be held when this function returns even on error.
> - */
> - rcu_read_unlock();
> - mmgrab(other_mm);
> - vma_refcount_put(vma);
> - mmdrop(other_mm);
> - rcu_read_lock();
> - return NULL;
> - }
> -
> - /*
> - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
> - * False unlocked result is impossible because we modify and check
> - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
> - * modification invalidates all existing locks.
> - *
> - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
> - * racing with vma_end_write_all(), we only start reading from the VMA
> - * after it has been unlocked.
> - * This pairs with RELEASE semantics in vma_end_write_all().
> - */
> - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
> - vma_refcount_put(vma);
> - return NULL;
> - }
> -
> - return vma;
> -}
> -
> /*
> * Use only while holding mmap read lock which guarantees that locking will not
> * fail (nobody can concurrently write-lock the vma). vma_start_read() should
> diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> index b006cec8e6fe..10826f347a9f 100644
> --- a/mm/mmap_lock.c
> +++ b/mm/mmap_lock.c
> @@ -127,6 +127,91 @@ void vma_mark_detached(struct vm_area_struct *vma)
> }
> }
>
> +/*
> + * Try to read-lock a vma. The function is allowed to occasionally yield false
> + * locked result to avoid performance overhead, in which case we fall back to
> + * using mmap_lock. The function should never yield false unlocked result.
> + * False locked result is possible if mm_lock_seq overflows or if vma gets
> + * reused and attached to a different mm before we lock it.
> + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
> + * detached.
> + *
> + * WARNING! The vma passed to this function cannot be used if the function
> + * fails to lock it because in certain cases RCU lock is dropped and then
> + * reacquired. Once RCU lock is dropped the vma can be concurently freed.
> + */
> +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
> + struct vm_area_struct *vma)
> +{
> + int oldcnt;
> +
> + /*
> + * Check before locking. A race might cause false locked result.
> + * We can use READ_ONCE() for the mm_lock_seq here, and don't need
> + * ACQUIRE semantics, because this is just a lockless check whose result
> + * we don't rely on for anything - the mm_lock_seq read against which we
> + * need ordering is below.
> + */
> + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
> + return NULL;
> +
> + /*
> + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
> + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
> + * Acquire fence is required here to avoid reordering against later
> + * vm_lock_seq check and checks inside lock_vma_under_rcu().
> + */
> + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
> + VMA_REF_LIMIT))) {
> + /* return EAGAIN if vma got detached from under us */
> + return oldcnt ? NULL : ERR_PTR(-EAGAIN);
> + }
> +
> + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
> +
> + /*
> + * If vma got attached to another mm from under us, that mm is not
> + * stable and can be freed in the narrow window after vma->vm_refcnt
> + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
> + * releasing vma->vm_refcnt.
> + */
> + if (unlikely(vma->vm_mm != mm)) {
> + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
> + struct mm_struct *other_mm = vma->vm_mm;
> +
> + /*
> + * __mmdrop() is a heavy operation and we don't need RCU
> + * protection here. Release RCU lock during these operations.
> + * We reinstate the RCU read lock as the caller expects it to
> + * be held when this function returns even on error.
> + */
> + rcu_read_unlock();
> + mmgrab(other_mm);
> + vma_refcount_put(vma);
> + mmdrop(other_mm);
> + rcu_read_lock();
> + return NULL;
> + }
> +
> + /*
> + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
> + * False unlocked result is impossible because we modify and check
> + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
> + * modification invalidates all existing locks.
> + *
> + * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
> + * racing with vma_end_write_all(), we only start reading from the VMA
> + * after it has been unlocked.
> + * This pairs with RELEASE semantics in vma_end_write_all().
> + */
> + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
> + vma_refcount_put(vma);
> + return NULL;
> + }
> +
> + return vma;
> +}
> +
> /*
> * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
> * stable and not isolated. If the VMA is not found or is being modified the
> --
> 2.50.1.552.g942d659e1b-goog
>
Powered by blists - more mailing lists