lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpHghvnWWvF6JN+DHbD8Vv7zPVC0BZcTmW6HcrWrxo=KWw@mail.gmail.com>
Date: Thu, 31 Jul 2025 08:20:37 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: akpm@...ux-foundation.org
Cc: jannh@...gle.com, Liam.Howlett@...cle.com, lorenzo.stoakes@...cle.com, 
	vbabka@...e.cz, pfalcato@...e.de, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 1/2] mm: limit the scope of vma_start_read()

On Thu, Jul 31, 2025 at 8:19 AM Suren Baghdasaryan <surenb@...gle.com> wrote:
>
> Limit the scope of vma_start_read() as it is used only as a helper for
> higher-level locking functions implemented inside mmap_lock.c and we are
> about to introduce more complex RCU rules for this function.
> The change is pure code refactoring and has no functional changes.
>
> Suggested-by: Vlastimil Babka <vbabka@...e.cz>
> Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>

Forgot to add Lorenzo's Reviewed-by tag:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>

Thanks!

> ---
>  include/linux/mmap_lock.h | 85 ---------------------------------------
>  mm/mmap_lock.c            | 85 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 85 insertions(+), 85 deletions(-)
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 11a078de9150..2c9fffa58714 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
>         }
>  }
>
> -/*
> - * Try to read-lock a vma. The function is allowed to occasionally yield false
> - * locked result to avoid performance overhead, in which case we fall back to
> - * using mmap_lock. The function should never yield false unlocked result.
> - * False locked result is possible if mm_lock_seq overflows or if vma gets
> - * reused and attached to a different mm before we lock it.
> - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
> - * detached.
> - *
> - * WARNING! The vma passed to this function cannot be used if the function
> - * fails to lock it because in certain cases RCU lock is dropped and then
> - * reacquired. Once RCU lock is dropped the vma can be concurently freed.
> - */
> -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
> -                                                   struct vm_area_struct *vma)
> -{
> -       int oldcnt;
> -
> -       /*
> -        * Check before locking. A race might cause false locked result.
> -        * We can use READ_ONCE() for the mm_lock_seq here, and don't need
> -        * ACQUIRE semantics, because this is just a lockless check whose result
> -        * we don't rely on for anything - the mm_lock_seq read against which we
> -        * need ordering is below.
> -        */
> -       if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
> -               return NULL;
> -
> -       /*
> -        * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
> -        * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
> -        * Acquire fence is required here to avoid reordering against later
> -        * vm_lock_seq check and checks inside lock_vma_under_rcu().
> -        */
> -       if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
> -                                                             VMA_REF_LIMIT))) {
> -               /* return EAGAIN if vma got detached from under us */
> -               return oldcnt ? NULL : ERR_PTR(-EAGAIN);
> -       }
> -
> -       rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
> -
> -       /*
> -        * If vma got attached to another mm from under us, that mm is not
> -        * stable and can be freed in the narrow window after vma->vm_refcnt
> -        * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
> -        * releasing vma->vm_refcnt.
> -        */
> -       if (unlikely(vma->vm_mm != mm)) {
> -               /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
> -               struct mm_struct *other_mm = vma->vm_mm;
> -
> -               /*
> -                * __mmdrop() is a heavy operation and we don't need RCU
> -                * protection here. Release RCU lock during these operations.
> -                * We reinstate the RCU read lock as the caller expects it to
> -                * be held when this function returns even on error.
> -                */
> -               rcu_read_unlock();
> -               mmgrab(other_mm);
> -               vma_refcount_put(vma);
> -               mmdrop(other_mm);
> -               rcu_read_lock();
> -               return NULL;
> -       }
> -
> -       /*
> -        * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
> -        * False unlocked result is impossible because we modify and check
> -        * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
> -        * modification invalidates all existing locks.
> -        *
> -        * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
> -        * racing with vma_end_write_all(), we only start reading from the VMA
> -        * after it has been unlocked.
> -        * This pairs with RELEASE semantics in vma_end_write_all().
> -        */
> -       if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
> -               vma_refcount_put(vma);
> -               return NULL;
> -       }
> -
> -       return vma;
> -}
> -
>  /*
>   * Use only while holding mmap read lock which guarantees that locking will not
>   * fail (nobody can concurrently write-lock the vma). vma_start_read() should
> diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
> index b006cec8e6fe..10826f347a9f 100644
> --- a/mm/mmap_lock.c
> +++ b/mm/mmap_lock.c
> @@ -127,6 +127,91 @@ void vma_mark_detached(struct vm_area_struct *vma)
>         }
>  }
>
> +/*
> + * Try to read-lock a vma. The function is allowed to occasionally yield false
> + * locked result to avoid performance overhead, in which case we fall back to
> + * using mmap_lock. The function should never yield false unlocked result.
> + * False locked result is possible if mm_lock_seq overflows or if vma gets
> + * reused and attached to a different mm before we lock it.
> + * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN) if
> + * vma got detached.
> + *
> + * WARNING! The vma passed to this function cannot be used if the function
> + * fails to lock it because in certain cases RCU lock is dropped and then
> + * reacquired. Once RCU lock is dropped the vma can be concurrently freed.
> + */
> +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
> +                                                   struct vm_area_struct *vma)
> +{
> +       int oldcnt;
> +
> +       /*
> +        * Check before locking. A race might cause false locked result.
> +        * We can use READ_ONCE() for the mm_lock_seq here, and don't need
> +        * ACQUIRE semantics, because this is just a lockless check whose result
> +        * we don't rely on for anything - the mm_lock_seq read against which we
> +        * need ordering is below.
> +        */
> +       if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
> +               return NULL;
> +
> +       /*
> +        * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
> +        * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
> +        * Acquire fence is required here to avoid reordering against later
> +        * vm_lock_seq check and checks inside lock_vma_under_rcu().
> +        */
> +       if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
> +                                                             VMA_REF_LIMIT))) {
> +               /* return EAGAIN if vma got detached from under us */
> +               return oldcnt ? NULL : ERR_PTR(-EAGAIN);
> +       }
> +
> +       rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
> +
> +       /*
> +        * If vma got attached to another mm from under us, that mm is not
> +        * stable and can be freed in the narrow window after vma->vm_refcnt
> +        * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
> +        * releasing vma->vm_refcnt.
> +        */
> +       if (unlikely(vma->vm_mm != mm)) {
> +               /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
> +               struct mm_struct *other_mm = vma->vm_mm;
> +
> +               /*
> +                * __mmdrop() is a heavy operation and we don't need RCU
> +                * protection here. Release RCU lock during these operations.
> +                * We reinstate the RCU read lock as the caller expects it to
> +                * be held when this function returns even on error.
> +                */
> +               rcu_read_unlock();
> +               mmgrab(other_mm);
> +               vma_refcount_put(vma);
> +               mmdrop(other_mm);
> +               rcu_read_lock();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
> +        * False unlocked result is impossible because we modify and check
> +        * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
> +        * modification invalidates all existing locks.
> +        *
> +        * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
> +        * racing with vma_end_write_all(), we only start reading from the VMA
> +        * after it has been unlocked.
> +        * This pairs with RELEASE semantics in vma_end_write_all().
> +        */
> +       if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
> +               vma_refcount_put(vma);
> +               return NULL;
> +       }
> +
> +       return vma;
> +}
> +
>  /*
>   * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
>   * stable and not isolated. If the VMA is not found or is being modified the
>
> base-commit: 01da54f10fddf3b01c5a3b80f6b16bbad390c302
> --
> 2.50.1.552.g942d659e1b-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ