[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <qvidvc6rxwacauspyvrkpfjmv4ear7g54zvsmh7prrbzeli4vk@wwjhvzsus77i>
Date: Fri, 22 Nov 2024 10:14:24 -0500
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
To: Suren Baghdasaryan <surenb@...gle.com>
Cc: akpm@...ux-foundation.org, peterz@...radead.org, andrii@...nel.org,
jannh@...gle.com, lorenzo.stoakes@...cle.com, vbabka@...e.cz,
mhocko@...nel.org, shakeel.butt@...ux.dev, hannes@...xchg.org,
david@...hat.com, willy@...radead.org, brauner@...nel.org,
oleg@...hat.com, arnd@...db.de, richard.weiyang@...il.com,
zhangpeng.00@...edance.com, linmiaohe@...wei.com,
viro@...iv.linux.org.uk, hca@...ux.ibm.com, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount
* Suren Baghdasaryan <surenb@...gle.com> [241121 11:28]:
> Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock
> variants to increment it, in line with the usual seqcount usage pattern.
> This lets us check whether the mmap_lock is write-locked by checking
> mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be
> used when implementing mmap_lock speculation functions.
> As a result, vm_lock_seq is also changed to be unsigned to match the type
> of mm_lock_seq.sequence.
>
> Suggested-by: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@...cle.com>
> ---
> Changes since v1 [1]
> - Added ASSERT_EXCLUSIVE_WRITER() instead of a comment in
> vma_end_write_all, per Peter Zijlstra
>
> [1] https://lore.kernel.org/all/20241024205231.1944747-1-surenb@google.com/
>
> include/linux/mm.h | 12 +++----
> include/linux/mm_types.h | 7 ++--
> include/linux/mmap_lock.h | 55 +++++++++++++++++++++-----------
> kernel/fork.c | 5 +--
> mm/init-mm.c | 2 +-
> tools/testing/vma/vma.c | 4 +--
> tools/testing/vma/vma_internal.h | 4 +--
> 7 files changed, 53 insertions(+), 36 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index feb5c8021bef..e6de22738ee1 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
> * we don't rely on for anything - the mm_lock_seq read against which we
> * need ordering is below.
> */
> - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
> + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
> return false;
>
> if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
> @@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
> * after it has been unlocked.
> * This pairs with RELEASE semantics in vma_end_write_all().
> */
> - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
> + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
> up_read(&vma->vm_lock->lock);
> return false;
> }
> @@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
> }
>
> /* WARNING! Can only be used if mmap_lock is expected to be write-locked */
> -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
> +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
> {
> mmap_assert_write_locked(vma->vm_mm);
>
> @@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
> * current task is holding mmap_write_lock, both vma->vm_lock_seq and
> * mm->mm_lock_seq can't be concurrently modified.
> */
> - *mm_lock_seq = vma->vm_mm->mm_lock_seq;
> + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
> return (vma->vm_lock_seq == *mm_lock_seq);
> }
>
> @@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
> */
> static inline void vma_start_write(struct vm_area_struct *vma)
> {
> - int mm_lock_seq;
> + unsigned int mm_lock_seq;
>
> if (__is_vma_write_locked(vma, &mm_lock_seq))
> return;
> @@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
>
> static inline void vma_assert_write_locked(struct vm_area_struct *vma)
> {
> - int mm_lock_seq;
> + unsigned int mm_lock_seq;
>
> VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
> }
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 381d22eba088..ac72888a54b8 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -715,7 +715,7 @@ struct vm_area_struct {
> * counter reuse can only lead to occasional unnecessary use of the
> * slowpath.
> */
> - int vm_lock_seq;
> + unsigned int vm_lock_seq;
> /* Unstable RCU readers are allowed to read this. */
> struct vma_lock *vm_lock;
> #endif
> @@ -909,6 +909,9 @@ struct mm_struct {
> * Roughly speaking, incrementing the sequence number is
> * equivalent to releasing locks on VMAs; reading the sequence
> * number can be part of taking a read lock on a VMA.
> + * Incremented every time mmap_lock is write-locked/unlocked.
> + * Initialized to 0, therefore odd values indicate mmap_lock
> + * is write-locked and even values that it's released.
> *
> * Can be modified under write mmap_lock using RELEASE
> * semantics.
> @@ -917,7 +920,7 @@ struct mm_struct {
> * Can be read with ACQUIRE semantics if not holding write
> * mmap_lock.
> */
> - int mm_lock_seq;
> + seqcount_t mm_lock_seq;
> #endif
>
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index de9dc20b01ba..083b7fa2588e 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -71,39 +71,38 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
> }
>
> #ifdef CONFIG_PER_VMA_LOCK
> -/*
> - * Drop all currently-held per-VMA locks.
> - * This is called from the mmap_lock implementation directly before releasing
> - * a write-locked mmap_lock (or downgrading it to read-locked).
> - * This should normally NOT be called manually from other places.
> - * If you want to call this manually anyway, keep in mind that this will release
> - * *all* VMA write locks, including ones from further up the stack.
> - */
> -static inline void vma_end_write_all(struct mm_struct *mm)
> +static inline void mm_lock_seqcount_init(struct mm_struct *mm)
> {
> - mmap_assert_write_locked(mm);
> - /*
> - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
> - * mmap_lock being held.
> - * We need RELEASE semantics here to ensure that preceding stores into
> - * the VMA take effect before we unlock it with this store.
> - * Pairs with ACQUIRE semantics in vma_start_read().
> - */
> - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
> + seqcount_init(&mm->mm_lock_seq);
> +}
> +
> +static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
> +{
> + do_raw_write_seqcount_begin(&mm->mm_lock_seq);
> +}
> +
> +static inline void mm_lock_seqcount_end(struct mm_struct *mm)
> +{
> + do_raw_write_seqcount_end(&mm->mm_lock_seq);
> }
> +
> #else
> -static inline void vma_end_write_all(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
> #endif
>
> static inline void mmap_init_lock(struct mm_struct *mm)
> {
> init_rwsem(&mm->mmap_lock);
> + mm_lock_seqcount_init(mm);
> }
>
> static inline void mmap_write_lock(struct mm_struct *mm)
> {
> __mmap_lock_trace_start_locking(mm, true);
> down_write(&mm->mmap_lock);
> + mm_lock_seqcount_begin(mm);
> __mmap_lock_trace_acquire_returned(mm, true, true);
> }
>
> @@ -111,6 +110,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
> {
> __mmap_lock_trace_start_locking(mm, true);
> down_write_nested(&mm->mmap_lock, subclass);
> + mm_lock_seqcount_begin(mm);
> __mmap_lock_trace_acquire_returned(mm, true, true);
> }
>
> @@ -120,10 +120,27 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
>
> __mmap_lock_trace_start_locking(mm, true);
> ret = down_write_killable(&mm->mmap_lock);
> + if (!ret)
> + mm_lock_seqcount_begin(mm);
> __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
> return ret;
> }
>
> +/*
> + * Drop all currently-held per-VMA locks.
> + * This is called from the mmap_lock implementation directly before releasing
> + * a write-locked mmap_lock (or downgrading it to read-locked).
> + * This should normally NOT be called manually from other places.
> + * If you want to call this manually anyway, keep in mind that this will release
> + * *all* VMA write locks, including ones from further up the stack.
> + */
> +static inline void vma_end_write_all(struct mm_struct *mm)
> +{
> + mmap_assert_write_locked(mm);
> + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
> + mm_lock_seqcount_end(mm);
> +}
> +
> static inline void mmap_write_unlock(struct mm_struct *mm)
> {
> __mmap_lock_trace_released(mm, true);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e58d27c05788..8cd36645b9fc 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -449,7 +449,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
> return false;
>
> init_rwsem(&vma->vm_lock->lock);
> - vma->vm_lock_seq = -1;
> + vma->vm_lock_seq = UINT_MAX;
>
> return true;
> }
> @@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
> seqcount_init(&mm->write_protect_seq);
> mmap_init_lock(mm);
> INIT_LIST_HEAD(&mm->mmlist);
> -#ifdef CONFIG_PER_VMA_LOCK
> - mm->mm_lock_seq = 0;
> -#endif
> mm_pgtables_bytes_init(mm);
> mm->map_count = 0;
> mm->locked_vm = 0;
> diff --git a/mm/init-mm.c b/mm/init-mm.c
> index 24c809379274..6af3ad675930 100644
> --- a/mm/init-mm.c
> +++ b/mm/init-mm.c
> @@ -40,7 +40,7 @@ struct mm_struct init_mm = {
> .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
> .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
> #ifdef CONFIG_PER_VMA_LOCK
> - .mm_lock_seq = 0,
> + .mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq),
> #endif
> .user_ns = &init_user_ns,
> .cpu_bitmap = CPU_BITS_NONE,
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index b33b47342d41..9074aaced9c5 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -87,7 +87,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
> * begun. Linking to the tree will have caused this to be incremented,
> * which means we will get a false positive otherwise.
> */
> - vma->vm_lock_seq = -1;
> + vma->vm_lock_seq = UINT_MAX;
>
> return vma;
> }
> @@ -212,7 +212,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
> int seq = vma->vm_lock_seq;
>
> /* We reset after each check. */
> - vma->vm_lock_seq = -1;
> + vma->vm_lock_seq = UINT_MAX;
>
> /* The vma_start_write() stub simply increments this value. */
> return seq > -1;
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index c5b9da034511..4007ec580f85 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -231,7 +231,7 @@ struct vm_area_struct {
> * counter reuse can only lead to occasional unnecessary use of the
> * slowpath.
> */
> - int vm_lock_seq;
> + unsigned int vm_lock_seq;
> struct vma_lock *vm_lock;
> #endif
>
> @@ -406,7 +406,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
> return false;
>
> init_rwsem(&vma->vm_lock->lock);
> - vma->vm_lock_seq = -1;
> + vma->vm_lock_seq = UINT_MAX;
>
> return true;
> }
> --
> 2.47.0.338.g60cca15819-goog
>
Powered by blists - more mailing lists