[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHVXubgtuiH4sTCv23xwSh-=rsr-V=Hyt6TMts4RrM6x8Kupig@mail.gmail.com>
Date: Thu, 1 Aug 2024 08:15:45 +0200
From: Alexandre Ghiti <alexghiti@...osinc.com>
To: Andrew Jones <ajones@...tanamicro.com>
Cc: Jonathan Corbet <corbet@....net>, Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>, Albert Ou <aou@...s.berkeley.edu>,
Conor Dooley <conor@...nel.org>, Rob Herring <robh@...nel.org>,
Krzysztof Kozlowski <krzk+dt@...nel.org>, Andrea Parri <parri.andrea@...il.com>,
Nathan Chancellor <nathan@...nel.org>, Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>, Boqun Feng <boqun.feng@...il.com>,
Arnd Bergmann <arnd@...db.de>, Leonardo Bras <leobras@...hat.com>, Guo Ren <guoren@...nel.org>,
linux-doc@...r.kernel.org, devicetree@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-riscv@...ts.infradead.org,
linux-arch@...r.kernel.org, Andrea Parri <andrea@...osinc.com>
Subject: Re: [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg()
Hi Drew,
On Wed, Jul 31, 2024 at 11:59 AM Andrew Jones <ajones@...tanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 09:23:58AM GMT, Alexandre Ghiti wrote:
> > The current fully-ordered cmpxchgXX() implementation results in:
> >
> > amocas.X.rl a5,a4,(s1)
> > fence rw,rw
> >
> > This provides enough sync but we can actually use the following better
> > mapping instead:
> >
> > amocas.X.aqrl a5,a4,(s1)
>
> We won't get release semantics if the exchange fails. Does that matter?
>
> >
> > Suggested-by: Andrea Parri <andrea@...osinc.com>
> > Signed-off-by: Alexandre Ghiti <alexghiti@...osinc.com>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 72 +++++++++++++++++++-------------
> > 1 file changed, 44 insertions(+), 28 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index ebcd4a30ae60..391730367213 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -107,8 +107,10 @@
> > * store NEW in MEM. Return the initial value in MEM. Success is
> > * indicated by comparing RETURN with OLD.
> > */
> > -
> > -#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> > +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + r, p, o, n) \
> > ({ \
> > if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
> > IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> > @@ -117,9 +119,9 @@
> > r = o; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + cas_prepend \
> > " amocas" cas_sfx " %0, %z2, %1\n" \
> > - append \
> > + cas_append \
> > : "+&r" (r), "+A" (*(p)) \
> > : "rJ" (n) \
> > : "memory"); \
> > @@ -134,7 +136,7 @@
> > ulong __rc; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + sc_prepend \
> > "0: lr.w %0, %2\n" \
> > " and %1, %0, %z5\n" \
> > " bne %1, %z3, 1f\n" \
> > @@ -142,7 +144,7 @@
> > " or %1, %1, %z4\n" \
> > " sc.w" sc_sfx " %1, %1, %2\n" \
> > " bnez %1, 0b\n" \
> > - append \
> > + sc_append \
> > "1:\n" \
> > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
> > : "rJ" ((long)__oldx), "rJ" (__newx), \
> > @@ -153,16 +155,19 @@
> > } \
> > })
> >
> > -#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> > +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + r, p, co, o, n) \
> > ({ \
> > if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> > riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
> > r = o; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > - " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> > - append \
> > + cas_prepend \
> > + " amocas" cas_sfx " %0, %z2, %1\n" \
> > + cas_append \
> > : "+&r" (r), "+A" (*(p)) \
> > : "rJ" (n) \
> > : "memory"); \
> > @@ -170,12 +175,12 @@
> > register unsigned int __rc; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + sc_prepend \
> > "0: lr" lr_sfx " %0, %2\n" \
> > " bne %0, %z3, 1f\n" \
> > - " sc" sc_cas_sfx " %1, %z4, %2\n" \
> > + " sc" sc_sfx " %1, %z4, %2\n" \
>
> nit: If patch3 hadn't renamed sc_sfx to sc_cas_sfx then we wouldn't
> need to rename it again now.
You're right. If you don't mind, though, I'll leave it as is, since it
keeps the previous patch more consistent.
>
> > " bnez %1, 0b\n" \
> > - append \
> > + sc_append \
> > "1:\n" \
> > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> > : "rJ" (co o), "rJ" (n) \
> > @@ -183,7 +188,9 @@
> > } \
> > })
> >
> > -#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \
> > +#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append) \
> > ({ \
> > __typeof__(ptr) __ptr = (ptr); \
> > __typeof__(*(__ptr)) __old = (old); \
> > @@ -192,22 +199,28 @@
> > \
> > switch (sizeof(*__ptr)) { \
> > case 1: \
> > - __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \
> > - prepend, append, \
> > - __ret, __ptr, __old, __new); \
> > + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, __old, __new); \
> > break; \
> > case 2: \
> > - __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \
> > - prepend, append, \
> > - __ret, __ptr, __old, __new); \
> > + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, __old, __new); \
> > break; \
> > case 4: \
> > - __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \
> > - __ret, __ptr, (long), __old, __new); \
> > + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, (long), __old, __new); \
> > break; \
> > case 8: \
> > - __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \
> > - __ret, __ptr, /**/, __old, __new); \
> > + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, /**/, __old, __new); \
> > break; \
> > default: \
> > BUILD_BUG(); \
> > @@ -216,16 +229,19 @@
> > })
> >
> > #define arch_cmpxchg_relaxed(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", "", "")
> > + _arch_cmpxchg((ptr), (o), (n), "", "", "", "", "", "")
> >
> > #define arch_cmpxchg_acquire(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
> > + _arch_cmpxchg((ptr), (o), (n), "", "", \
> > + "", RISCV_ACQUIRE_BARRIER, "", RISCV_ACQUIRE_BARRIER)
> >
> > #define arch_cmpxchg_release(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
> > + _arch_cmpxchg((ptr), (o), (n), "", "", \
> > + RISCV_RELEASE_BARRIER, "", RISCV_RELEASE_BARRIER, "")
> >
> > #define arch_cmpxchg(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
> > + _arch_cmpxchg((ptr), (o), (n), ".rl", ".aqrl", \
> > + "", RISCV_FULL_BARRIER, "", "")
>
> These aren't the easiest things to read, but I can't think of a way to
> improve it other than maybe some macro annotations. E.g.
>
> #define SC_SFX(x) x
> #define CAS_SFX(x) x
> #define SC_PREPEND(x) x
> #define SC_APPEND(x) x
> #define CAS_PREPEND(x) x
> #define CAS_APPEND(x) x
>
> #define arch_cmpxchg(ptr, o, n) \
> _arch_cmpxchg(ptr, o, n, \
> SC_SFX(".rl"), CAS_SFX(".aqrl"), \
> SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
> CAS_PREPEND(""), CAS_APPEND(""))
That's a very good idea; it's been hard to review even for me :)
I could add comments too, but I like your solution, so unless I find
something better in the next 30 minutes, I'll implement that.
Thanks,
Alex
>
> >
> > #define arch_cmpxchg_local(ptr, o, n) \
> > arch_cmpxchg_relaxed((ptr), (o), (n))
> > --
> > 2.39.2
> >
>
> Thanks,
> drew
Powered by blists - more mailing lists