linux-kernel - Re: [RFC PATCH v3 4/5] riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJF2gTQY5RX87Zo8HcM1Og-Oc6vd5Vyj97KL-o6UcqMaT4oxng@mail.gmail.com>
Date:   Sat, 5 Aug 2023 12:24:20 +0800
From:   Guo Ren <guoren@...nel.org>
To:     Leonardo Bras Soares Passos <leobras@...hat.com>
Cc:     Will Deacon <will@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Boqun Feng <boqun.feng@...il.com>,
        Mark Rutland <mark.rutland@....com>,
        Paul Walmsley <paul.walmsley@...ive.com>,
        Palmer Dabbelt <palmer@...belt.com>,
        Albert Ou <aou@...s.berkeley.edu>,
        Andrea Parri <parri.andrea@...il.com>,
        Geert Uytterhoeven <geert@...ux-m68k.org>,
        Andrzej Hajda <andrzej.hajda@...el.com>,
        Palmer Dabbelt <palmer@...osinc.com>,
        linux-kernel@...r.kernel.org, linux-riscv@...ts.infradead.org
Subject: Re: [RFC PATCH v3 4/5] riscv/cmpxchg: Implement cmpxchg for variables
 of size 1 and 2

On Sat, Aug 5, 2023 at 11:14 AM Leonardo Bras Soares Passos
<leobras@...hat.com> wrote:
>
> Hello Guo Ren, thanks for the feedback!
>
> On Fri, Aug 4, 2023 at 2:45 PM Guo Ren <guoren@...nel.org> wrote:
> >
> > On Fri, Aug 4, 2023 at 4:49 AM Leonardo Bras <leobras@...hat.com> wrote:
> > >
> > > cmpxchg for variables of size 1-byte and 2-bytes is not yet available for
> > > riscv, even though its present in other architectures such as arm64 and
> > > x86. This could lead to not being able to implement some locking mechanisms
> > > or requiring some rework to make it work properly.
> > >
> > > Implement 1-byte and 2-bytes cmpxchg in order to achieve parity with other
> > > architectures.
> > >
> > > Signed-off-by: Leonardo Bras <leobras@...hat.com>
> > > ---
> > >  arch/riscv/include/asm/cmpxchg.h | 35 ++++++++++++++++++++++++++++++++
> > >  1 file changed, 35 insertions(+)
> > >
> > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > > index 5a07646fae65..dfb433ac544f 100644
> > > --- a/arch/riscv/include/asm/cmpxchg.h
> > > +++ b/arch/riscv/include/asm/cmpxchg.h
> > > @@ -72,6 +72,36 @@
> > >   * indicated by comparing RETURN with OLD.
> > >   */
> > >
> > > +#define __arch_cmpxchg_mask(sc_sfx, prepend, append, r, p, o, n)       \
> > > +({                                                                     \
> > > +       /* Depends on 2-byte variables being 2-byte aligned */          \
> > > +       ulong __s = ((ulong)(p) & 0x3) * BITS_PER_BYTE;                 \
> > > +       ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
> > > +                       << __s;                                         \
> > > +       ulong __newx = (ulong)(n) << __s;                               \
> > > +       ulong __oldx = (ulong)(o) << __s;                               \
> > > +       ulong __retx;                                                   \
> > > +       register unsigned int __rc;                                     \
> > > +                                                                       \
> > > +       __asm__ __volatile__ (                                          \
> > > +               prepend                                                 \
> > > +               "0:     lr.w %0, %2\n"                                  \
> > > +               "       and  %0, %0, %z5\n"                             \
> > > +               "       bne  %0, %z3, 1f\n"                             \
>
> > bug:
> > -               "       and  %0, %0, %z5\n"                             \
> > -               "       bne  %0, %z3, 1f\n"                             \
> > +               "       and  %1, %0, %z5\n"                             \
> > +               "       bne  %1, %z3, 1f\n"                             \
> > Your code breaks the %0.
>
> What do you mean by breaks here?
>
> In the end of this macro, I intended  to have __retx = (*p & __mask)
> which means the value is clean to be rotated at the end of the macro
> (no need to apply the mask again): r = __ret >> __s;
>
> Also, I assumed we are supposed to return the same variable type
> as the pointer, so this is valid:
> u8 a, *b, c;
> a = xchg(b, c);
>
> Is this correct?
I missed that you removed "__ret & mask" at the end, so this may not be the problem.

Your patch can't boot. After chewing on your code for several hours, I
found a problem:
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 943f094375c7..67bcce63b267 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -14,6 +14,7 @@
 #define __arch_xchg_mask(prepend, append, r, p, n)                     \
 ({                                                                     \
        /* Depends on 2-byte variables being 2-byte aligned */          \
+       volatile ulong *__p = (ulong *)((ulong)(p) & ~0x3);             \
        ulong __s = ((ulong)(p) & 0x3) * BITS_PER_BYTE;                 \
        ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
                        << __s;                                         \
@@ -29,7 +30,7 @@
               "        sc.w %1, %1, %2\n"                              \
               "        bnez %1, 0b\n"                                  \
               append                                                   \
-              : "=&r" (__retx), "=&r" (__rc), "+A" (*(p))              \
+              : "=&r" (__retx), "=&r" (__rc), "+A" (*(__p))            \
               : "rJ" (__newx), "rJ" (~__mask)                          \
               : "memory");                                             \
                                                                        \
@@ -106,6 +107,7 @@
 #define __arch_cmpxchg_mask(sc_sfx, prepend, append, r, p, o, n)       \
 ({                                                                     \
        /* Depends on 2-byte variables being 2-byte aligned */          \
+       volatile ulong *__p = (ulong *)((ulong)(p) & ~0x3);             \
        ulong __s = ((ulong)(p) & 0x3) * BITS_PER_BYTE;                 \
        ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
                        << __s;                                         \
@@ -125,7 +127,7 @@
                "       bnez %1, 0b\n"                                  \
                append                                                  \
                "1:\n"                                                  \
-               : "=&r" (__retx), "=&r" (__rc), "+A" (*(p))             \
+               : "=&r" (__retx), "=&r" (__rc), "+A" (*(__p))           \
                : "rJ" ((long)__oldx), "rJ" (__newx),                   \
                  "rJ" (__mask), "rJ" (~__mask)                         \
                : "memory");                                            \

But lkvm-static still can't boot with paravirt_spinlock... Are
there any atomic tests in Linux?

I noticed that you use some "register int" variables. Could that cause the problem?

You can refer to this file; it has passed the lock torture test:
https://github.com/guoren83/linux/blob/sg2042-master-qspinlock-64ilp32_v4/arch/riscv/include/asm/cmpxchg.h

I also merged your patches with the qspinlock series (with the above
cmpxchg.h, lkvm runs normally):
https://github.com/guoren83/linux/tree/qspinlock_v11



>
> > > +               append                                                  \
> > > +               "1:\n"                                                  \
> > > +               : "=&r" (__retx), "=&r" (__rc), "+A" (*(p))             \
> > > +               : "rJ" ((long)__oldx), "rJ" (__newx),                   \
> > > +                 "rJ" (__mask), "rJ" (~__mask)                         \
> > > +               : "memory");                                            \
> > > +                                                                       \
> > > +       r = (__typeof__(*(p)))(__retx >> __s);                          \
> > > +})
> > > +
> > >
> > >  #define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n)        \
> > >  ({                                                                     \
> > > @@ -98,6 +128,11 @@
> > >         __typeof__(*(ptr)) __ret;                                       \
> > >                                                                         \
> > >         switch (sizeof(*__ptr)) {                                       \
> > > +       case 1:                                                         \
> > > +       case 2:                                                         \
> > > +               __arch_cmpxchg_mask(sc_sfx, prepend, append,            \
> > > +                                       __ret, __ptr, __old, __new);    \
> > > +               break;                                                  \
> > >         case 4:                                                         \
> > >                 __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
> > >                                 __ret, __ptr, (long), __old, __new);    \
> > > --
> > > 2.41.0
> > >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
> >
>


--
Best Regards
 Guo Ren