[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250630121430.1989-1-cp0613@linux.alibaba.com>
Date: Mon, 30 Jun 2025 20:14:30 +0800
From: cp0613@...ux.alibaba.com
To: david.laight.linux@...il.com
Cc: alex@...ti.fr,
aou@...s.berkeley.edu,
arnd@...db.de,
cp0613@...ux.alibaba.com,
linux-arch@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-riscv@...ts.infradead.org,
linux@...musvillemoes.dk,
palmer@...belt.com,
paul.walmsley@...ive.com,
yury.norov@...il.com
Subject: Re: [PATCH 2/2] bitops: rotate: Add riscv implementation using Zbb extension
On Sun, 29 Jun 2025 11:38:40 +0100, david.laight.linux@...il.com wrote:
> > It can be found that the zbb optimized implementation uses fewer instructions,
> > even for 16-bit and 8-bit data.
>
> Far too many register spills to stack.
> I think you've forgotten to specify -O2
Yes — I had extracted that from the vmlinux disassembly, which was not built with -O2.
I have now used the web tool you suggested, with the following input:
```
/*
 * Generic 32-bit rotate right.
 * The bits shifted out on the right re-enter on the left; the shift
 * count is reduced modulo 32 so any value of @shift is safe.
 */
unsigned int generic_ror32(unsigned int word, unsigned int shift)
{
unsigned int s = shift & 31;

return (word >> s) | (word << (-s & 31));
}
/*
 * 32-bit rotate right via the RISC-V Zbb "rorw" instruction.
 * On non-RISC-V builds this returns @word unchanged (the #ifdef body
 * is compiled out) — it is a benchmarking stand-in, not a portable rotate.
 */
unsigned int zbb_opt_ror32(unsigned int word, unsigned int shift)
{
#ifdef __riscv
/* Placeholder for the kernel's ALTERNATIVE() patch slot. */
__asm__ volatile("nop"); // ALTERNATIVE(nop)
/* Temporarily enable Zbb so the assembler accepts rorw, then restore. */
__asm__ volatile(
".option push\n"
".option arch,+zbb\n"
"rorw %0, %1, %2\n"
".option pop\n"
: "=r" (word) : "r" (word), "r" (shift) :);
#endif
return word;
}
/*
 * Generic 16-bit rotate right.
 * Same contract as generic_ror32 but modulo 16; the final cast discards
 * any high bits produced by integer promotion of @word.
 */
unsigned short generic_ror16(unsigned short word, unsigned int shift)
{
unsigned int s = shift & 15;

return (unsigned short)((word >> s) | (word << (-s & 15)));
}
/*
 * 16-bit rotate right via the RISC-V Zbb "rorw" instruction.
 * The 16-bit value is replicated into both halves of a 32-bit word so a
 * 32-bit rotate yields the 16-bit rotation in the low half (the pattern
 * has period 16, so any shift count works).
 * On non-RISC-V builds the asm is compiled out and the low half of the
 * replicated word — i.e. @word unchanged — is returned.
 */
unsigned short zbb_opt_ror16(unsigned short word, unsigned int shift)
{
unsigned int word32 = ((unsigned int)word << 16) | word;
#ifdef __riscv
__asm__ volatile("nop"); // ALTERNATIVE(nop)
__asm__ volatile(
".option push\n"
".option arch,+zbb\n"
"rorw %0, %1, %2\n"
".option pop\n"
: "=r" (word32) : "r" (word32), "r" (shift) :);
#endif
/*
 * Bug fix: return the rotated value (word32), not the original input.
 * The posted disassembly shows the defect: "rorw a5,a5,a1; ret" leaves
 * the result in a5 and never moves it to the return register a0.
 */
return (unsigned short)word32;
}
```
The disassembly obtained is:
```
generic_ror32:
andi a1,a1,31
negw a5,a1
sllw a5,a0,a5
srlw a0,a0,a1
or a0,a5,a0
ret
zbb_opt_ror32:
nop
rorw a0, a0, a1
sext.w a0,a0
ret
generic_ror16:
andi a1,a1,15
negw a5,a1
andi a5,a5,15
sllw a5,a0,a5
srlw a0,a0,a1
or a0,a0,a5
slli a0,a0,48
srli a0,a0,48
ret
zbb_opt_ror16:
slliw a5,a0,16
addw a5,a5,a0
nop
rorw a5, a5, a1
ret
```
Thanks,
Pei
Powered by blists - more mailing lists