Message-ID: <20240103-8a6aba29ada25eb1ab004687@orel>
Date: Wed, 3 Jan 2024 07:49:44 +0100
From: Andrew Jones <ajones@...tanamicro.com>
To: Guo Ren <guoren@...nel.org>
Cc: paul.walmsley@...ive.com, palmer@...belt.com, 
	panqinglin2020@...as.ac.cn, bjorn@...osinc.com, conor.dooley@...rochip.com, 
	leobras@...hat.com, peterz@...radead.org, keescook@...omium.org, 
	wuwei2016@...as.ac.cn, xiaoguang.xing@...hgo.com, chao.wei@...hgo.com, 
	unicorn_wang@...look.com, uwu@...nowy.me, jszhang@...nel.org, wefu@...hat.com, 
	atishp@...shpatra.org, linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org, 
	Guo Ren <guoren@...ux.alibaba.com>
Subject: Re: [PATCH V2 1/3] riscv: Add Zicbop instruction definitions & cpufeature

On Wed, Jan 03, 2024 at 02:13:00PM +0800, Guo Ren wrote:
> On Tue, Jan 2, 2024 at 6:32 PM Andrew Jones <ajones@...tanamicro.com> wrote:
> >
> > On Sun, Dec 31, 2023 at 03:29:51AM -0500, guoren@...nel.org wrote:
> > > From: Guo Ren <guoren@...ux.alibaba.com>
> > >
> > > Cache-block prefetch instructions are HINTs to the hardware to
> > > indicate that software intends to perform a particular type of
> > > memory access in the near future. This patch adds prefetch.i,
> > > prefetch.r and prefetch.w instruction definitions, gated by the
> > > RISCV_ISA_EXT_ZICBOP cpufeature.
> >
> > It also adds S-type instruction encoding support, which isn't mentioned.
> > Actually, it'd probably be best to put the new instruction encoding in
> > its own separate patch.
> Okay, I will split the instruction encoding out into its own patch in the next version.
> 
> >
> > >
> > > Signed-off-by: Guo Ren <guoren@...ux.alibaba.com>
> > > Signed-off-by: Guo Ren <guoren@...nel.org>
> > > ---
> > >  arch/riscv/Kconfig                | 15 ++++++++
> > >  arch/riscv/include/asm/hwcap.h    |  1 +
> > >  arch/riscv/include/asm/insn-def.h | 60 +++++++++++++++++++++++++++++++
> > >  arch/riscv/kernel/cpufeature.c    |  1 +
> > >  4 files changed, 77 insertions(+)
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 24c1799e2ec4..fcbd417d65ea 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -579,6 +579,21 @@ config RISCV_ISA_ZICBOZ
> > >
> > >          If you don't know what to do here, say Y.
> > >
> > > +config RISCV_ISA_ZICBOP
> > > +     bool "Zicbop extension support for cache block prefetch"
> > > +     depends on MMU
> > > +     depends on RISCV_ALTERNATIVE
> > > +     default y
> > > +     help
> > > +       Adds support to dynamically detect the presence of the ZICBOP
> > > +       extension (Cache Block Prefetch Operations) and enable its
> > > +       usage.
> > > +
> > > +       The Zicbop extension can be used to prefetch cache block for
> >
> > blocks
> >
> > > +       read/write fetch.
> > > +
> > > +       If you don't know what to do here, say Y.
> > > +
> > >  config TOOLCHAIN_HAS_ZIHINTPAUSE
> > >       bool
> > >       default y
> > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > > index 06d30526ef3b..77d3b6ee25ab 100644
> > > --- a/arch/riscv/include/asm/hwcap.h
> > > +++ b/arch/riscv/include/asm/hwcap.h
> > > @@ -57,6 +57,7 @@
> > >  #define RISCV_ISA_EXT_ZIHPM          42
> > >  #define RISCV_ISA_EXT_SMSTATEEN              43
> > >  #define RISCV_ISA_EXT_ZICOND         44
> > > +#define RISCV_ISA_EXT_ZICBOP         45
> > >
> > >  #define RISCV_ISA_EXT_MAX            64
> > >
> > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
> > > index e27179b26086..bbda350a63bf 100644
> > > --- a/arch/riscv/include/asm/insn-def.h
> > > +++ b/arch/riscv/include/asm/insn-def.h
> > > @@ -18,6 +18,13 @@
> > >  #define INSN_I_RD_SHIFT                       7
> > >  #define INSN_I_OPCODE_SHIFT           0
> > >
> > > +#define INSN_S_SIMM7_SHIFT           25
> > > +#define INSN_S_RS2_SHIFT             20
> > > +#define INSN_S_RS1_SHIFT             15
> > > +#define INSN_S_FUNC3_SHIFT           12
> > > +#define INSN_S_SIMM5_SHIFT            7
> > > +#define INSN_S_OPCODE_SHIFT           0
> > > +
> > >  #ifdef __ASSEMBLY__
> > >
> > >  #ifdef CONFIG_AS_HAS_INSN
> > > @@ -30,6 +37,10 @@
> > >       .insn   i \opcode, \func3, \rd, \rs1, \simm12
> > >       .endm
> > >
> > > +     .macro insn_s, opcode, func3, rs2, simm12, rs1
> > > +     .insn   s \opcode, \func3, \rs2, \simm12(\rs1)
> > > +     .endm
> > > +
> > >  #else
> > >
> > >  #include <asm/gpr-num.h>
> > > @@ -51,10 +62,20 @@
> > >                (\simm12 << INSN_I_SIMM12_SHIFT))
> > >       .endm
> > >
> > > +     .macro insn_s, opcode, func3, rs2, simm12, rs1
> > > +     .4byte  ((\opcode << INSN_S_OPCODE_SHIFT) |             \
> > > +              (\func3 << INSN_S_FUNC3_SHIFT) |               \
> > > +              (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) |       \
> > > +              (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) |       \
> > > +              ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) |     \
> > > +              (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
> > > +     .endm
> > > +
> > >  #endif
> > >
> > >  #define __INSN_R(...)        insn_r __VA_ARGS__
> > >  #define __INSN_I(...)        insn_i __VA_ARGS__
> > > +#define __INSN_S(...)        insn_s __VA_ARGS__
> > >
> > >  #else /* ! __ASSEMBLY__ */
> > >
> > > @@ -66,6 +87,9 @@
> > >  #define __INSN_I(opcode, func3, rd, rs1, simm12)     \
> > >       ".insn  i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
> > >
> > > +#define __INSN_S(opcode, func3, rs2, simm12, rs1)    \
> > > +     ".insn  s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
> > > +
> > >  #else
> > >
> > >  #include <linux/stringify.h>
> > > @@ -92,12 +116,26 @@
> > >  "             (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
> > >  "    .endm\n"
> > >
> > > +#define DEFINE_INSN_S                                                        \
> > > +     __DEFINE_ASM_GPR_NUMS                                           \
> > > +"    .macro insn_s, opcode, func3, rs2, simm12, rs1\n"               \
> > > +"    .4byte  ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |"  \
> > > +"             (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |"    \
> > > +"             (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
> > > +"             (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
> > > +"             ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
> > > +"             (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
> > > +"    .endm\n"
> > > +
> > >  #define UNDEFINE_INSN_R                                                      \
> > >  "    .purgem insn_r\n"
> > >
> > >  #define UNDEFINE_INSN_I                                                      \
> > >  "    .purgem insn_i\n"
> > >
> > > +#define UNDEFINE_INSN_S                                                      \
> > > +"    .purgem insn_s\n"
> > > +
> > >  #define __INSN_R(opcode, func3, func7, rd, rs1, rs2)                 \
> > >       DEFINE_INSN_R                                                   \
> > >       "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
> > > @@ -108,6 +146,11 @@
> > >       "insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
> > >       UNDEFINE_INSN_I
> > >
> > > +#define __INSN_S(opcode, func3, rs2, simm12, rs1)                    \
> > > +     DEFINE_INSN_S                                                   \
> > > +     "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n"  \
> > > +     UNDEFINE_INSN_S
> > > +
> > >  #endif
> > >
> > >  #endif /* ! __ASSEMBLY__ */
> > > @@ -120,6 +163,10 @@
> > >       __INSN_I(RV_##opcode, RV_##func3, RV_##rd,              \
> > >                RV_##rs1, RV_##simm12)
> > >
> > > +#define INSN_S(opcode, func3, rs2, simm12, rs1)                      \
> > > +     __INSN_S(RV_##opcode, RV_##func3, RV_##rs2,             \
> > > +              RV_##simm12, RV_##rs1)
> > > +
> > >  #define RV_OPCODE(v)         __ASM_STR(v)
> > >  #define RV_FUNC3(v)          __ASM_STR(v)
> > >  #define RV_FUNC7(v)          __ASM_STR(v)
> > > @@ -133,6 +180,7 @@
> > >  #define RV___RS2(v)          __RV_REG(v)
> > >
> > >  #define RV_OPCODE_MISC_MEM   RV_OPCODE(15)
> > > +#define RV_OPCODE_OP_IMM     RV_OPCODE(19)
> > >  #define RV_OPCODE_SYSTEM     RV_OPCODE(115)
> > >
> > >  #define HFENCE_VVMA(vaddr, asid)                             \
> > > @@ -196,4 +244,16 @@
> > >       INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
> > >              RS1(base), SIMM12(4))
> > >
> > > +#define CBO_PREFETCH_I(base, offset)                         \
> > > +     INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0),               \
> > > +            SIMM12(offset), RS1(base))
> > > +
> > > +#define CBO_PREFETCH_R(base, offset)                         \
> > > +     INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1),               \
> > > +            SIMM12(offset), RS1(base))
> > > +
> > > +#define CBO_PREFETCH_W(base, offset)                         \
> > > +     INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3),               \
> > > +            SIMM12(offset), RS1(base))
> >
> > Shouldn't we ensure the lower 5 bits of the offset are zero by masking it?
> The spec says:
> "These instructions operate on the cache block whose effective address
> is the sum of the base address specified in rs1 and the sign-extended
> offset encoded in imm[11:0], where imm[4:0] shall equal 0b00000. The
> effective address is translated into a corresponding physical address
> by the appropriate translation mechanisms."
> 
> So, the user of prefetch.w should keep imm[4:0] zero.

Yes, the user _should_ keep imm[4:0] zero. But unless we can validate at
compile time that all users pass offsets with the lower 5 bits set to
zero, I think we should mask them here, since I'd rather not hand the
user a footgun.
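
Something like the following should do it (untested, just to illustrate;
0xfe0 keeps imm[11:5] and clears imm[4:0], since the S-type encoding
splits the immediate into those two fields):

  #define CBO_PREFETCH_W(base, offset)                          \
          INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3),             \
                 SIMM12((offset) & 0xfe0), RS1(base))

and likewise for CBO_PREFETCH_I and CBO_PREFETCH_R.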

> Just as the
> patch does, the whole imm[11:0] is zero.

That's just one possible use, and I think exposing the offset operand to
users makes sense for unrolled sequences of invocations, so I wouldn't
count on the offset always being zero.
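
E.g. a hypothetical unrolled prefetch of a few consecutive lines
(assuming 64-byte cache blocks) could look like

  asm volatile(CBO_PREFETCH_R(%0, 0)
               CBO_PREFETCH_R(%0, 64)
               CBO_PREFETCH_R(%0, 128)
               CBO_PREFETCH_R(%0, 192)
               : : "r"(addr));

where the stride is folded into the immediate instead of recomputing the
base address for every line.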

Thanks,
drew
