[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20171023185814.4797-6-jakub.kicinski@netronome.com>
Date: Mon, 23 Oct 2017 11:58:10 -0700
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
To: netdev@...r.kernel.org
Cc: oss-drivers@...ronome.com,
Jakub Kicinski <jakub.kicinski@...ronome.com>
Subject: [PATCH net-next 5/9] nfp: bpf: optimize the RMW for stack accesses
When we are performing unaligned stack accesses in the 32-64B window
we have to do a read-modify-write cycle. E.g. for reading 8 bytes
from address 17:
0: tmp = stack[16]
1: gprLo = tmp >> 8
2: tmp = stack[20]
3: gprLo |= tmp << 24
4: tmp = stack[20]
5: gprHi = tmp >> 8
6: tmp = stack[24]
7: gprHi |= tmp << 24
The load on line 4 is unnecessary, because tmp already contains data
from stack[20].
For write we can optimize both loads and writebacks away.
Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
---
drivers/net/ethernet/netronome/nfp/bpf/jit.c | 33 +++++++++++++++++++++-------
1 file changed, 25 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 094acea35326..6730690cf9d8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -644,11 +644,11 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
- unsigned int size, bool new_gpr);
+ unsigned int size, bool first, bool new_gpr, bool last);
static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
- unsigned int size, bool new_gpr)
+ unsigned int size, bool first, bool new_gpr, bool last)
{
u32 idx, src_byte;
enum shf_sc sc;
@@ -692,7 +692,13 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
reg = reg_lm(0, idx);
} else {
reg = imm_a(nfp_prog);
- wrp_mov(nfp_prog, reg, reg_lm(0, idx));
+ /* If it's not the first part of the load and we start a new GPR
+ * that means we are loading a second part of the LMEM word into
+ * a new GPR. IOW we've already looked that LMEM word and
+ * therefore it has been loaded into imm_a().
+ */
+ if (first || !new_gpr)
+ wrp_mov(nfp_prog, reg, reg_lm(0, idx));
}
emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
@@ -702,7 +708,7 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
- unsigned int size, bool new_gpr)
+ unsigned int size, bool first, bool new_gpr, bool last)
{
u32 idx, dst_byte;
enum shf_sc sc;
@@ -746,13 +752,19 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
reg = reg_lm(0, idx);
} else {
reg = imm_a(nfp_prog);
- wrp_mov(nfp_prog, reg, reg_lm(0, idx));
+ /* Only first and last LMEM locations are going to need RMW,
+ * the middle location will be overwritten fully.
+ */
+ if (first || last)
+ wrp_mov(nfp_prog, reg, reg_lm(0, idx));
}
emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
- if (idx > RE_REG_LM_IDX_MAX)
- wrp_mov(nfp_prog, reg_lm(0, idx), reg);
+ if (new_gpr || last) {
+ if (idx > RE_REG_LM_IDX_MAX)
+ wrp_mov(nfp_prog, reg_lm(0, idx), reg);
+ }
return 0;
}
@@ -762,6 +774,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int size, u8 gpr, bool clr_gpr, lmem_step step)
{
s32 off = nfp_prog->stack_depth + meta->insn.off;
+ bool first = true, last;
u8 prev_gpr = 255;
u32 gpr_byte = 0;
int ret;
@@ -777,12 +790,16 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
slice_end = min(off + slice_size, round_up(off + 1, 4));
slice_size = slice_end - off;
+ last = slice_size == size;
+
ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
- gpr != prev_gpr);
+ first, gpr != prev_gpr, last);
if (ret)
return ret;
prev_gpr = gpr;
+ first = false;
+
gpr_byte += slice_size;
if (gpr_byte >= 4) {
gpr_byte -= 4;
--
2.14.1
Powered by blists - more mailing lists