[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171215015517.409513-10-ast@kernel.org>
Date: Thu, 14 Dec 2017 17:55:13 -0800
From: Alexei Starovoitov <ast@...nel.org>
To: "David S . Miller" <davem@...emloft.net>
CC: Daniel Borkmann <daniel@...earbox.net>,
John Fastabend <john.fastabend@...il.com>,
Edward Cree <ecree@...arflare.com>,
Jakub Kicinski <jakub.kicinski@...ronome.com>,
<netdev@...r.kernel.org>, <kernel-team@...com>
Subject: [PATCH bpf-next 09/13] bpf: add support for bpf_call to interpreter
From: Alexei Starovoitov <ast@...com>
though bpf_call is still the same call instruction and
calling convention 'bpf to bpf' and 'bpf to helper' is the same
the interpreter has to oparate on 'struct bpf_insn *'.
To distinguish these two cases add a kernel internal opcode and
mark call insns with it.
This opcode is seen by interpreter only. JITs will never see it.
Also add tiny bit of debug code to aid interpreter debugging.
Signed-off-by: Alexei Starovoitov <ast@...nel.org>
Acked-by: Daniel Borkmann <daniel@...earbox.net>
---
include/linux/bpf.h | 1 +
include/linux/filter.h | 6 ++++
kernel/bpf/core.c | 90 ++++++++++++++++++++++++++++++++++++++++----------
kernel/bpf/verifier.c | 36 ++++++++++++++++++++
4 files changed, 116 insertions(+), 17 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 54dc7cae2949..8935f6f63d5f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -402,6 +402,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5feb441d3dd9..f26e6da1007b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -58,6 +58,9 @@ struct bpf_prog_aux;
/* unused opcode to mark special call to bpf_tail_call() helper */
#define BPF_TAIL_CALL 0xf0
+/* unused opcode to mark call to interpreter with arguments */
+#define BPF_CALL_ARGS 0xe0
+
/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
@@ -710,6 +713,9 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+#define __bpf_call_base_args \
+ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
+ __bpf_call_base)
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d32bebf4f2de..dc12c4fd006e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -217,30 +217,40 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
return 0;
}
-static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
-{
- return BPF_CLASS(insn->code) == BPF_JMP &&
- /* Call and Exit are both special jumps with no
- * target inside the BPF instruction image.
- */
- BPF_OP(insn->code) != BPF_CALL &&
- BPF_OP(insn->code) != BPF_EXIT;
-}
-
static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
{
struct bpf_insn *insn = prog->insnsi;
u32 i, insn_cnt = prog->len;
+ bool pseudo_call;
+ u8 code;
+ int off;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (!bpf_is_jmp_and_has_target(insn))
+ code = insn->code;
+ if (BPF_CLASS(code) != BPF_JMP)
continue;
+ if (BPF_OP(code) == BPF_EXIT)
+ continue;
+ if (BPF_OP(code) == BPF_CALL) {
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ pseudo_call = true;
+ else
+ continue;
+ } else {
+ pseudo_call = false;
+ }
+ off = pseudo_call ? insn->imm : insn->off;
/* Adjust offset of jmps if we cross boundaries. */
- if (i < pos && i + insn->off + 1 > pos)
- insn->off += delta;
- else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
- insn->off -= delta;
+ if (i < pos && i + off + 1 > pos)
+ off += delta;
+ else if (i > pos + delta && i + off + 1 <= pos + delta)
+ off -= delta;
+
+ if (pseudo_call)
+ insn->imm = off;
+ else
+ insn->off = off;
}
}
@@ -774,8 +784,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
*
* Decode and execute eBPF instructions.
*/
-static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
- u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
u64 tmp;
static const void *jumptable[256] = {
@@ -835,6 +844,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
@@ -1025,6 +1035,13 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
BPF_R4, BPF_R5);
CONT;
+ JMP_CALL_ARGS:
+ BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
+ BPF_R3, BPF_R4,
+ BPF_R5,
+ insn + insn->off + 1);
+ CONT;
+
JMP_TAIL_CALL: {
struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1297,6 +1314,23 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
return ___bpf_prog_run(regs, insn, stack); \
}
+#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
+#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
+static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
+ const struct bpf_insn *insn) \
+{ \
+ u64 stack[stack_size / sizeof(u64)]; \
+ u64 regs[MAX_BPF_REG]; \
+\
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+ BPF_R1 = r1; \
+ BPF_R2 = r2; \
+ BPF_R3 = r3; \
+ BPF_R4 = r4; \
+ BPF_R5 = r5; \
+ return ___bpf_prog_run(regs, insn, stack); \
+}
+
#define EVAL1(FN, X) FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
@@ -1308,6 +1342,10 @@ EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
+EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
+EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
+EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
+
#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
static unsigned int (*interpreters[])(const void *ctx,
@@ -1316,6 +1354,24 @@ EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
+#undef PROG_NAME_LIST
+#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
+static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
+ const struct bpf_insn *insn) = {
+EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
+EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
+EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
+};
+#undef PROG_NAME_LIST
+
+void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
+{
+ stack_depth = max_t(u32, stack_depth, 1);
+ insn->off = (s16) insn->imm;
+ insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
+ __bpf_call_base_args;
+ insn->code = BPF_JMP | BPF_CALL_ARGS;
+}
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index df1ded3faf1d..cdc1f043c69b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1458,6 +1458,21 @@ static int update_stack_depth(struct bpf_verifier_env *env,
return 0;
}
+static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+{
+ int start = idx + insn->imm + 1, subprog;
+
+ subprog = find_subprog(env, start);
+ if (subprog < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ start);
+ return -EFAULT;
+ }
+ subprog++;
+ return env->subprog_stack_depth[subprog];
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -4997,6 +5012,24 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return 0;
}
+static int fixup_call_args(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ int i, depth;
+
+ for (i = 0; i < prog->len; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ depth = get_callee_stack_depth(env, insn, i);
+ if (depth < 0)
+ return depth;
+ bpf_patch_call_args(insn, depth);
+ }
+ return 0;
+}
+
/* fixup insn->imm field of bpf_call instructions
* and inline eligible helpers as explicit sequence of BPF instructions
*
@@ -5225,6 +5258,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (ret == 0)
ret = fixup_bpf_calls(env);
+ if (ret == 0)
+ ret = fixup_call_args(env);
+
if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
if (log->level && !log->ubuf) {
--
2.9.5
Powered by blists - more mailing lists