[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <201902251554.x1PFsWmr017326@userv0122.oracle.com>
Date: Mon, 25 Feb 2019 07:54:32 -0800 (PST)
From: Kris Van Hees <kris.van.hees@...cle.com>
To: netdev@...r.kernel.org
Subject: [PATCH 1/2] bpf: context casting for tail call
Currently BPF programs are executed with a context that is provided by
code that initiates the execution. Tracing tools that want to make use
of existing probes and events that allow BPF programs to be attached to
them are thus limited to the context information provided by the probe
or event source. Often, more context is needed to allow tracing tools
the ability to implement more complex constructs (e.g. more stateful
tracing).
This patch extends the tail-call mechanism to allow a BPF program of
one type to call a BPF program of another type. E.g. a kprobe BPF
program (working with a struct pt_regs context) can call a BPF program
with a more extensive context. The BPF program type is being extended
to provide can_cast() and cast_context() callback functions to handle
the context conversion.
The program array holding BPF programs that you can tail-call into
continues to require that all programs are of the same type. But when
a compatibility check is made in a program that performs a tail-call,
the can_cast() function is called (if available) to allow the target
type to determine whether it can handle the conversion of a context
from the source type to the target type. If can_cast() is not provided
by the program type, casting is denied.
During execution, the cast_context() function is called (if available)
to perform the conversion of the current context to the context that the
target type expects. Since the program type of the executing BPF program
is not explicitly known during execution, the verifier inserts an
instruction right before the tail-call to assign the current BPF program
type to R4.
The interpreter calls cast_context() using the program type in R4 as
source program type, the program type associated with the program array
as target program type, and the context as provided in R1.
The bpf_prog_types array is now being exposed to the rest of the BPF
code (where before it was local to just the syscall handling) because
the can_cast and cast_context operations need to be accessible.
There is no noticeable effect on BPF program types that do not implement
this new feature.
A JIT implementation is not available yet in this first iteration.
Signed-off-by: Kris Van Hees <kris.van.hees@...cle.com>
Reviewed-by: Nick Alcock <nick.alcock@...cle.com>
---
include/linux/bpf.h | 4 ++++
kernel/bpf/core.c | 27 ++++++++++++++++++++++++++-
kernel/bpf/syscall.c | 2 +-
kernel/bpf/verifier.c | 16 ++++++++++++----
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de18227b3d95..117d2bae51b9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -281,6 +281,9 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
struct bpf_prog_ops {
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+ bool (*can_cast)(enum bpf_prog_type stype, enum bpf_prog_type ttype);
+ void *(*cast_context)(enum bpf_prog_type stype,
+ enum bpf_prog_type ttype, void *ctx);
};
struct bpf_verifier_ops {
@@ -528,6 +531,7 @@ extern const struct file_operations bpf_prog_fops;
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
+extern const struct bpf_prog_ops * const bpf_prog_types[];
extern const struct bpf_prog_ops bpf_offload_prog_ops;
extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
extern const struct bpf_verifier_ops xdp_analyzer_ops;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef88b167959d..1b7c718d4e9d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1426,10 +1426,12 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
CONT;
JMP_TAIL_CALL: {
+ void *ctx = (void *) (unsigned long) BPF_R1;
struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog;
u32 index = BPF_R3;
+ u32 type = BPF_R4;
if (unlikely(index >= array->map.max_entries))
goto out;
@@ -1441,6 +1443,14 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
prog = READ_ONCE(array->ptrs[index]);
if (!prog)
goto out;
+ if (prog->aux->ops->cast_context) {
+ ctx = prog->aux->ops->cast_context(type, prog->type,
+ ctx);
+ if (!ctx)
+ goto out;
+
+ BPF_R1 = (u64) ctx;
+ }
/* ARG1 at this point is guaranteed to point to CTX from
* the verifier side due to the fact that the tail call is
@@ -1637,6 +1647,20 @@ bool bpf_prog_array_compatible(struct bpf_array *array,
array->owner_jited == fp->jited;
}
+bool bpf_prog_array_can_cast(struct bpf_array *array, const struct bpf_prog *fp)
+{
+ const struct bpf_prog_ops *ops;
+
+ if (array->owner_jited != fp->jited)
+ return false;
+
+ ops = bpf_prog_types[array->owner_prog_type];
+ if (ops->can_cast)
+ return ops->can_cast(fp->type, array->owner_prog_type);
+
+ return false;
+}
+
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
@@ -1650,7 +1674,8 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
continue;
array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp))
+ if (!bpf_prog_array_compatible(array, fp) &&
+ !bpf_prog_array_can_cast(array, fp))
return -EINVAL;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec7c552af76b..d558d979100f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1110,7 +1110,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
return err;
}
-static const struct bpf_prog_ops * const bpf_prog_types[] = {
+const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b9496c41383..b49820e82cf8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7629,9 +7629,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
+ cnt = 0;
aux = &env->insn_aux_data[i + delta];
if (!bpf_map_ptr_unpriv(aux))
- continue;
+ goto privileged;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
@@ -7646,13 +7647,20 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
map_ptr = BPF_MAP_PTR(aux->map_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
- map_ptr->max_entries, 2);
+ map_ptr->max_entries, 3);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
- insn_buf[2] = *insn;
- cnt = 3;
+ cnt = 2;
+
+privileged:
+	/* store the BPF program type of the current program in
+ * R4 so it is known in case this tail call requires
+ * casting the context to a different program type
+ */
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_4, prog->type);
+ insn_buf[cnt++] = *insn;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
--
2.20.1
Powered by blists - more mailing lists