Message-ID: <20220922225642.3058176-1-kafai@fb.com>
Date: Thu, 22 Sep 2022 15:56:42 -0700
From: Martin KaFai Lau <kafai@...com>
To: <bpf@...r.kernel.org>
CC: Alexei Starovoitov <ast@...nel.org>,
Andrii Nakryiko <andrii@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, <kernel-team@...com>,
<netdev@...r.kernel.org>, Martin KaFai Lau <martin.lau@...nel.org>
Subject: [PATCH bpf-next 4/5] bpf: Stop bpf_setsockopt(TCP_CONGESTION) in init ops from recursing into itself
From: Martin KaFai Lau <martin.lau@...nel.org>

When a bad bpf prog '.init' calls
bpf_setsockopt(TCP_CONGESTION, "itself"), it will trigger this loop:

.init => bpf_setsockopt(tcp_cc) => .init => bpf_setsockopt(tcp_cc) ...
... => .init => bpf_setsockopt(tcp_cc).
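
For illustration only (this program is not part of the patch), such a
misbehaving bpf-tcp-cc could look like the sketch below, written in
the style of the bpf_dctcp selftest.  The prog/cc name "bad_cc" is
made up and the other mandatory tcp_congestion_ops members are
omitted for brevity:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>
  #include "bpf_tcp_helpers.h"

  #ifndef SOL_TCP
  #define SOL_TCP 6		/* value from the socket uapi */
  #endif
  #ifndef TCP_CONGESTION
  #define TCP_CONGESTION 13	/* value from the tcp uapi */
  #endif

  char _license[] SEC("license") = "GPL";

  SEC("struct_ops/bad_init")
  void BPF_PROG(bad_init, struct sock *sk)
  {
  	/* Re-selects "bad_cc", which runs this .init again, which
  	 * calls bpf_setsockopt() again, and so on.  With this patch
  	 * the nested call fails with -EBUSY instead of recursing.
  	 */
  	bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
  		       "bad_cc", sizeof("bad_cc"));
  }

  SEC(".struct_ops")
  struct tcp_congestion_ops bad_cc = {
  	.init	= (void *)bad_init,
  	.name	= "bad_cc",
  	/* ssthresh/undo_cwnd/cong_avoid stubs omitted for brevity */
  };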

This was previously prevented by the prog->active counter, but the
prog->active detection cannot be used in struct_ops, as explained in
the earlier patch of this set.

In this patch, the second bpf_setsockopt(tcp_cc) is not allowed, in
order to break the loop. This is done by checking whether the
previous bpf_run_ctx has saved the same sk pointer in its bpf_cookie.

Note that this essentially means only the first '.init' can call
bpf_setsockopt(TCP_CONGESTION) to pick a fallback cc (e.g. when the
peer does not support ECN), and the second '.init' cannot fall back
to yet another cc. This applies even when the second
bpf_setsockopt(TCP_CONGESTION) would not cause a loop.
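
For example, a legitimate ECN-aware '.init' doing such a one-shot
fallback could look like the sketch below.  Again, this is only an
illustration in the bpf_dctcp selftest style, reusing the includes
and defines from the earlier sketch; tcp_sk() comes from the
selftests' bpf_tcp_helpers.h and the prog name is made up:

  #ifndef TCP_ECN_OK
  #define TCP_ECN_OK 1		/* value from include/net/tcp.h */
  #endif

  SEC("struct_ops/ecn_aware_init")
  void BPF_PROG(ecn_aware_init, struct sock *sk)
  {
  	struct tcp_sock *tp = tcp_sk(sk);

  	/* The peer did not negotiate ECN, so switch this sk to a
  	 * non-ECN cc.  Only this first .init may do the switch; if
  	 * "cubic" were replaced by another bpf-tcp-cc, that cc's
  	 * .init calling bpf_setsockopt(TCP_CONGESTION) on the same
  	 * sk would now get -EBUSY.
  	 */
  	if (!(tp->ecn_flags & TCP_ECN_OK))
  		bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
  			       "cubic", sizeof("cubic"));
  }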

Signed-off-by: Martin KaFai Lau <martin.lau@...nel.org>
---
 include/linux/filter.h |  3 +++
 net/core/filter.c      |  4 ++--
 net/ipv4/bpf_tcp_ca.c  | 54 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 98e28126c24b..9942ecc68a45 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -911,6 +911,9 @@ int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+int _bpf_setsockopt(struct sock *sk, int level, int optname,
+		    char *optval, int optlen);
+
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 #define __bpf_call_base_args \
 	((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
diff --git a/net/core/filter.c b/net/core/filter.c
index f4cea3ff994a..e56a1ebcf1bc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5244,8 +5244,8 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
 	return -EINVAL;
 }
 
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
-			   char *optval, int optlen)
+int _bpf_setsockopt(struct sock *sk, int level, int optname,
+		    char *optval, int optlen)
 {
 	if (sk_fullsock(sk))
 		sock_owned_by_me(sk);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 6da16ae6a962..a9f2cab5ffbc 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -144,6 +144,57 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_init_ops_setsockopt, struct sock *, sk, int, level,
+	   int, optname, char *, optval, int, optlen)
+{
+	struct bpf_tramp_run_ctx *run_ctx, *saved_run_ctx;
+	int ret;
+
+	if (optname != TCP_CONGESTION)
+		return _bpf_setsockopt(sk, level, optname, optval, optlen);
+
+	run_ctx = (struct bpf_tramp_run_ctx *)current->bpf_ctx;
+	if (unlikely(run_ctx->saved_run_ctx &&
+		     run_ctx->saved_run_ctx->type == BPF_RUN_CTX_TYPE_STRUCT_OPS)) {
+		saved_run_ctx = (struct bpf_tramp_run_ctx *)run_ctx->saved_run_ctx;
+		/* It stops this loop:
+		 *
+		 * .init => bpf_setsockopt(tcp_cc) => .init =>
+		 * bpf_setsockopt(tcp_cc) => .init => ....
+		 *
+		 * The second bpf_setsockopt(tcp_cc) is not allowed
+		 * in order to break the loop when both .init
+		 * are the same bpf prog.
+		 *
+		 * This applies even when the second bpf_setsockopt(tcp_cc)
+		 * does not cause a loop. It means only the first
+		 * '.init' can call bpf_setsockopt(TCP_CONGESTION)
+		 * to pick a fallback cc (e.g. the peer does not
+		 * support ECN) and the second '.init' cannot fall
+		 * back to another cc.
+		 */
+		if (saved_run_ctx->bpf_cookie == (uintptr_t)sk)
+			return -EBUSY;
+	}
+
+	run_ctx->bpf_cookie = (uintptr_t)sk;
+	ret = _bpf_setsockopt(sk, level, optname, optval, optlen);
+	run_ctx->bpf_cookie = 0;
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_init_ops_setsockopt_proto = {
+	.func		= bpf_init_ops_setsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
 static u32 prog_ops_moff(const struct bpf_prog *prog)
 {
 	const struct btf_member *m;
@@ -169,6 +220,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
 	case BPF_FUNC_setsockopt:
+		if (prog_ops_moff(prog) ==
+		    offsetof(struct tcp_congestion_ops, init))
+			return &bpf_init_ops_setsockopt_proto;
 		/* Does not allow release() to call setsockopt.
 		 * release() is called when the current bpf-tcp-cc
 		 * is retiring. It is not allowed to call
--
2.30.2