Message-ID: <67a382a56d206_14e083294ef@willemb.c.googlers.com.notmuch>
Date: Wed, 05 Feb 2025 10:24:21 -0500
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Jason Xing <kerneljasonxing@...il.com>,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
dsahern@...nel.org,
willemdebruijn.kernel@...il.com,
willemb@...gle.com,
ast@...nel.org,
daniel@...earbox.net,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
john.fastabend@...il.com,
kpsingh@...nel.org,
sdf@...ichev.me,
haoluo@...gle.com,
jolsa@...nel.org,
horms@...nel.org
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
Jason Xing <kerneljasonxing@...il.com>
Subject: Re: [PATCH bpf-next v8 03/12] bpf: stop unsafely accessing TCP fields
in bpf callbacks
Jason Xing wrote:
> The "allow_tcp_access" flag is added to indicate that the callback
> site has a tcp_sock locked.
>
> Setting the new member allow_tcp_access in the existing callbacks
> where is_fullsock is set to 1 prevents a UDP socket from accessing
> struct tcp_sock, and likewise prevents a TCP socket that is not
> protected by the sk lock from doing the same; otherwise such an
> access could be catastrophic and lead to a panic.
>
> To keep it simple, instead of distinguishing between read and write
> access, users are not allowed any read/write access to the tcp_sock
> through the older bpf_sock_ops ctx. The new timestamping callbacks
> can use newer helpers to read everything from an sk (e.g. bpf_core_cast),
> so nothing is lost.
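
For reference, a rough sketch of what such a timestamping program might
look like (assuming libbpf's bpf_core_cast() from bpf_core_read.h; the
program and the field read here are only illustrative, not from this
series):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_core_read.h>

	SEC("sockops")
	int read_tp(struct bpf_sock_ops *skops)
	{
		const struct tcp_sock *tp;

		/* read-only CO-RE cast: works in any callback, without
		 * relying on the ctx rewrite that this patch restricts
		 */
		tp = bpf_core_cast(skops->sk, struct tcp_sock);
		if (tp)
			bpf_printk("srtt_us=%u", tp->srtt_us);
		return 1;
	}

	char LICENSE[] SEC("license") = "GPL";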
>
> Signed-off-by: Jason Xing <kerneljasonxing@...il.com>
> ---
> include/linux/filter.h | 5 +++++
> include/net/tcp.h | 1 +
> net/core/filter.c | 8 ++++----
> net/ipv4/tcp_input.c | 2 ++
> net/ipv4/tcp_output.c | 2 ++
> 5 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index a3ea46281595..1569e9f31a8c 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1508,6 +1508,11 @@ struct bpf_sock_ops_kern {
> void *skb_data_end;
> u8 op;
> u8 is_fullsock;
> + u8 allow_tcp_access; /* Indicate that the callback site
> + * has a tcp_sock locked. Then it
> + * would be safe to access struct
> + * tcp_sock.
> + */
perhaps no need for explicit documentation if the variable name is
self-documenting: is_locked_tcp_sock
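i.e., roughly:

	u8 is_locked_tcp_sock;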
> u8 remaining_opt_len;
> u64 temp; /* temp and everything after is not
> * initialized to 0 before calling
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 5b2b04835688..293047694710 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2649,6 +2649,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> if (sk_fullsock(sk)) {
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_owned_by_me(sk);
> }
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1c6c07507a78..dc0e67c5776a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -10381,10 +10381,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> } \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + allow_tcp_access), \
> fullsock_reg, si->src_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + allow_tcp_access)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
> if (si->dst_reg == si->src_reg) \
> *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
> @@ -10469,10 +10469,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> temp)); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + allow_tcp_access), \
> reg, si->dst_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + allow_tcp_access)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, sk),\
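
To spell out the effect of these two hunks: the guard field in the
generated instructions changes from is_fullsock to allow_tcp_access,
so a tcp_sock access from an unsafe callback site is skipped rather
than dereferenced. Roughly, in C (a sketch of the rewritten access,
not actual kernel code; snd_cwnd is only an example field):

	u32 get_tcp_field(struct bpf_sock_ops_kern *ops)
	{
		if (!ops->allow_tcp_access)
			return 0;	/* reads yield 0, writes are dropped */
		return tcp_sk(ops->sk)->snd_cwnd;
	}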
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index eb82e01da911..77185479ed5e 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -169,6 +169,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
>
> @@ -185,6 +186,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = bpf_op;
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
> if (skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 0e5b9a654254..695749807c09 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -522,6 +522,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> }
>
> @@ -567,6 +568,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> }
>
> --
> 2.43.5
>