Message-ID: <67a382a56d206_14e083294ef@willemb.c.googlers.com.notmuch>
Date: Wed, 05 Feb 2025 10:24:21 -0500
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Jason Xing <kerneljasonxing@...il.com>,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
dsahern@...nel.org,
willemdebruijn.kernel@...il.com,
willemb@...gle.com,
ast@...nel.org,
daniel@...earbox.net,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
john.fastabend@...il.com,
kpsingh@...nel.org,
sdf@...ichev.me,
haoluo@...gle.com,
jolsa@...nel.org,
horms@...nel.org
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
Jason Xing <kerneljasonxing@...il.com>
Subject: Re: [PATCH bpf-next v8 03/12] bpf: stop unsafely accessing TCP fields
in bpf callbacks
Jason Xing wrote:
> The "allow_tcp_access" flag is added to indicate that the callback
> site has a tcp_sock locked.
>
> Setting the new member allow_tcp_access in the existing callbacks
> where is_fullsock is set to 1 prevents a UDP socket from accessing
> struct tcp_sock, and likewise prevents a TCP socket that is not
> protected by the sk lock from doing the same; otherwise such an
> access could be catastrophic and lead to a panic.
>
> To keep it simple, instead of distinguishing between read and write
> access, users are not allowed any read/write access to the tcp_sock
> through the older bpf_sock_ops ctx. The new timestamping callbacks
> can use newer helpers to read everything from an sk (e.g. bpf_core_cast),
> so nothing is lost.
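
For reference, a rough sketch of what such a timestamping program might
look like (assuming libbpf's bpf_core_cast() from bpf_core_read.h; the
program and the field read here are only illustrative, not from this
series):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_core_read.h>

	SEC("sockops")
	int read_tp(struct bpf_sock_ops *skops)
	{
		const struct tcp_sock *tp;

		/* read-only CO-RE cast: works in any callback, without
		 * relying on the ctx rewrite that this patch restricts
		 */
		tp = bpf_core_cast(skops->sk, struct tcp_sock);
		if (tp)
			bpf_printk("srtt_us=%u", tp->srtt_us);
		return 1;
	}

	char LICENSE[] SEC("license") = "GPL";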
>
> Signed-off-by: Jason Xing <kerneljasonxing@...il.com>
> ---
> include/linux/filter.h | 5 +++++
> include/net/tcp.h | 1 +
> net/core/filter.c | 8 ++++----
> net/ipv4/tcp_input.c | 2 ++
> net/ipv4/tcp_output.c | 2 ++
> 5 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index a3ea46281595..1569e9f31a8c 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1508,6 +1508,11 @@ struct bpf_sock_ops_kern {
> void *skb_data_end;
> u8 op;
> u8 is_fullsock;
> + u8 allow_tcp_access; /* Indicate that the callback site
> + * has a tcp_sock locked. Then it
> + * would be safe to access struct
> + * tcp_sock.
> + */
perhaps no need for explicit documentation if the variable name is
self-documenting: is_locked_tcp_sock
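i.e., roughly:

	u8 is_locked_tcp_sock;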
> u8 remaining_opt_len;
> u64 temp; /* temp and everything after is not
> * initialized to 0 before calling
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 5b2b04835688..293047694710 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2649,6 +2649,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> if (sk_fullsock(sk)) {
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_owned_by_me(sk);
> }
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1c6c07507a78..dc0e67c5776a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -10381,10 +10381,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> } \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + allow_tcp_access), \
> fullsock_reg, si->src_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + allow_tcp_access)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
> if (si->dst_reg == si->src_reg) \
> *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
> @@ -10469,10 +10469,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> temp)); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + allow_tcp_access), \
> reg, si->dst_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + allow_tcp_access)); \
> *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, sk),\
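
To spell out the effect of these two hunks: the guard field in the
generated instructions changes from is_fullsock to allow_tcp_access,
so a tcp_sock access from an unsafe callback site is skipped rather
than dereferenced. Roughly, in C (a sketch of the rewritten access,
not actual kernel code; snd_cwnd is only an example field):

	u32 get_tcp_field(struct bpf_sock_ops_kern *ops)
	{
		if (!ops->allow_tcp_access)
			return 0;	/* reads yield 0, writes are dropped */
		return tcp_sk(ops->sk)->snd_cwnd;
	}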
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index eb82e01da911..77185479ed5e 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -169,6 +169,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
>
> @@ -185,6 +186,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = bpf_op;
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
> if (skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 0e5b9a654254..695749807c09 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -522,6 +522,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> }
>
> @@ -567,6 +568,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.allow_tcp_access = 1;
> sock_ops.sk = sk;
> }
>
> --
> 2.43.5
>