lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 25 Aug 2022 11:36:36 -0700
From:   sdf@...gle.com
To:     Martin KaFai Lau <kafai@...com>
Cc:     bpf@...r.kernel.org, netdev@...r.kernel.org,
        Alexei Starovoitov <ast@...nel.org>,
        Andrii Nakryiko <andrii@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        David Miller <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>, kernel-team@...com,
        Paolo Abeni <pabeni@...hat.com>
Subject: Re: [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to
 reuse do_tcp_getsockopt()

On 08/24, Martin KaFai Lau wrote:
> This patch changes bpf_getsockopt(SOL_TCP) to reuse
> do_tcp_getsockopt().  It removes the duplicated code from
> bpf_getsockopt(SOL_TCP).

> Before this patch, there were some optnames available to
> bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
> For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
> and a few more.  It surprises users from time to time.  This patch
> automatically closes this gap without duplicating more code.

> bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
> so it stays in sol_tcp_sockopt().

> For string name value like TCP_CONGESTION, bpf expects it
> is always null terminated, so sol_tcp_sockopt() decrements
> optlen by one before calling do_tcp_getsockopt() and
> the 'if (optlen < saved_optlen) memset(..,0,..);'
> in __bpf_getsockopt() will always do a null termination.

> Signed-off-by: Martin KaFai Lau <kafai@...com>
> ---
>   include/net/tcp.h |  2 ++
>   net/core/filter.c | 70 ++++++++++++++++++++++++++---------------------
>   net/ipv4/tcp.c    |  4 +--
>   3 files changed, 43 insertions(+), 33 deletions(-)

> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index c03a50c72f40..735e957f7f4b 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
>   void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
>   __poll_t tcp_poll(struct file *file, struct socket *sock,
>   		      struct poll_table_struct *wait);
> +int do_tcp_getsockopt(struct sock *sk, int level,
> +		      int optname, sockptr_t optval, sockptr_t optlen);
>   int tcp_getsockopt(struct sock *sk, int level, int optname,
>   		   char __user *optval, int __user *optlen);
>   bool tcp_bpf_bypass_getsockopt(int level, int optname);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 68b52243b306..cdbbcec46e8b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5096,8 +5096,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk,  
> int optname,
>   	return 0;
>   }

> -static int sol_tcp_setsockopt(struct sock *sk, int optname,
> -			      char *optval, int optlen)
> +static int sol_tcp_sockopt(struct sock *sk, int optname,
> +			   char *optval, int *optlen,
> +			   bool getopt)
>   {
>   	if (sk->sk_prot->setsockopt != tcp_setsockopt)
>   		return -EINVAL;
> @@ -5114,17 +5115,47 @@ static int sol_tcp_setsockopt(struct sock *sk,  
> int optname,
>   	case TCP_USER_TIMEOUT:
>   	case TCP_NOTSENT_LOWAT:
>   	case TCP_SAVE_SYN:
> -		if (optlen != sizeof(int))
> +		if (*optlen != sizeof(int))
>   			return -EINVAL;
>   		break;

[..]

>   	case TCP_CONGESTION:
> +		if (*optlen < 2)
> +			return -EINVAL;
> +		break;
> +	case TCP_SAVED_SYN:
> +		if (*optlen < 1)
> +			return -EINVAL;
>   		break;

This looks a bit inconsistent vs '*optlen != sizeof(int)' above. Maybe

if (*optlen < sizeof(u16))
if (*optlen < sizeof(u8))

?

>   	default:
> -		return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
> +		if (getopt)
> +			return -EINVAL;
> +		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
> +	}
> +
> +	if (getopt) {
> +		if (optname == TCP_SAVED_SYN) {
> +			struct tcp_sock *tp = tcp_sk(sk);
> +
> +			if (!tp->saved_syn ||
> +			    *optlen > tcp_saved_syn_len(tp->saved_syn))
> +				return -EINVAL;

You mention in the description that bpf doesn't free saved_syn,
maybe worth putting a comment with the rationale here as well?
I'm assuming we don't free from bpf because we want userspace to
have an opportunity to read it as well?

> +			memcpy(optval, tp->saved_syn->data, *optlen);
> +			return 0;
> +		}
> +
> +		if (optname == TCP_CONGESTION) {
> +			if (!inet_csk(sk)->icsk_ca_ops)
> +				return -EINVAL;

Is it worth doing the null termination more explicitly here?
For readability's sake:
			/* BPF always expects NULL-terminated strings. */
			optval[*optlen-1] = '\0';
> +			(*optlen)--;
> +		}
> +
> +		return do_tcp_getsockopt(sk, SOL_TCP, optname,
> +					 KERNEL_SOCKPTR(optval),
> +					 KERNEL_SOCKPTR(optlen));
>   	}

>   	return do_tcp_setsockopt(sk, SOL_TCP, optname,
> -				 KERNEL_SOCKPTR(optval), optlen);
> +				 KERNEL_SOCKPTR(optval), *optlen);
>   }

>   static int sol_ip_setsockopt(struct sock *sk, int optname,
> @@ -5179,7 +5210,7 @@ static int __bpf_setsockopt(struct sock *sk, int  
> level, int optname,
>   	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
>   		return sol_ipv6_setsockopt(sk, optname, optval, optlen);
>   	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
> -		return sol_tcp_setsockopt(sk, optname, optval, optlen);
> +		return sol_tcp_sockopt(sk, optname, optval, &optlen, false);

>   	return -EINVAL;
>   }
> @@ -5202,31 +5233,8 @@ static int __bpf_getsockopt(struct sock *sk, int  
> level, int optname,

>   	if (level == SOL_SOCKET) {
>   		err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
> -	} else if (IS_ENABLED(CONFIG_INET) &&
> -		   level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
> -		struct inet_connection_sock *icsk;
> -		struct tcp_sock *tp;
> -
> -		switch (optname) {
> -		case TCP_CONGESTION:
> -			icsk = inet_csk(sk);
> -
> -			if (!icsk->icsk_ca_ops || optlen <= 1)
> -				goto err_clear;
> -			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
> -			optval[optlen - 1] = 0;
> -			break;
> -		case TCP_SAVED_SYN:
> -			tp = tcp_sk(sk);
> -
> -			if (optlen <= 0 || !tp->saved_syn ||
> -			    optlen > tcp_saved_syn_len(tp->saved_syn))
> -				goto err_clear;
> -			memcpy(optval, tp->saved_syn->data, optlen);
> -			break;
> -		default:
> -			goto err_clear;
> -		}
> +	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
> +		err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
>   	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
>   		struct inet_sock *inet = inet_sk(sk);

> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index ab8118225797..a47cb5662be6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -4043,8 +4043,8 @@ struct sk_buff  
> *tcp_get_timestamping_opt_stats(const struct sock *sk,
>   	return stats;
>   }

> -static int do_tcp_getsockopt(struct sock *sk, int level,
> -			     int optname, sockptr_t optval, sockptr_t optlen)
> +int do_tcp_getsockopt(struct sock *sk, int level,
> +		      int optname, sockptr_t optval, sockptr_t optlen)
>   {
>   	struct inet_connection_sock *icsk = inet_csk(sk);
>   	struct tcp_sock *tp = tcp_sk(sk);
> --
> 2.30.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ