linux-kernel - [PATCH bpf-next v2] bpf: Upgrade bpf

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <DU0P192MB1547FE6F35CC1A3EEA1AFDECD6179@DU0P192MB1547.EURP192.PROD.OUTLOOK.COM>
Date:   Sat,  3 Dec 2022 01:39:10 +0800
From:   Ji Rongfeng <SikoJobs@...look.com>
To:     bpf@...r.kernel.org, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org
Cc:     martin.lau@...ux.dev, daniel@...earbox.net,
        john.fastabend@...il.com, ast@...nel.org, andrii@...nel.org,
        song@...nel.org, yhs@...com, kpsingh@...nel.org, sdf@...gle.com,
        haoluo@...gle.com, jolsa@...nel.org, davem@...emloft.net,
        edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
        Ji Rongfeng <SikoJobs@...look.com>
Subject: [PATCH bpf-next v2] bpf: Upgrade bpf_{g,s}etsockopt return values

Returning -EINVAL almost all the time when error occurs is not very
helpful for the bpf prog to figure out what is wrong. This patch
upgrades some return values so that they will be much more helpful.

* return -ENOPROTOOPT when optname is unsupported

  The same as {g,s}etsockopt() syscall does. Before this patch,
  bpf_setsockopt(TCP_SAVED_SYN) already returns -ENOPROTOOPT, which
  may confuse the user, as -EINVAL is returned on other unsupported
  optnames. This patch also rejects TCP_SAVED_SYN right in
  sol_tcp_sockopt() when getopt is false, since do_tcp_setsockopt()
  is just the executor and it's not its duty to discover such error
  in bpf. We should maintain a precise allowlist to control whether
  an optname is supported and allowed to enter the executor or not.
  Functions like do_tcp_setsockopt(), their behaviour are not fully
  controllable by bpf. Imagine we let an optname pass, expecting
  -ENOPROTOOPT will be returned, but someday that optname is
  actually processed and unfortunately causes deadlock when calling
  from bpf. Thus, precise access control is essential.

* return -EOPNOTSUPP on level-related errors

  In do_ip_getsockopt(), -EOPNOTSUPP will be returned if level !=
  SOL_IP. In ipv6_getsockopt(), -ENOPROTOOPT will be returned if
  level != SOL_IPV6. To be distinguishable, the former is chosen.

* return -EBADFD when sk is not a full socket

  -EPERM or -EBUSY was an option, but in many cases one of them
  will be returned, especially under level SOL_TCP. -EBADFD is the
  better choice, since it is hardly returned in all cases. The bpf
  prog will be able to recognize it and decide what to do next.

Signed-off-by: Ji Rongfeng <SikoJobs@...look.com>

>From my point of view, changing these return values is acceptable,
because most of them are designed to be shown to the bpf prog
developer only and rarely shown in production environment.

I'll send another patch to update documentation in a proper way
after this patch is accepted, and add some tests if necessary.
---
 net/core/filter.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 37baaa6b8fc3..44440b7d430c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5050,12 +5050,12 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
 	case SO_BINDTODEVICE:
 		break;
 	default:
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	if (getopt) {
 		if (optname == SO_BINDTODEVICE)
-			return -EINVAL;
+			return -ENOPROTOOPT;
 		return sk_getsockopt(sk, SOL_SOCKET, optname,
 				     KERNEL_SOCKPTR(optval),
 				     KERNEL_SOCKPTR(optlen));
@@ -5105,7 +5105,7 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
 		inet_csk(sk)->icsk_rto_min = timeout;
 		break;
 	default:
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	return 0;
@@ -5169,7 +5169,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
 			   bool getopt)
 {
 	if (sk->sk_prot->setsockopt != tcp_setsockopt)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	switch (optname) {
 	case TCP_NODELAY:
@@ -5194,7 +5194,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
 		break;
 	default:
 		if (getopt)
-			return -EINVAL;
+			return -ENOPROTOOPT;
 		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
 	}
 
@@ -5215,6 +5215,9 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
 		return do_tcp_getsockopt(sk, SOL_TCP, optname,
 					 KERNEL_SOCKPTR(optval),
 					 KERNEL_SOCKPTR(optlen));
+	} else {
+		if (optname == TCP_SAVED_SYN)
+			return -ENOPROTOOPT;
 	}
 
 	return do_tcp_setsockopt(sk, SOL_TCP, optname,
@@ -5226,7 +5229,7 @@ static int sol_ip_sockopt(struct sock *sk, int optname,
 			  bool getopt)
 {
 	if (sk->sk_family != AF_INET)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	switch (optname) {
 	case IP_TOS:
@@ -5234,7 +5237,7 @@ static int sol_ip_sockopt(struct sock *sk, int optname,
 			return -EINVAL;
 		break;
 	default:
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	if (getopt)
@@ -5251,7 +5254,7 @@ static int sol_ipv6_sockopt(struct sock *sk, int optname,
 			    bool getopt)
 {
 	if (sk->sk_family != AF_INET6)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	switch (optname) {
 	case IPV6_TCLASS:
@@ -5260,7 +5263,7 @@ static int sol_ipv6_sockopt(struct sock *sk, int optname,
 			return -EINVAL;
 		break;
 	default:
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	if (getopt)
@@ -5276,7 +5279,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
 			    char *optval, int optlen)
 {
 	if (!sk_fullsock(sk))
-		return -EINVAL;
+		return -EBADFD;
 
 	if (level == SOL_SOCKET)
 		return sol_socket_sockopt(sk, optname, optval, &optlen, false);
@@ -5287,7 +5290,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
 	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
 		return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
 
-	return -EINVAL;
+	return -EOPNOTSUPP;
 }
 
 static int _bpf_setsockopt(struct sock *sk, int level, int optname,
@@ -5304,7 +5307,7 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname,
 	int err, saved_optlen = optlen;
 
 	if (!sk_fullsock(sk)) {
-		err = -EINVAL;
+		err = -EBADFD;
 		goto done;
 	}
 
@@ -5317,7 +5320,7 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname,
 	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
 		err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true);
 	else
-		err = -EINVAL;
+		err = -EOPNOTSUPP;
 
 done:
 	if (err)
-- 
2.30.2