[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <52177bd8-65a5-ef4d-b00d-47509855c3e4@linux.dev>
Date: Tue, 5 Sep 2023 12:02:52 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Daan De Meyer <daan.j.demeyer@...il.com>
Cc: kernel-team@...a.com, netdev@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next v3 4/9] bpf: Implement cgroup sockaddr hooks for
unix sockets
On 8/31/23 8:34 AM, Daan De Meyer wrote:
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 0680569f9bd0..d8f508c56055 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -14540,14 +14540,19 @@ static int check_return_code(struct bpf_verifier_env *env)
> case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
> if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
> env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
> + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
> env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
> env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
> + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
> env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
> - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
> + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
> + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
> range = tnum_range(1, 1);
A note that getpeername, getsockname, and recvmsg cannot return err (err is
value 0 for cgroup-bpf). More on this later.
> if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
> env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
> range = tnum_range(0, 3);
> + if (env->prog->expected_attach_type == BPF_CGROUP_UNIX_BIND)
> + range = tnum_range(0, 1);
A few words in the commit message is needed for the difference on the return
code between UNIX_BIND and INET[46]_BIND. (ie. the
BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
Also, the default range should be (0, 1) already (the 'struct tnum range =
tnum_range(0, 1)' at the beginning of this function). The same goes for
UNIX_SENDMSG (and the existing INET[46]_SENDMSG) which should already have the
default (0, 1) range. Thus, no need to have a special test case here.
> break;
> case BPF_PROG_TYPE_CGROUP_SKB:
> if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 3ed6cd33b268..be4e0e923aa6 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -81,6 +81,7 @@
> #include <net/xdp.h>
> #include <net/mptcp.h>
> #include <net/netfilter/nf_conntrack_bpf.h>
> +#include <linux/un.h>
Is this needed?
>
> static const struct bpf_func_proto *
> bpf_sk_base_func_proto(enum bpf_func_id func_id);
> @@ -7828,6 +7829,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> switch (prog->expected_attach_type) {
> case BPF_CGROUP_INET4_CONNECT:
> case BPF_CGROUP_INET6_CONNECT:
> + case BPF_CGROUP_UNIX_CONNECT:
> return &bpf_bind_proto;
> default:
> return NULL;
> @@ -7856,16 +7858,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> switch (prog->expected_attach_type) {
> case BPF_CGROUP_INET4_BIND:
> case BPF_CGROUP_INET6_BIND:
> + case BPF_CGROUP_UNIX_BIND:
> case BPF_CGROUP_INET4_CONNECT:
> case BPF_CGROUP_INET6_CONNECT:
> + case BPF_CGROUP_UNIX_CONNECT:
> case BPF_CGROUP_UDP4_RECVMSG:
> case BPF_CGROUP_UDP6_RECVMSG:
> + case BPF_CGROUP_UNIX_RECVMSG:
> case BPF_CGROUP_UDP4_SENDMSG:
> case BPF_CGROUP_UDP6_SENDMSG:
> + case BPF_CGROUP_UNIX_SENDMSG:
> case BPF_CGROUP_INET4_GETPEERNAME:
> case BPF_CGROUP_INET6_GETPEERNAME:
> + case BPF_CGROUP_UNIX_GETPEERNAME:
> case BPF_CGROUP_INET4_GETSOCKNAME:
> case BPF_CGROUP_INET6_GETSOCKNAME:
> + case BPF_CGROUP_UNIX_GETSOCKNAME:
> return &bpf_sock_addr_setsockopt_proto;
> default:
> return NULL;
> @@ -7874,16 +7882,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> switch (prog->expected_attach_type) {
> case BPF_CGROUP_INET4_BIND:
> case BPF_CGROUP_INET6_BIND:
> + case BPF_CGROUP_UNIX_BIND:
> case BPF_CGROUP_INET4_CONNECT:
> case BPF_CGROUP_INET6_CONNECT:
> + case BPF_CGROUP_UNIX_CONNECT:
> case BPF_CGROUP_UDP4_RECVMSG:
> case BPF_CGROUP_UDP6_RECVMSG:
> + case BPF_CGROUP_UNIX_RECVMSG:
> case BPF_CGROUP_UDP4_SENDMSG:
> case BPF_CGROUP_UDP6_SENDMSG:
> + case BPF_CGROUP_UNIX_SENDMSG:
> case BPF_CGROUP_INET4_GETPEERNAME:
> case BPF_CGROUP_INET6_GETPEERNAME:
> + case BPF_CGROUP_UNIX_GETPEERNAME:
> case BPF_CGROUP_INET4_GETSOCKNAME:
> case BPF_CGROUP_INET6_GETSOCKNAME:
> + case BPF_CGROUP_UNIX_GETSOCKNAME:
> return &bpf_sock_addr_getsockopt_proto;
> default:
> return NULL;
> @@ -8931,8 +8945,8 @@ static bool sock_addr_is_valid_access(int off, int size,
> if (off % size != 0)
> return false;
>
> - /* Disallow access to IPv6 fields from IPv4 contex and vise
> - * versa.
> + /* Disallow access to fields not belonging to the attach type's address
> + * family.
> */
> switch (off) {
> case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 86930a8ed012..94fd6f2441d8 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -116,6 +116,7 @@
> #include <linux/freezer.h>
> #include <linux/file.h>
> #include <linux/btf_ids.h>
> +#include <linux/bpf-cgroup.h>
>
> #include "scm.h"
>
> @@ -1323,6 +1324,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> struct sock *sk = sock->sk;
> int err;
>
> + if (cgroup_bpf_enabled(CGROUP_UNIX_BIND)) {
It is a dup test. The same static_key test will be done in
BPF_CGROUP_RUN_SA_PROG*() also?
The same comment for other places before calling BPF_CGROUP_RUN_PROG_UNIX_* and
BPF_CGROUP_RUN_SA_PROG().
> + err = BPF_CGROUP_RUN_PROG_UNIX_BIND_LOCK(sk, uaddr, &addr_len);
> + if (err)
> + return err;
> + }
> +
> if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
> sunaddr->sun_family == AF_UNIX)
> return unix_autobind(sk);
> @@ -1377,6 +1384,13 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
> goto out;
>
> if (addr->sa_family != AF_UNSPEC) {
> + if (cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) {
> + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr,
> + &alen);
> + if (err)
> + goto out;
> + }
> +
> err = unix_validate_addr(sunaddr, alen);
> if (err)
> goto out;
> @@ -1486,6 +1500,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> int err;
> int st;
>
> + if (cgroup_bpf_enabled(CGROUP_UNIX_CONNECT)) {
> + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr,
> + &addr_len);
> + if (err)
> + goto out;
> + }
> +
> err = unix_validate_addr(sunaddr, addr_len);
> if (err)
> goto out;
> @@ -1749,7 +1770,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> struct sock *sk = sock->sk;
> struct unix_address *addr;
> DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
> - int err = 0;
> + int addr_len = 0, err = 0;
>
> if (peer) {
> sk = unix_peer_get(sk);
> @@ -1766,14 +1787,37 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> if (!addr) {
> sunaddr->sun_family = AF_UNIX;
> sunaddr->sun_path[0] = 0;
> - err = offsetof(struct sockaddr_un, sun_path);
> + addr_len = offsetof(struct sockaddr_un, sun_path);
> } else {
> - err = addr->len;
> + addr_len = addr->len;
> memcpy(sunaddr, addr->name, addr->len);
> }
> +
> + if (peer && cgroup_bpf_enabled(CGROUP_UNIX_GETPEERNAME)) {
> + err = BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &addr_len,
> + CGROUP_UNIX_GETPEERNAME);
> + if (err)
UNIX_GETPEERNAME can only have return value 1 (OK), so no need to do err check here.
> + goto out;
> +
> + err = unix_validate_addr(sunaddr, addr_len);
Since the kfunc is specific to the unix address, how about doing the
unix_validate_addr check in the kfunc itself?
> + if (err)
> + goto out;
> + }
> +
> + if (!peer && cgroup_bpf_enabled(CGROUP_UNIX_GETSOCKNAME)) {
> + err = BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &addr_len,
> + CGROUP_UNIX_GETSOCKNAME);
> + if (err)
Same here on the unnecessary err check.
> + goto out;
> +
> + err = unix_validate_addr(sunaddr, addr_len);
> + if (err)
> + goto out;
> + }
> +
> sock_put(sk);
> out:
> - return err;
> + return err ?: addr_len;
> }
>
> static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
> @@ -1919,6 +1963,15 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
> goto out;
>
> if (msg->msg_namelen) {
> + if (cgroup_bpf_enabled(CGROUP_UNIX_SENDMSG)) {
> + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
> + msg->msg_name,
> + &msg->msg_namelen,
> + NULL);
> + if (err)
> + goto out;
> + }
> +
> err = unix_validate_addr(sunaddr, msg->msg_namelen);
> if (err)
> goto out;
> @@ -2328,14 +2381,30 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
> return unix_dgram_recvmsg(sock, msg, size, flags);
> }
>
> -static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
> +static int unix_recvmsg_copy_addr(struct msghdr *msg, struct sock *sk)
> {
> struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
> + int err;
>
> if (addr) {
> msg->msg_namelen = addr->len;
> memcpy(msg->msg_name, addr->name, addr->len);
> +
> + if (cgroup_bpf_enabled(CGROUP_UNIX_RECVMSG)) {
> + err = BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
> + msg->msg_name,
> + &msg->msg_namelen);
> + if (err)
Same here on the unnecessary err check.
> + return err;
> +
> + err = unix_validate_addr(msg->msg_name,
> + msg->msg_namelen);
If unix_validate_addr is done in the kfunc, the unix_recvmsg_copy_addr does not
need to return error and the changes in the unix_recvmsg_copy_addr's caller is
not needed also.
> + if (err)
> + return err;
> + }
> }
> +
> + return 0;
> }
>
> int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> @@ -2390,8 +2459,11 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> EPOLLOUT | EPOLLWRNORM |
> EPOLLWRBAND);
>
> - if (msg->msg_name)
> - unix_copy_addr(msg, skb->sk);
> + if (msg->msg_name) {
> + err = unix_recvmsg_copy_addr(msg, skb->sk);
> + if (err)
> + goto out_free;
> + }
>
> if (size > skb->len - skip)
> size = skb->len - skip;
> @@ -2743,7 +2815,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
> if (state->msg && state->msg->msg_name) {
> DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
> state->msg->msg_name);
> - unix_copy_addr(state->msg, skb->sk);
> + err = unix_recvmsg_copy_addr(state->msg, skb->sk);
> + if (err)
> + break;
> sunaddr = NULL;
> }
Powered by blists - more mailing lists