[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f78069e0-9885-b3ab-4ad7-8abe94b9ad0a@linux.dev>
Date: Wed, 27 Sep 2023 15:32:35 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Daan De Meyer <daan.j.demeyer@...il.com>
Cc: kernel-team@...a.com, netdev@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next v6 4/9] bpf: Implement cgroup sockaddr hooks for
unix sockets
On 9/26/23 1:27 PM, Daan De Meyer wrote:
> These hooks allows intercepting connect(), getsockname(),
> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> socket hooks get write access to the address length because the
> address length is not fixed when dealing with unix sockets and
> needs to be modified when a unix socket address is modified by
> the hook. Because abstract socket unix addresses start with a
> NUL byte, we cannot recalculate the socket address in kernelspace
> after running the hook by calculating the length of the unix socket
> path using strlen().
>
> These hooks can be used when users want to multiplex syscall to a
> single unix socket to multiple different processes behind the scenes
> by redirecting the connect() and other syscalls to process specific
> sockets.
>
> We do not implement support for intercepting bind() because when
> using bind() with unix sockets with a pathname address, this creates
> an inode in the filesystem which must be cleaned up. If we rewrite
> the address, the user might try to clean up the wrong file, leaking
> the socket in the filesystem where it is never cleaned up. Until we
> figure out a solution for this (and a use case for intercepting bind()),
> we opt to not allow rewriting the sockaddr in bind() calls.
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 31561e789715..c069f3510365 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
> CGROUP_ATYPE(CGROUP_INET6_BIND);
> CGROUP_ATYPE(CGROUP_INET4_CONNECT);
> CGROUP_ATYPE(CGROUP_INET6_CONNECT);
> + CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
> CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
> CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
> CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
> CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
> + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
> CGROUP_ATYPE(CGROUP_SYSCTL);
> CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
> CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
> + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
> CGROUP_ATYPE(CGROUP_GETSOCKOPT);
> CGROUP_ATYPE(CGROUP_SETSOCKOPT);
> CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
> CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
> + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
> CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
> CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
> + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
> CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
> default:
> return CGROUP_BPF_ATTACH_TYPE_INVALID;
> @@ -283,24 +288,36 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
> #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \
> BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
>
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) \
> + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT)
> +
Remove the no _LOCK version because it is not used.
> #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
>
> #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
>
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \
> + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
> +
> #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
>
> #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
>
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
> + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
> +
> #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
>
> #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
> BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
>
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
> + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
> +
> /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
> * fullsock and its parent fullsock cannot be traced by
> * sk_to_full_sk().
> @@ -492,10 +509,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
> #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
Same here. Not needed.
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
[ ... ]
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index ba2c57cf4046..4a4e3b1f00b1 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1455,7 +1455,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
> * @flags: Pointer to u32 which contains higher bits of BPF program
> * return value (OR'ed together).
> *
> - * socket is expected to be of type INET or INET6.
> + * socket is expected to be of type INET, INET6 or UNIX.
> *
> * This function will return %-EPERM if an attached program is found and
> * returned value != 1 during execution. In all other cases, 0 is returned.
> @@ -1479,7 +1479,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
> /* Check socket family since not all sockets represent network
> * endpoint (e.g. AF_UNIX).
> */
> - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
> + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
> + sk->sk_family != AF_UNIX)
./scripts/checkpatch.pl --strict:
CHECK: Alignment should match open parenthesis
#211: FILE: kernel/bpf/cgroup.c:1483:
+ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
+ sk->sk_family != AF_UNIX)
[ ... ]
> diff --git a/net/core/filter.c b/net/core/filter.c
> index bd1c42b28483..956f413e98a3 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -7829,6 +7829,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> switch (prog->expected_attach_type) {
> case BPF_CGROUP_INET4_CONNECT:
> case BPF_CGROUP_INET6_CONNECT:
> + case BPF_CGROUP_UNIX_CONNECT:
> return &bpf_bind_proto;
bpf_bind support is still not removed...
[ ... ]
> @@ -2744,6 +2773,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
> DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
> state->msg->msg_name);
> unix_copy_addr(state->msg, skb->sk);
> +
> + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
> + state->msg->msg_name,
> + &state->msg->msg_namelen);
> +
Re-pasting the comment from v5:
it will be useful to mention the reason in the commit message in case we need to
look back a few months later why only recvmsg is supported but not sendmsg for
connected stream.
Powered by blists - more mailing lists