[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200226183746.wvkp2mrstotyepyc@kafai-mbp>
Date: Wed, 26 Feb 2020 10:37:46 -0800
From: Martin KaFai Lau <kafai@...com>
To: Lorenz Bauer <lmb@...udflare.com>
CC: <ast@...nel.org>, <daniel@...earbox.net>,
<john.fastabend@...il.com>, <netdev@...r.kernel.org>,
<bpf@...r.kernel.org>, <kernel-team@...udflare.com>
Subject: Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
On Tue, Feb 25, 2020 at 01:56:32PM +0000, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
>
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
>
> Signed-off-by: Lorenz Bauer <lmb@...udflare.com>
> ---
> include/linux/skmsg.h | 36 ++++++++-----
> include/net/tcp.h | 1 -
> net/core/skmsg.c | 52 +++++++++++++++++++
> net/core/sock_map.c | 24 ++++-----
> net/ipv4/tcp_bpf.c | 114 ++++++++++++------------------------------
> 5 files changed, 116 insertions(+), 111 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index c881094387db..70d65ab10b5c 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -109,6 +109,16 @@ struct sk_psock {
> };
> };
>
> +struct sk_psock_hooks {
> + struct proto *base_ipv6;
> + struct proto *ipv4;
> + struct proto *ipv6;
> + spinlock_t ipv6_lock;
> + int (*rebuild_proto)(struct proto prot[], struct proto *base);
> + struct proto *(*choose_proto)(struct proto prot[],
> + struct sk_psock *psock);
> +};
> +
> int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
> int elem_first_coalesce);
> int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
> @@ -335,23 +345,14 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
> }
> }
>
> -static inline void sk_psock_update_proto(struct sock *sk,
> - struct sk_psock *psock,
> - struct proto *ops)
> -{
> - psock->saved_unhash = sk->sk_prot->unhash;
> - psock->saved_close = sk->sk_prot->close;
> - psock->saved_write_space = sk->sk_write_space;
> -
> - psock->sk_proto = sk->sk_prot;
> - /* Pairs with lockless read in sk_clone_lock() */
> - WRITE_ONCE(sk->sk_prot, ops);
> -}
> -
> static inline void sk_psock_restore_proto(struct sock *sk,
> struct sk_psock *psock)
> {
> + if (!psock->sk_proto)
> + return;
> +
> sk->sk_prot->unhash = psock->saved_unhash;
> +
> if (inet_sk(sk)->is_icsk) {
> tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
> } else {
> @@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
> psock_set_prog(&progs->skb_verdict, NULL);
> }
>
> +static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
> + struct proto *ipv4_base)
> +{
> + hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED();
> + return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);
> +
> #endif /* _LINUX_SKMSG_H */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 07f947cc80e6..ccf39d80b695 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2196,7 +2196,6 @@ struct sk_msg;
> struct sk_psock;
>
> int tcp_bpf_init(struct sock *sk);
> -void tcp_bpf_reinit(struct sock *sk);
> int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
> int flags);
> int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index eeb28cb85664..a9bdf02c2539 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -844,3 +844,55 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
> strp_stop(&parser->strp);
> parser->enabled = false;
> }
> +
> +static inline int sk_psock_hooks_init_ipv6(struct sk_psock_hooks *hooks,
> + struct proto *base)
> +{
> + int ret = 0;
> +
> + if (likely(base == smp_load_acquire(&hooks->base_ipv6)))
> + return 0;
> +
> + spin_lock_bh(&hooks->ipv6_lock);
> + if (likely(base != hooks->base_ipv6)) {
> + ret = hooks->rebuild_proto(hooks->ipv6, base);
> + if (!ret)
> + smp_store_release(&hooks->base_ipv6, base);
> + }
> + spin_unlock_bh(&hooks->ipv6_lock);
> + return ret;
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk)
> +{
> + struct sk_psock *psock = sk_psock(sk);
> + struct proto *prot_base;
> +
> + WARN_ON_ONCE(!rcu_read_lock_held());
Is this only for the earlier sk_psock(sk)?
> +
> + if (unlikely(!psock))
When will this happen?
> + return -EINVAL;
> +
> + /* Initialize saved callbacks and original proto only once.
> + * Since we've not installed the hooks, psock is not yet in use and
> + * we can initialize it without synchronization.
> + */
> + if (!psock->sk_proto) {
If I read it correctly, this is to replace the tcp_bpf_reinit_sk_prot()?
I think some of the current reinit comment is useful to keep also:
/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed ... */
> + struct proto *prot = READ_ONCE(sk->sk_prot);
> +
> + if (sk->sk_family == AF_INET6 &&
> + sk_psock_hooks_init_ipv6(hooks, prot))
> + return -EINVAL;
> +
> + psock->saved_unhash = prot->unhash;
> + psock->saved_close = prot->close;
> + psock->saved_write_space = sk->sk_write_space;
> +
> + psock->sk_proto = prot;
> + }
> +
> + /* Pairs with lockless read in sk_clone_lock() */
> + prot_base = sk->sk_family == AF_INET ? hooks->ipv4 : hooks->ipv6;
> + WRITE_ONCE(sk->sk_prot, hooks->choose_proto(prot_base, psock));
> + return 0;
> +}
Powered by blists - more mailing lists