lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a76f89b3-0911-e1f1-d1c1-707b9bc5478a@gmail.com>
Date:   Sun, 18 Jul 2021 19:49:34 +0200
From:   Eric Dumazet <eric.dumazet@...il.com>
To:     Cong Wang <xiyou.wangcong@...il.com>, netdev@...r.kernel.org
Cc:     bpf@...r.kernel.org, Cong Wang <cong.wang@...edance.com>,
        John Fastabend <john.fastabend@...il.com>,
        Daniel Borkmann <daniel@...earbox.net>,
        Jakub Sitnicki <jakub@...udflare.com>,
        Lorenz Bauer <lmb@...udflare.com>
Subject: Re: [PATCH bpf-next v5 07/11] af_unix: implement
 unix_dgram_bpf_recvmsg()



On 7/4/21 9:02 PM, Cong Wang wrote:
> From: Cong Wang <cong.wang@...edance.com>
> 
> We have to implement unix_dgram_bpf_recvmsg() to replace the
> original ->recvmsg() to retrieve skmsg from ingress_msg.
> 
> AF_UNIX is again special here because of the lack of
> sk_prot->recvmsg(). I simply add a special case inside
> unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.
> 
> Cc: John Fastabend <john.fastabend@...il.com>
> Cc: Daniel Borkmann <daniel@...earbox.net>
> Cc: Jakub Sitnicki <jakub@...udflare.com>
> Cc: Lorenz Bauer <lmb@...udflare.com>
> Signed-off-by: Cong Wang <cong.wang@...edance.com>
> ---
>  include/net/af_unix.h |  2 ++
>  net/unix/af_unix.c    | 19 +++++++++--
>  net/unix/unix_bpf.c   | 75 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 93 insertions(+), 3 deletions(-)
> 
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index cca645846af1..435a2c3d5a6f 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -82,6 +82,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
>  long unix_inq_len(struct sock *sk);
>  long unix_outq_len(struct sock *sk);
>  
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> +			 int flags);
>  #ifdef CONFIG_SYSCTL
>  int unix_sysctl_register(struct net *net);
>  void unix_sysctl_unregister(struct net *net);
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 573253c5b5c2..89927678c0dc 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -2098,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
>  	}
>  }
>  
> -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> -			      size_t size, int flags)
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> +			 int flags)
>  {
>  	struct scm_cookie scm;
> -	struct sock *sk = sock->sk;
> +	struct socket *sock = sk->sk_socket;
>  	struct unix_sock *u = unix_sk(sk);
>  	struct sk_buff *skb, *last;
>  	long timeo;
> @@ -2205,6 +2205,19 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
>  	return err;
>  }
>  
> +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
> +			      int flags)
> +{
> +	struct sock *sk = sock->sk;
> +
> +#ifdef CONFIG_BPF_SYSCALL
> +	if (sk->sk_prot != &unix_proto)
> +		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> +					    flags & ~MSG_DONTWAIT, NULL);
> +#endif
> +	return __unix_dgram_recvmsg(sk, msg, size, flags);
> +}
> +
>  static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
>  			  sk_read_actor_t recv_actor)
>  {
> diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
> index b1582a659427..db0cda29fb2f 100644
> --- a/net/unix/unix_bpf.c
> +++ b/net/unix/unix_bpf.c
> @@ -6,6 +6,80 @@
>  #include <net/sock.h>
>  #include <net/af_unix.h>
>  
> +#define unix_sk_has_data(__sk, __psock)					\
> +		({	!skb_queue_empty(&__sk->sk_receive_queue) ||	\
> +			!skb_queue_empty(&__psock->ingress_skb) ||	\
> +			!list_empty(&__psock->ingress_msg);		\
> +		})
> +
> +static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
> +			      long timeo)
> +{
> +	DEFINE_WAIT_FUNC(wait, woken_wake_function);
> +	struct unix_sock *u = unix_sk(sk);
> +	int ret = 0;
> +
> +	if (sk->sk_shutdown & RCV_SHUTDOWN)
> +		return 1;
> +
> +	if (!timeo)
> +		return ret;
> +
> +	add_wait_queue(sk_sleep(sk), &wait);
> +	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> +	if (!unix_sk_has_data(sk, psock)) {
> +		mutex_unlock(&u->iolock);
> +		wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
> +		mutex_lock(&u->iolock);
> +		ret = unix_sk_has_data(sk, psock);
> +	}
> +	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> +	remove_wait_queue(sk_sleep(sk), &wait);
> +	return ret;
> +}
> +
> +static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
> +				  size_t len, int nonblock, int flags,
> +				  int *addr_len)
> +{
> +	struct unix_sock *u = unix_sk(sk);
> +	struct sk_psock *psock;
> +	int copied, ret;
> +
> +	psock = sk_psock_get(sk);
> +	if (unlikely(!psock))
> +		return __unix_dgram_recvmsg(sk, msg, len, flags);
> +
> +	mutex_lock(&u->iolock);

The u->iolock mutex is held here.

> +	if (!skb_queue_empty(&sk->sk_receive_queue) &&
> +	    sk_psock_queue_empty(psock)) {
> +		ret = __unix_dgram_recvmsg(sk, msg, len, flags);

But won't __unix_dgram_recvmsg() also try to grab this mutex?

> +		goto out;
> +	}
> +
> +msg_bytes_ready:
> +	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
> +	if (!copied) {
> +		long timeo;
> +		int data;
> +
> +		timeo = sock_rcvtimeo(sk, nonblock);
> +		data = unix_msg_wait_data(sk, psock, timeo);
> +		if (data) {
> +			if (!sk_psock_queue_empty(psock))
> +				goto msg_bytes_ready;
> +			ret = __unix_dgram_recvmsg(sk, msg, len, flags);
> +			goto out;
> +		}
> +		copied = -EAGAIN;
> +	}
> +	ret = copied;
> +out:
> +	mutex_unlock(&u->iolock);
> +	sk_psock_put(sk, psock);
> +	return ret;
> +}
> +
>  static struct proto *unix_prot_saved __read_mostly;
>  static DEFINE_SPINLOCK(unix_prot_lock);
>  static struct proto unix_bpf_prot;
> @@ -14,6 +88,7 @@ static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base
>  {
>  	*prot        = *base;
>  	prot->close  = sock_map_close;
> +	prot->recvmsg = unix_dgram_bpf_recvmsg;
>  }
>  
>  static void unix_bpf_check_needs_rebuild(struct proto *ops)
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ