[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240118214347.36109-1-kuniyu@amazon.com>
Date: Thu, 18 Jan 2024 13:43:47 -0800
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <edumazet@...gle.com>
CC: <davem@...emloft.net>, <dsahern@...nel.org>, <eric.dumazet@...il.com>,
<kuba@...nel.org>, <netdev@...r.kernel.org>, <pabeni@...hat.com>,
<willemb@...gle.com>, Kuniyuki Iwashima <kuniyu@...zon.com>
Subject: Re: [PATCH v3 net] udp: fix busy polling
From: Eric Dumazet <edumazet@...gle.com>
Date: Thu, 18 Jan 2024 20:17:49 +0000
> Generic sk_busy_loop_end() only looks at sk->sk_receive_queue
> for presence of packets.
>
> Problem is that for UDP sockets after blamed commit, some packets
> could be present in another queue: udp_sk(sk)->reader_queue
>
> In some cases, a busy poller could spin until timeout expiration,
> even if some packets are available in udp_sk(sk)->reader_queue.
>
> v3: - make sk_busy_loop_end() nicer (Willem)
>
> v2: - add a READ_ONCE(sk->sk_family) in sk_is_inet() to avoid KCSAN splats.
> - add a sk_is_inet() check in sk_is_udp() (Willem feedback)
> - add a sk_is_inet() check in sk_is_tcp().
>
> Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception")
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Reviewed-by: Paolo Abeni <pabeni@...hat.com>
> Reviewed-by: Willem de Bruijn <willemb@...gle.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@...zon.com>
IPPROTO_UDPLITE will have the same issue, but it is not worth
adding sk_is_udp_or_udplite() for a single place given that
no one has complained about the deprecation notice.
> ---
> include/linux/skmsg.h | 6 ------
> include/net/inet_sock.h | 5 -----
> include/net/sock.h | 18 +++++++++++++++++-
> net/core/sock.c | 11 +++++++++--
> 4 files changed, 26 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 888a4b217829fd4d6baf52f784ce35e9ad6bd0ed..e65ec3fd27998a5b82fc2c4597c575125e653056 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -505,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
> return !!psock->saved_data_ready;
> }
>
> -static inline bool sk_is_udp(const struct sock *sk)
> -{
> - return sk->sk_type == SOCK_DGRAM &&
> - sk->sk_protocol == IPPROTO_UDP;
> -}
> -
> #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
>
> #define BPF_F_STRPARSER (1UL << 1)
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index aa86453f6b9ba367f772570a7b783bb098be6236..d94c242eb3ed20b2c5b2e5ceea3953cf96341fb7 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -307,11 +307,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
> #define inet_assign_bit(nr, sk, val) \
> assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
>
> -static inline bool sk_is_inet(struct sock *sk)
> -{
> - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
> -}
> -
> /**
> * sk_to_full_sk - Access to a full socket
> * @sk: pointer to a socket
> diff --git a/include/net/sock.h b/include/net/sock.h
> index a7f815c7cfdfdf1296be2967fd100efdb10cdd63..54ca8dcbfb4335d657b5cea323aa7d8c4316d49e 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2765,9 +2765,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
> &skb_shinfo(skb)->tskey);
> }
>
> +static inline bool sk_is_inet(const struct sock *sk)
> +{
> + int family = READ_ONCE(sk->sk_family);
> +
> + return family == AF_INET || family == AF_INET6;
> +}
> +
> static inline bool sk_is_tcp(const struct sock *sk)
> {
> - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
> + return sk_is_inet(sk) &&
> + sk->sk_type == SOCK_STREAM &&
> + sk->sk_protocol == IPPROTO_TCP;
> +}
> +
> +static inline bool sk_is_udp(const struct sock *sk)
> +{
> + return sk_is_inet(sk) &&
> + sk->sk_type == SOCK_DGRAM &&
> + sk->sk_protocol == IPPROTO_UDP;
> }
>
> static inline bool sk_is_stream_unix(const struct sock *sk)
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 158dbdebce6a3693deb63e557e856d9cdd7500ae..0a7f46c37f0cfc169e11377107c8342c229da0de 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -107,6 +107,7 @@
> #include <linux/interrupt.h>
> #include <linux/poll.h>
> #include <linux/tcp.h>
> +#include <linux/udp.h>
> #include <linux/init.h>
> #include <linux/highmem.h>
> #include <linux/user_namespace.h>
> @@ -4144,8 +4145,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
> {
> struct sock *sk = p;
>
> - return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
> - sk_busy_loop_timeout(sk, start_time);
> + if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
> + return true;
> +
> + if (sk_is_udp(sk) &&
> + !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
> + return true;
> +
> + return sk_busy_loop_timeout(sk, start_time);
> }
> EXPORT_SYMBOL(sk_busy_loop_end);
> #endif /* CONFIG_NET_RX_BUSY_POLL */
> --
> 2.43.0.429.g432eaa2c6b-goog
>
Powered by blists - more mailing lists