[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJNCK_vqBOk0nsBGmB1GAPMp7N3AT3g_SeMWDXUOH0GgQ@mail.gmail.com>
Date: Thu, 2 Dec 2021 09:18:40 -0800
From: Eric Dumazet <edumazet@...gle.com>
To: Akhmat Karakotov <hmukos@...dex-team.ru>
Cc: eric.dumazet@...il.com, mitradir@...dex-team.ru,
netdev@...r.kernel.org, tom@...bertland.com, zeil@...dex-team.ru
Subject: Re: [RFC PATCH v2 net-next 2/4] txhash: Add socket option to control
TX hash rethink behavior
On Thu, Dec 2, 2021 at 8:41 AM Akhmat Karakotov <hmukos@...dex-team.ru> wrote:
>
> Add the SO_TXREHASH socket option to control hash rethink behavior per socket.
> When default mode is set, sockets disable rehash at initialization and use
> sysctl option when entering listen state. setsockopt() overrides default
> behavior.
>
> Signed-off-by: Akhmat Karakotov <hmukos@...dex-team.ru>
> ---
> arch/alpha/include/uapi/asm/socket.h | 2 ++
> arch/mips/include/uapi/asm/socket.h | 2 ++
> arch/parisc/include/uapi/asm/socket.h | 2 ++
> arch/sparc/include/uapi/asm/socket.h | 2 ++
> include/net/sock.h | 12 +++---------
> include/uapi/asm-generic/socket.h | 2 ++
> include/uapi/linux/socket.h | 1 +
> net/core/sock.c | 13 +++++++++++++
> net/ipv4/inet_connection_sock.c | 3 +++
> 9 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index 1dd9baf4a6c2..e6b3f38f8c0e 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -131,6 +131,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 1eaf6a1ca561..2c8085ecde0a 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -142,6 +142,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index 8baaad52d799..8bb78ed36e97 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -123,6 +123,8 @@
>
> #define SO_BUF_LOCK 0x4046
>
> +#define SO_TXREHASH 0x4047
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index e80ee8641ac3..cd43a690fbac 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -124,6 +124,8 @@
>
> #define SO_BUF_LOCK 0x0051
>
> +#define SO_TXREHASH 0x0052
> +
> #if !defined(__KERNEL__)
>
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index cc83140d6502..26c0efd7aa4b 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -313,6 +313,7 @@ struct bpf_local_storage;
> * @sk_rcvtimeo: %SO_RCVTIMEO setting
> * @sk_sndtimeo: %SO_SNDTIMEO setting
> * @sk_txhash: computed flow hash for use on transmit
> + * @sk_txrehash: enable TX hash rethink
> * @sk_filter: socket filtering instructions
> * @sk_timer: sock cleanup timer
> * @sk_stamp: time stamp of last packet received
> @@ -462,6 +463,7 @@ struct sock {
> unsigned int sk_gso_max_size;
> gfp_t sk_allocation;
> __u32 sk_txhash;
> + u8 sk_txrehash;
This adds a 7 byte hole.
Perhaps put this right after sk_prefer_busy_poll, since there is a
1-byte hole there.
Same cache line.
>
> /*
> * Because of non atomicity rules, all
> @@ -1954,18 +1956,10 @@ static inline void sk_set_txhash(struct sock *sk)
>
> static inline bool sk_rethink_txhash(struct sock *sk)
> {
> - u8 rehash;
> -
> - if (!sk->sk_txhash)
> - return false;
> -
> - rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
> -
> - if (rehash) {
> + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
> sk_set_txhash(sk);
> return true;
> }
> -
> return false;
> }
>
> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> index 1f0a2b4864e4..6c17e477ec9f 100644
> --- a/include/uapi/asm-generic/socket.h
> +++ b/include/uapi/asm-generic/socket.h
> @@ -126,6 +126,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
> diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
> index 0accd6102ece..75fab2ada8cf 100644
> --- a/include/uapi/linux/socket.h
> +++ b/include/uapi/linux/socket.h
> @@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage {
>
> #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
>
> +#define SOCK_TXREHASH_DEFAULT -1
> #define SOCK_TXREHASH_DISABLED 0
> #define SOCK_TXREHASH_ENABLED 1
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 62627e868e03..ca349ca4c31d 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1367,6 +1367,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
> ~SOCK_BUF_LOCK_MASK);
> break;
>
> + case SO_TXREHASH:
> + if (val < -1 || val > 1) {
> + ret = -EINVAL;
> + break;
> + }
> + sk->sk_txrehash = val;
> + break;
> +
> default:
> ret = -ENOPROTOOPT;
> break;
> @@ -1733,6 +1741,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
> v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
> break;
>
> + case SO_TXREHASH:
> + v.val = sk->sk_txrehash;
> + break;
> +
> default:
> /* We implement the SO_SNDLOWAT etc to not be settable
> * (1003.1g 7).
> @@ -3165,6 +3177,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
> sk->sk_pacing_rate = ~0UL;
> WRITE_ONCE(sk->sk_pacing_shift, 10);
> sk->sk_incoming_cpu = -1;
> + sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
>
> sk_rx_queue_clear(sk);
> /*
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index f25d02ad4a8a..0d477c816309 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -1046,6 +1046,9 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> sk->sk_ack_backlog = 0;
> inet_csk_delack_init(sk);
>
> + if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
> + sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
> +
> /* There is race window here: we announce ourselves listening,
> * but this transition is still not validated by get_port().
> * It is OK, because this socket enters to hash table only
> --
> 2.17.1
>
Powered by blists - more mailing lists