[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f21dfe61-58e6-ba03-cc0a-b5d2fd0a88c6@gmail.com>
Date: Mon, 25 Oct 2021 14:05:39 -0700
From: Eric Dumazet <eric.dumazet@...il.com>
To: Akhmat Karakotov <hmukos@...dex-team.ru>, netdev@...r.kernel.org
Cc: tom@...bertland.com, mitradir@...dex-team.ru, zeil@...dex-team.ru
Subject: Re: [RFC PATCH net-next 2/4] txhash: Add socket option to control TX
hash rethink behavior
On 10/25/21 1:35 PM, Akhmat Karakotov wrote:
> Add the SO_TXREHASH socket option to control hash rethink behavior per socket.
> When default mode is set, sockets disable rehash at initialization and use
> sysctl option when entering listen state. setsockopt() overrides default
> behavior.
What values are accepted, and what are their meaning ?
It seems weird to do anything in inet_csk_listen_start()
For sockets that have not used SO_TXREHASH
(this includes passive sockets where their parent did not use SO_TXREHASH),
the sysctl _current_ value should be used every time we consider a rehash.
>
> Signed-off-by: Akhmat Karakotov <hmukos@...dex-team.ru>
> ---
> arch/alpha/include/uapi/asm/socket.h | 2 ++
> arch/mips/include/uapi/asm/socket.h | 2 ++
> arch/parisc/include/uapi/asm/socket.h | 2 ++
> arch/sparc/include/uapi/asm/socket.h | 2 ++
> include/net/sock.h | 12 +++---------
> include/uapi/asm-generic/socket.h | 2 ++
> include/uapi/linux/socket.h | 1 +
> net/core/net_namespace.c | 2 +-
> net/core/sock.c | 13 +++++++++++++
> net/ipv4/inet_connection_sock.c | 3 +++
> 10 files changed, 31 insertions(+), 10 deletions(-)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index 1dd9baf4a6c2..e6b3f38f8c0e 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -131,6 +131,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 1eaf6a1ca561..2c8085ecde0a 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -142,6 +142,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index 8baaad52d799..8bb78ed36e97 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -123,6 +123,8 @@
>
> #define SO_BUF_LOCK 0x4046
>
> +#define SO_TXREHASH 0x4047
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index e80ee8641ac3..cd43a690fbac 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -124,6 +124,8 @@
>
> #define SO_BUF_LOCK 0x0051
>
> +#define SO_TXREHASH 0x0052
> +
> #if !defined(__KERNEL__)
>
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index d8a73edb1629..ec4f736ad085 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -313,6 +313,7 @@ struct bpf_local_storage;
> * @sk_rcvtimeo: %SO_RCVTIMEO setting
> * @sk_sndtimeo: %SO_SNDTIMEO setting
> * @sk_txhash: computed flow hash for use on transmit
> + * @sk_txrehash: enable TX hash rethink
> * @sk_filter: socket filtering instructions
> * @sk_timer: sock cleanup timer
> * @sk_stamp: time stamp of last packet received
> @@ -462,6 +463,7 @@ struct sock {
> unsigned int sk_gso_max_size;
> gfp_t sk_allocation;
> __u32 sk_txhash;
> + unsigned int sk_txrehash;
Using 32bit for this socket option is too much.
>
> /*
> * Because of non atomicity rules, all
> @@ -1954,18 +1956,10 @@ static inline void sk_set_txhash(struct sock *sk)
>
> static inline bool sk_rethink_txhash(struct sock *sk)
> {
> - unsigned int rehash;
> -
> - if (!sk->sk_txhash)
> - return false;
> -
> - rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
> -
> - if (rehash) {
> + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
> sk_set_txhash(sk);
> return true;
> }
> -
> return false;
> }
>
> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> index 1f0a2b4864e4..6c17e477ec9f 100644
> --- a/include/uapi/asm-generic/socket.h
> +++ b/include/uapi/asm-generic/socket.h
> @@ -126,6 +126,8 @@
>
> #define SO_BUF_LOCK 72
>
> +#define SO_TXREHASH 73
> +
> #if !defined(__KERNEL__)
>
> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
> diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
> index 0accd6102ece..75fab2ada8cf 100644
> --- a/include/uapi/linux/socket.h
> +++ b/include/uapi/linux/socket.h
> @@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage {
>
> #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
>
> +#define SOCK_TXREHASH_DEFAULT -1
What is default ?
> #define SOCK_TXREHASH_DISABLED 0
> #define SOCK_TXREHASH_ENABLED 1
>
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 0d833b861f00..537a8532ff8a 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -360,7 +360,7 @@ static int __net_init net_defaults_init_net(struct net *net)
> {
> net->core.sysctl_somaxconn = SOMAXCONN;
>
> - net->core.sysctl_txrehash = SOCK_TXREHASH_DISABLED;
> + net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
This is very confusing.
>
> return 0;
> }
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 62627e868e03..ca349ca4c31d 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1367,6 +1367,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
> ~SOCK_BUF_LOCK_MASK);
> break;
>
> + case SO_TXREHASH:
> + if (val < -1 || val > 1) {
> + ret = -EINVAL;
> + break;
> + }
> + sk->sk_txrehash = val;
> + break;
> +
> default:
> ret = -ENOPROTOOPT;
> break;
> @@ -1733,6 +1741,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
> v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
> break;
>
> + case SO_TXREHASH:
> + v.val = sk->sk_txrehash;
> + break;
> +
> default:
> /* We implement the SO_SNDLOWAT etc to not be settable
> * (1003.1g 7).
> @@ -3165,6 +3177,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
> sk->sk_pacing_rate = ~0UL;
> WRITE_ONCE(sk->sk_pacing_shift, 10);
> sk->sk_incoming_cpu = -1;
> + sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
>
> sk_rx_queue_clear(sk);
> /*
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index f25d02ad4a8a..0d477c816309 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -1046,6 +1046,9 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> sk->sk_ack_backlog = 0;
> inet_csk_delack_init(sk);
>
> + if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
> + sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
> +
> /* There is race window here: we announce ourselves listening,
> * but this transition is still not validated by get_port().
> * It is OK, because this socket enters to hash table only
>
Powered by blists - more mailing lists