[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <54d428e5-651e-2de6-efed-78b61581b9ca@csgroup.eu>
Date: Sun, 2 Oct 2022 08:58:10 +0000
From: Christophe Leroy <christophe.leroy@...roup.eu>
To: Eric Dumazet <eric.dumazet@...il.com>,
"David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
CC: netdev <netdev@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>, Willy Tarreau <w@....eu>
Subject: Re: [PATCH net-next] once: add DO_ONCE_SLOW() for sleepable contexts
Le 01/10/2022 à 22:51, Eric Dumazet a écrit :
> From: Eric Dumazet <edumazet@...gle.com>
>
> Christophe Leroy reported a ~80ms latency spike
> happening at first TCP connect() time.
>
> This is because __inet_hash_connect() uses get_random_once()
> to populate a perturbation table which became quite big
> after commit 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16")
>
> get_random_once() uses DO_ONCE(), which blocks hard irqs for the duration
> of the operation.
>
> This patch adds DO_ONCE_SLOW() which uses a mutex instead of a spinlock
> for operations where we prefer to stay in process context.
>
> Then __inet_hash_connect() can use get_random_slow_once()
> to populate its perturbation table.
Many thanks for your quick answer and your patch.
It works great; now the irqsoff tracer reports a 2ms latency in an SPI
transfer. So the issue with TCP connect is gone.
>
> Fixes: 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16")
> Fixes: 190cc82489f4 ("tcp: change source port randomizarion at connect() time")
> Reported-by: Christophe Leroy <christophe.leroy@...roup.eu>
> Link: https://lore.kernel.org/netdev/CANn89iLAEYBaoYajy0Y9UmGFff5GPxDUoG-ErVB2jDdRNQ5Tug@mail.gmail.com/T/#t
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Cc: Willy Tarreau <w@....eu>
Tested-by: Christophe Leroy <christophe.leroy@...roup.eu>
> ---
> include/linux/once.h | 28 ++++++++++++++++++++++++++++
> lib/once.c | 30 ++++++++++++++++++++++++++++++
> net/ipv4/inet_hashtables.c | 4 ++--
> 3 files changed, 60 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/once.h b/include/linux/once.h
> index b14d8b309d52b198bb144689fe67d9ed235c2b3e..176ab75b42df740a738d04d8480821a0b3b65ba9 100644
> --- a/include/linux/once.h
> +++ b/include/linux/once.h
> @@ -5,10 +5,18 @@
> #include <linux/types.h>
> #include <linux/jump_label.h>
>
> +/* Helpers used from arbitrary contexts.
> + * Hard irqs are blocked, be cautious.
> + */
> bool __do_once_start(bool *done, unsigned long *flags);
> void __do_once_done(bool *done, struct static_key_true *once_key,
> unsigned long *flags, struct module *mod);
>
> +/* Variant for process contexts only. */
> +bool __do_once_slow_start(bool *done);
> +void __do_once_slow_done(bool *done, struct static_key_true *once_key,
> + struct module *mod);
> +
> /* Call a function exactly once. The idea of DO_ONCE() is to perform
> * a function call such as initialization of random seeds, etc, only
> * once, where DO_ONCE() can live in the fast-path. After @func has
> @@ -52,7 +60,27 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
> ___ret; \
> })
>
> +/* Variant of DO_ONCE() for process/sleepable contexts. */
> +#define DO_ONCE_SLOW(func, ...) \
> + ({ \
> + bool ___ret = false; \
> + static bool __section(".data.once") ___done = false; \
> + static DEFINE_STATIC_KEY_TRUE(___once_key); \
> + if (static_branch_unlikely(&___once_key)) { \
> + ___ret = __do_once_slow_start(&___done); \
> + if (unlikely(___ret)) { \
> + func(__VA_ARGS__); \
> + __do_once_slow_done(&___done, &___once_key, \
> + THIS_MODULE); \
> + } \
> + } \
> + ___ret; \
> + })
> +
> #define get_random_once(buf, nbytes) \
> DO_ONCE(get_random_bytes, (buf), (nbytes))
>
> +#define get_random_slow_once(buf, nbytes) \
> + DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes))
> +
> #endif /* _LINUX_ONCE_H */
> diff --git a/lib/once.c b/lib/once.c
> index 59149bf3bfb4a97e4fa7febee737155d700bae48..351f66aad310a47f17d0636da0ed5b2b4460522d 100644
> --- a/lib/once.c
> +++ b/lib/once.c
> @@ -66,3 +66,33 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
> once_disable_jump(once_key, mod);
> }
> EXPORT_SYMBOL(__do_once_done);
> +
> +static DEFINE_MUTEX(once_mutex);
> +
> +bool __do_once_slow_start(bool *done)
> + __acquires(once_mutex)
> +{
> + mutex_lock(&once_mutex);
> + if (*done) {
> + mutex_unlock(&once_mutex);
> + /* Keep sparse happy by restoring an even lock count on
> + * this mutex. In case we return here, we don't call into
> + * __do_once_done but return early in the DO_ONCE_SLOW() macro.
> + */
> + __acquire(once_mutex);
> + return false;
> + }
> +
> + return true;
> +}
> +EXPORT_SYMBOL(__do_once_slow_start);
> +
> +void __do_once_slow_done(bool *done, struct static_key_true *once_key,
> + struct module *mod)
> + __releases(once_mutex)
> +{
> + *done = true;
> + mutex_unlock(&once_mutex);
> + once_disable_jump(once_key, mod);
> +}
> +EXPORT_SYMBOL(__do_once_slow_done);
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index 49db8c597eea83a27e91edc429c2c4779b0a5cd7..dc1c5629cd0d61716d6d99131c57b49717785709 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -958,8 +958,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
> if (likely(remaining > 1))
> remaining &= ~1U;
>
> - net_get_random_once(table_perturb,
> - INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
> + get_random_slow_once(table_perturb,
> + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
> index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
>
> offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
> --
> 2.38.0.rc1.362.ged0d419d3c-goog
>
Powered by blists - more mailing lists