[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87msebf7nv.ffs@tglx>
Date: Mon, 24 Feb 2025 20:45:56 +0100
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Anna-Maria Behnsen <anna-maria@...utronix.de>, Frederic Weisbecker
<frederic@...nel.org>, Benjamin Segall <bsegall@...gle.com>, Eric Dumazet
<edumazet@...gle.com>, Andrey Vagin <avagin@...nvz.org>, Pavel Tikhomirov
<ptikhomirov@...tuozzo.com>, Peter Zijlstra <peterz@...radead.org>
Subject: Re: [patch 07/11] posix-timers: Improve hash table performance
On Mon, Feb 24 2025 at 11:15, Thomas Gleixner wrote:
There are two more low-hanging fruits:
1) The hashing is suboptimal and can simply be improved by using
jhash2(), which gives a way better distribution in the
pathological test case with 1.2M timers
2) Avoid false sharing
struct k_itimer has the hlist_node, which is used for lookup in
the hash bucket, and the timer lock in the same cache line.
That's obviously bad if one CPU fiddles with a timer while the
other is walking the hash bucket on which that timer is queued.
That can be avoided by restructuring struct k_itimer, so that
the read-mostly (only modified during setup and teardown)
fields are in the first cache line and the lock and the rest of
the fields which get written to are in cacheline 2-N.
Combo patch below.
Thanks,
tglx
---
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -179,23 +179,26 @@ static inline void posix_cputimers_init_
* @rcu: RCU head for freeing the timer.
*/
struct k_itimer {
- struct hlist_node list;
- struct hlist_node ignored_list;
+ /* 1st cacheline contains read-mostly fields */
struct hlist_node t_hash;
- spinlock_t it_lock;
- const struct k_clock *kclock;
- clockid_t it_clock;
+ struct hlist_node list;
timer_t it_id;
+ clockid_t it_clock;
+ int it_sigev_notify;
+ enum pid_type it_pid_type;
+ struct signal_struct *it_signal;
+ const struct k_clock *kclock;
+
+ /* 2nd cacheline and above contain fields which are modified regularly */
+ spinlock_t it_lock;
int it_status;
bool it_sig_periodic;
s64 it_overrun;
s64 it_overrun_last;
unsigned int it_signal_seq;
unsigned int it_sigqueue_seq;
- int it_sigev_notify;
- enum pid_type it_pid_type;
ktime_t it_interval;
- struct signal_struct *it_signal;
+ struct hlist_node ignored_list;
union {
struct pid *it_pid;
struct task_struct *it_process;
@@ -212,7 +215,7 @@ struct k_itimer {
} alarm;
} it;
struct rcu_head rcu;
-};
+} ____cacheline_aligned_in_smp;
void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -11,8 +11,8 @@
*/
#include <linux/compat.h>
#include <linux/compiler.h>
-#include <linux/hash.h>
#include <linux/init.h>
+#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/memblock.h>
@@ -48,11 +48,11 @@ struct timer_hash_bucket {
static struct {
struct timer_hash_bucket *buckets;
- unsigned long bits;
+ unsigned long mask;
} __timer_data __ro_after_init __aligned(2*sizeof(long));
#define timer_buckets (__timer_data.buckets)
-#define timer_hashbits (__timer_data.bits)
+#define timer_hashmask (__timer_data.mask)
static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
@@ -74,15 +74,15 @@ static struct k_itimer *__lock_timer(tim
__timr; \
})
-static int hash(struct signal_struct *sig, unsigned int nr)
+static struct timer_hash_bucket *hash_bucket(struct signal_struct *sig, unsigned int nr)
{
- return hash_32(hash32_ptr(sig) ^ nr, timer_hashbits);
+ return &timer_buckets[jhash2((u32 *)&sig, sizeof(sig) / sizeof(u32), nr) & timer_hashmask];
}
static struct k_itimer *posix_timer_by_id(timer_t id)
{
struct signal_struct *sig = current->signal;
- struct timer_hash_bucket *bucket = &timer_buckets[hash(sig, id)];
+ struct timer_hash_bucket *bucket = hash_bucket(sig, id);
struct k_itimer *timer;
hlist_for_each_entry_rcu(timer, &bucket->head, t_hash) {
@@ -119,7 +119,7 @@ static bool posix_timer_hashed(struct ti
static bool posix_timer_add_at(struct k_itimer *timer, struct signal_struct *sig, unsigned int id)
{
- struct timer_hash_bucket *bucket = &timer_buckets[hash(sig, id)];
+ struct timer_hash_bucket *bucket = hash_bucket(sig, id);
scoped_guard (spinlock, &bucket->lock) {
/*
@@ -260,9 +260,9 @@ static int posix_get_hrtimer_res(clockid
static __init int init_posix_timers(void)
{
- posix_timers_cache = kmem_cache_create("posix_timers_cache",
- sizeof(struct k_itimer), 0,
- SLAB_PANIC | SLAB_ACCOUNT, NULL);
+ posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer),
+ __alignof__(struct k_itimer),
+ SLAB_PANIC | SLAB_ACCOUNT, NULL);
return 0;
}
__initcall(init_posix_timers);
@@ -424,8 +424,7 @@ void posixtimer_free_timer(struct k_itim
static void posix_timer_unhash_and_free(struct k_itimer *tmr)
{
- unsigned int idx = hash(posix_sig_owner(tmr), tmr->it_id);
- struct timer_hash_bucket *bucket = &timer_buckets[idx];
+ struct timer_hash_bucket *bucket = hash_bucket(posix_sig_owner(tmr), tmr->it_id);
scoped_guard (spinlock, &bucket->lock)
hlist_del_rcu(&tmr->t_hash);
@@ -1611,7 +1610,7 @@ static int __init posixtimer_init(void)
timer_buckets = alloc_large_system_hash("posixtimers", sizeof(*timer_buckets),
size, 0, 0, &shift, NULL, size, size);
size = 1UL << shift;
- timer_hashbits = ilog2(size);
+ timer_hashmask = size - 1;
for (i = 0; i < size; i++) {
spin_lock_init(&timer_buckets[i].lock);
Powered by blists - more mailing lists