lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87msebf7nv.ffs@tglx>
Date: Mon, 24 Feb 2025 20:45:56 +0100
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Anna-Maria Behnsen <anna-maria@...utronix.de>, Frederic Weisbecker
 <frederic@...nel.org>, Benjamin Segall <bsegall@...gle.com>, Eric Dumazet
 <edumazet@...gle.com>, Andrey Vagin <avagin@...nvz.org>, Pavel Tikhomirov
 <ptikhomirov@...tuozzo.com>, Peter Zijlstra <peterz@...radead.org>
Subject: Re: [patch 07/11] posix-timers: Improve hash table performance

On Mon, Feb 24 2025 at 11:15, Thomas Gleixner wrote:

There are two more low-hanging fruits:

      1) The hashing is suboptimal and can simply be improved by using
         jhash2(), which gives a far better distribution in the
         pathological test case with 1.2M timers.

      2) Avoid false sharing

         struct k_itimer has the hlist_node which is used for lookup in
         the hash bucket and the timer lock in the same cache line.

         That's obviously bad if one CPU fiddles with a timer while
         another is walking the hash bucket on which that timer is queued.

         That can be avoided by restructuring struct k_itimer, so that
         the read-mostly (only modified during setup and teardown)
         fields are in the first cache line and the lock and the rest of
         the fields which get written to are in cache lines 2-N.

Combo patch below.

Thanks,

        tglx
---
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -179,23 +179,26 @@ static inline void posix_cputimers_init_
  * @rcu:		RCU head for freeing the timer.
  */
 struct k_itimer {
-	struct hlist_node	list;
-	struct hlist_node	ignored_list;
+	/* 1st cacheline contains read-mostly fields */
 	struct hlist_node	t_hash;
-	spinlock_t		it_lock;
-	const struct k_clock	*kclock;
-	clockid_t		it_clock;
+	struct hlist_node	list;
 	timer_t			it_id;
+	clockid_t		it_clock;
+	int			it_sigev_notify;
+	enum pid_type		it_pid_type;
+	struct signal_struct	*it_signal;
+	const struct k_clock	*kclock;
+
+	/* 2nd cacheline and above contain fields which are modified regularly */
+	spinlock_t		it_lock;
 	int			it_status;
 	bool			it_sig_periodic;
 	s64			it_overrun;
 	s64			it_overrun_last;
 	unsigned int		it_signal_seq;
 	unsigned int		it_sigqueue_seq;
-	int			it_sigev_notify;
-	enum pid_type		it_pid_type;
 	ktime_t			it_interval;
-	struct signal_struct	*it_signal;
+	struct hlist_node	ignored_list;
 	union {
 		struct pid		*it_pid;
 		struct task_struct	*it_process;
@@ -212,7 +215,7 @@ struct k_itimer {
 		} alarm;
 	} it;
 	struct rcu_head		rcu;
-};
+} ____cacheline_aligned_in_smp;
 
 void run_posix_cpu_timers(void);
 void posix_cpu_timers_exit(struct task_struct *task);
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -11,8 +11,8 @@
  */
 #include <linux/compat.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/init.h>
+#include <linux/jhash.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/memblock.h>
@@ -48,11 +48,11 @@ struct timer_hash_bucket {
 
 static struct {
 	struct timer_hash_bucket	*buckets;
-	unsigned long			bits;
+	unsigned long			mask;
 } __timer_data __ro_after_init __aligned(2*sizeof(long));
 
 #define timer_buckets	(__timer_data.buckets)
-#define timer_hashbits	(__timer_data.bits)
+#define timer_hashmask	(__timer_data.mask)
 
 static const struct k_clock * const posix_clocks[];
 static const struct k_clock *clockid_to_kclock(const clockid_t id);
@@ -74,15 +74,15 @@ static struct k_itimer *__lock_timer(tim
 	__timr;								   \
 })
 
-static int hash(struct signal_struct *sig, unsigned int nr)
+static struct timer_hash_bucket *hash_bucket(struct signal_struct *sig, unsigned int nr)
 {
-	return hash_32(hash32_ptr(sig) ^ nr, timer_hashbits);
+	return &timer_buckets[jhash2((u32 *)&sig, sizeof(sig) / sizeof(u32), nr) & timer_hashmask];
 }
 
 static struct k_itimer *posix_timer_by_id(timer_t id)
 {
 	struct signal_struct *sig = current->signal;
-	struct timer_hash_bucket *bucket = &timer_buckets[hash(sig, id)];
+	struct timer_hash_bucket *bucket = hash_bucket(sig, id);
 	struct k_itimer *timer;
 
 	hlist_for_each_entry_rcu(timer, &bucket->head, t_hash) {
@@ -119,7 +119,7 @@ static bool posix_timer_hashed(struct ti
 
 static bool posix_timer_add_at(struct k_itimer *timer, struct signal_struct *sig, unsigned int id)
 {
-	struct timer_hash_bucket *bucket = &timer_buckets[hash(sig, id)];
+	struct timer_hash_bucket *bucket = hash_bucket(sig, id);
 
 	scoped_guard (spinlock, &bucket->lock) {
 		/*
@@ -260,9 +260,9 @@ static int posix_get_hrtimer_res(clockid
 
 static __init int init_posix_timers(void)
 {
-	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof(struct k_itimer), 0,
-					SLAB_PANIC | SLAB_ACCOUNT, NULL);
+	posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer),
+					       __alignof__(struct k_itimer),
+					       SLAB_PANIC | SLAB_ACCOUNT, NULL);
 	return 0;
 }
 __initcall(init_posix_timers);
@@ -424,8 +424,7 @@ void posixtimer_free_timer(struct k_itim
 
 static void posix_timer_unhash_and_free(struct k_itimer *tmr)
 {
-	unsigned int idx = hash(posix_sig_owner(tmr), tmr->it_id);
-	struct timer_hash_bucket *bucket = &timer_buckets[idx];
+	struct timer_hash_bucket *bucket = hash_bucket(posix_sig_owner(tmr), tmr->it_id);
 
 	scoped_guard (spinlock, &bucket->lock)
 		hlist_del_rcu(&tmr->t_hash);
@@ -1611,7 +1610,7 @@ static int __init posixtimer_init(void)
 	timer_buckets = alloc_large_system_hash("posixtimers", sizeof(*timer_buckets),
 						size, 0, 0, &shift, NULL, size, size);
 	size = 1UL << shift;
-	timer_hashbits = ilog2(size);
+	timer_hashmask = size - 1;
 
 	for (i = 0; i < size; i++) {
 		spin_lock_init(&timer_buckets[i].lock);

         

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ