lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250312151848.RlB_XuHA@linutronix.de>
Date: Wed, 12 Mar 2025 16:18:48 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
	Darren Hart <dvhart@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>, Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Waiman Long <longman@...hat.com>
Subject: Re: [PATCH v10 00/21] futex: Add support task local hash maps,
 FUTEX2_NUMA and FUTEX2_MPOL

On 2025-03-12 16:16:13 [+0100], To linux-kernel@...r.kernel.org wrote:
> The complete tree is at
> 	https://git.kernel.org/pub/scm/linux/kernel/git/bigeasy/staging.git/log/?h=futex_local_v10
> 	https://git.kernel.org/pub/scm/linux/kernel/git/bigeasy/staging.git futex_local_v10
> 
> v9…v10: https://lore.kernel.org/all/20250225170914.289358-1-bigeasy@linutronix.de/
The exact diff vs peterz/locking/futex:

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 0cdd5882e89c1..19c37afa0432a 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -82,12 +82,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 int futex_hash_prctl(unsigned long arg2, unsigned long arg3);
 
-#ifdef CONFIG_BASE_SMALL
-static inline int futex_hash_allocate_default(void) { return 0; }
-static inline void futex_hash_free(struct mm_struct *mm) { }
-static inline void futex_mm_init(struct mm_struct *mm) { }
-#else /* !CONFIG_BASE_SMALL */
-
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 int futex_hash_allocate_default(void);
 void futex_hash_free(struct mm_struct *mm);
 
@@ -97,7 +92,11 @@ static inline void futex_mm_init(struct mm_struct *mm)
 	mutex_init(&mm->futex_hash_lock);
 }
 
-#endif /* CONFIG_BASE_SMALL */
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate_default(void) { return 0; }
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
 
 #else /* !CONFIG_FUTEX */
 static inline void futex_init_task(struct task_struct *tsk) { }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9399ee7d40201..e0e8adbe66bdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -938,7 +938,7 @@ struct mm_struct {
 		 */
 		seqcount_t mm_lock_seq;
 #endif
-#if defined(CONFIG_FUTEX) && !defined(CONFIG_BASE_SMALL)
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 		struct mutex			futex_hash_lock;
 		struct futex_private_hash	__rcu *futex_phash;
 		struct futex_private_hash	*futex_phash_new;
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
index 6322d8c1c6b42..2fb2af6d98249 100644
--- a/include/linux/rcuref.h
+++ b/include/linux/rcuref.h
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
  * rcuref_read - Read the number of held reference counts of a rcuref
  * @ref:	Pointer to the reference count
  *
- * Return: The number of held references (0 ... N)
+ * Return: The number of held references (0 ... N). The value 0 does not
+ * indicate that it is safe to schedule the object, protected by this reference
+ * counter, for deconstruction.
+ * If you want to know if the reference counter has been marked DEAD (as
+ * signaled by rcuref_put()), please use rcuref_is_dead().
  */
 static inline unsigned int rcuref_read(rcuref_t *ref)
 {
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
 	return c >= RCUREF_RELEASED ? 0 : c + 1;
 }
 
+/**
+ * rcuref_is_dead -	Check if the rcuref has already been marked dead
+ * @ref:		Pointer to the reference count
+ *
+ * Return: True if the object has been marked DEAD. This signals that a previous
+ * invocation of rcuref_put() returned true on this reference counter meaning
+ * the protected object can safely be scheduled for deconstruction.
+ * Otherwise, returns false.
+ */
+static inline bool rcuref_is_dead(rcuref_t *ref)
+{
+	unsigned int c = atomic_read(&ref->refcnt);
+
+	return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
+}
+
 extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
 
 /**
diff --git a/init/Kconfig b/init/Kconfig
index a0ea04c177842..a4502a9077e03 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1683,6 +1683,16 @@ config FUTEX_PI
 	depends on FUTEX && RT_MUTEXES
 	default y
 
+config FUTEX_PRIVATE_HASH
+	bool
+	depends on FUTEX && !BASE_SMALL && MMU
+	default y
+
+config FUTEX_MPOL
+	bool
+	depends on FUTEX && NUMA
+	default y
+
 config EPOLL
 	bool "Enable eventpoll support" if EXPERT
 	default y
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 976a487bf3ad5..65523f3cfe32e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -136,7 +136,7 @@ static inline bool futex_key_is_private(union futex_key *key)
 static struct futex_hash_bucket *
 __futex_hash(union futex_key *key, struct futex_private_hash *fph);
 
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 static struct futex_hash_bucket *
 __futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
 {
@@ -196,12 +196,12 @@ static bool __futex_pivot_hash(struct mm_struct *mm,
 {
 	struct futex_private_hash *fph;
 
-	lockdep_assert_held(&mm->futex_hash_lock);
 	WARN_ON_ONCE(mm->futex_phash_new);
 
-	fph = mm->futex_phash;
+	fph = rcu_dereference_protected(mm->futex_phash,
+					lockdep_is_held(&mm->futex_hash_lock));
 	if (fph) {
-		if (rcuref_read(&fph->users) != 0) {
+		if (!rcuref_is_dead(&fph->users)) {
 			mm->futex_phash_new = new;
 			return false;
 		}
@@ -262,6 +262,10 @@ bool futex_private_hash_get(struct futex_private_hash *fph)
 
 void futex_private_hash_put(struct futex_private_hash *fph)
 {
+	/*
+	 * Ignore the result; the DEAD state is picked up
+	 * when rcuref_get() starts failing via rcuref_is_dead().
+	 */
 	if (rcuref_put(&fph->users))
 		wake_up_var(fph->mm);
 }
@@ -301,7 +305,7 @@ void futex_hash_put(struct futex_hash_bucket *hb)
 	futex_private_hash_put(fph);
 }
 
-#else
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
 
 static inline struct futex_hash_bucket *
 __futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
@@ -314,8 +318,9 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
 	return __futex_hash(key, NULL);
 }
 
-#endif /* CONFIG_BASE_SMALL */
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
 
+#ifdef CONFIG_FUTEX_MPOL
 static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma = vma_lookup(mm, addr);
@@ -325,7 +330,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
 	if (!vma)
 		return FUTEX_NO_NODE;
 
-	mpol = vma->vm_policy;
+	mpol = vma_policy(vma);
 	if (!mpol)
 		return FUTEX_NO_NODE;
 
@@ -373,6 +378,14 @@ static int futex_mpol(struct mm_struct *mm, unsigned long addr)
 	guard(mmap_read_lock)(mm);
 	return __futex_key_to_node(mm, addr);
 }
+#else /* !CONFIG_FUTEX_MPOL */
+
+static int futex_mpol(struct mm_struct *mm, unsigned long addr)
+{
+	return FUTEX_NO_NODE;
+}
+
+#endif /* CONFIG_FUTEX_MPOL */
 
 /**
  * futex_hash - Return the hash bucket in the global hash
@@ -420,7 +433,6 @@ __futex_hash(union futex_key *key, struct futex_private_hash *fph)
 	return &futex_queues[node][hash & futex_hashmask];
 }
 
-
 /**
  * futex_setup_timer - set up the sleeping hrtimer.
  * @time:	ptr to the given timeout value
@@ -932,9 +944,6 @@ int futex_unqueue(struct futex_q *q)
 
 void futex_q_lockptr_lock(struct futex_q *q)
 {
-#if 0
-	struct futex_hash_bucket *hb;
-#endif
 	spinlock_t *lock_ptr;
 
 	/*
@@ -949,18 +958,6 @@ void futex_q_lockptr_lock(struct futex_q *q)
 		spin_unlock(lock_ptr);
 		goto retry;
 	}
-#if 0
-	hb = container_of(lock_ptr, struct futex_hash_bucket, lock);
-	/*
-	 * The caller needs to either hold a reference on the hash (to ensure
-	 * that the hash is not resized) _or_ be enqueued on the hash. This
-	 * ensures that futex_q::lock_ptr is updated while moved to the new
-	 * hash during resize.
-	 * Once the hash bucket is locked the resize operation, which might be
-	 * in progress, will block on the lock.
-	 */
-	return hb;
-#endif
 }
 
 /*
@@ -1497,7 +1494,7 @@ void futex_exit_release(struct task_struct *tsk)
 static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
 				   struct futex_private_hash *fph)
 {
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 	fhb->priv = fph;
 #endif
 	atomic_set(&fhb->waiters, 0);
@@ -1505,21 +1502,30 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
 	spin_lock_init(&fhb->lock);
 }
 
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 void futex_hash_free(struct mm_struct *mm)
 {
+	struct futex_private_hash *fph;
+
 	kvfree(mm->futex_phash_new);
-	kvfree(mm->futex_phash);
+	fph = rcu_dereference_raw(mm->futex_phash);
+	if (fph) {
+		WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
+		kvfree(fph);
+	}
 }
 
 static bool futex_pivot_pending(struct mm_struct *mm)
 {
+	struct futex_private_hash *fph;
+
 	guard(rcu)();
 
 	if (!mm->futex_phash_new)
 		return false;
 
-	return !rcuref_read(&mm->futex_phash->users);
+	fph = rcu_dereference(mm->futex_phash);
+	return !rcuref_read(&fph->users);
 }
 
 static bool futex_hash_less(struct futex_private_hash *a,
@@ -1560,7 +1566,7 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
 	 */
 	scoped_guard (rcu) {
 		fph = rcu_dereference(mm->futex_phash);
-		if (fph && !mm->futex_phash->hash_mask) {
+		if (fph && !fph->hash_mask) {
 			if (custom)
 				return -EBUSY;
 			return 0;
@@ -1591,7 +1597,8 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
 		struct futex_private_hash *free __free(kvfree) = NULL;
 		struct futex_private_hash *cur, *new;
 
-		cur = mm->futex_phash;
+		cur = rcu_dereference_protected(mm->futex_phash,
+						lockdep_is_held(&mm->futex_hash_lock));
 		new = mm->futex_phash_new;
 		mm->futex_phash_new = NULL;
 
@@ -1602,7 +1609,7 @@ static int futex_hash_allocate(unsigned int hash_slots, bool custom)
 				 * allocated a replacement hash, drop the initial
 				 * reference on the existing hash.
 				 */
-				futex_private_hash_put(mm->futex_phash);
+				futex_private_hash_put(cur);
 			}
 
 			if (new) {
@@ -1683,7 +1690,7 @@ static int futex_hash_get_slots(void)
 
 static int futex_hash_allocate(unsigned int hash_slots, bool custom)
 {
-	return 0;
+	return -EINVAL;
 }
 
 static int futex_hash_get_slots(void)
@@ -1723,6 +1730,7 @@ static int __init futex_init(void)
 #else
 	hashsize = 256 * num_possible_cpus();
 	hashsize /= num_possible_nodes();
+	hashsize = max(4, hashsize);
 	hashsize = roundup_pow_of_two(hashsize);
 #endif
 	futex_hashshift = ilog2(hashsize);
@@ -1740,12 +1748,15 @@ static int __init futex_init(void)
 		BUG_ON(!table);
 
 		for (i = 0; i < hashsize; i++)
-			futex_hash_bucket_init(&table[i], 0);
+			futex_hash_bucket_init(&table[i], NULL);
 
 		futex_queues[n] = table;
 	}
 
 	futex_hashmask = hashsize - 1;
+	pr_info("futex hash table entries: %lu (%lu bytes on %d NUMA nodes, total %lu KiB, %s).\n",
+		hashsize, size, num_possible_nodes(), size * num_possible_nodes() / 1024,
+		order > MAX_PAGE_ORDER ? "vmalloc" : "linear");
 	return 0;
 }
 core_initcall(futex_init);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 40f06523a3565..52e9c0c4b6c87 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -223,14 +223,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 
 extern struct futex_hash_bucket *futex_hash(union futex_key *key);
 
-#ifndef CONFIG_BASE_SMALL
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
 extern void futex_hash_get(struct futex_hash_bucket *hb);
 extern void futex_hash_put(struct futex_hash_bucket *hb);
 
 extern struct futex_private_hash *futex_private_hash(void);
 extern bool futex_private_hash_get(struct futex_private_hash *fph);
 extern void futex_private_hash_put(struct futex_private_hash *fph);
-#else
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
 static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
 static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index baa79abdaf037..d04e601a8f4d7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -209,6 +209,11 @@ EXPORT_SYMBOL(vmalloc_noprof);
 
 void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);
 
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
+{
+	return vmalloc_huge_noprof(size, gfp_mask);
+}
+
 /*
  *	vzalloc - allocate virtually contiguous memory with zero fill
  *
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 39fe43183a64f..69247b46413ca 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3968,9 +3968,9 @@ EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
 
 void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
 {
-	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-				    gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
-				    node, __builtin_return_address(0));
+	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+					   gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+					   node, __builtin_return_address(0));
 }
 
 /**


Sebastian

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ