lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230524093246.GP83892@hirez.programming.kicks-ass.net>
Date:   Wed, 24 May 2023 11:32:46 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     torvalds@...ux-foundation.org
Cc:     corbet@....net, will@...nel.org, boqun.feng@...il.com,
        mark.rutland@....com, catalin.marinas@....com, dennis@...nel.org,
        tj@...nel.org, cl@...ux.com, hca@...ux.ibm.com, gor@...ux.ibm.com,
        agordeev@...ux.ibm.com, borntraeger@...ux.ibm.com,
        svens@...ux.ibm.com, tglx@...utronix.de, mingo@...hat.com,
        bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org,
        hpa@...or.com, joro@...tes.org, suravee.suthikulpanit@....com,
        robin.murphy@....com, dwmw2@...radead.org,
        baolu.lu@...ux.intel.com, Arnd Bergmann <arnd@...db.de>,
        Herbert Xu <herbert@...dor.apana.org.au>, davem@...emloft.net,
        penberg@...nel.org, rientjes@...gle.com, iamjoonsoo.kim@....com,
        Andrew Morton <akpm@...ux-foundation.org>, vbabka@...e.cz,
        roman.gushchin@...ux.dev, 42.hyeyoo@...il.com,
        linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, linux-s390@...r.kernel.org,
        iommu@...ts.linux.dev, linux-arch@...r.kernel.org,
        linux-crypto@...r.kernel.org, sfr@...b.auug.org.au,
        mpe@...erman.id.au
Subject: Re: [PATCH v3 08/11] slub: Replace cmpxchg_double()

On Mon, May 15, 2023 at 09:57:07AM +0200, Peter Zijlstra wrote:

> @@ -3008,6 +3029,22 @@ static inline bool pfmemalloc_match(stru
>  }
>  
>  #ifndef CONFIG_SLUB_TINY
> +static inline bool
> +__update_cpu_freelist_fast(struct kmem_cache *s,
> +			   void *freelist_old, void *freelist_new,
> +			   unsigned long tid)
> +{
> +#ifdef system_has_freelist_aba
> +	freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
> +	freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
> +
> +	return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
> +					 old.full, new.full) == old.full;
> +#else
> +	return false;
> +#endif
> +}
> +
>  /*
>   * Check the slab->freelist and either transfer the freelist to the
>   * per cpu freelist or deactivate the slab.
> @@ -3359,11 +3396,7 @@ static __always_inline void *__slab_allo
>  		 * against code executing on this cpu *not* from access by
>  		 * other cpus.
>  		 */
> -		if (unlikely(!this_cpu_cmpxchg_double(
> -				s->cpu_slab->freelist, s->cpu_slab->tid,
> -				object, tid,
> -				next_object, next_tid(tid)))) {
> -
> +		if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
>  			note_cmpxchg_failure("slab_alloc", s, tid);
>  			goto redo;
>  		}
> @@ -3736,11 +3769,7 @@ static __always_inline void do_slab_free
>  
>  		set_freepointer(s, tail_obj, freelist);
>  
> -		if (unlikely(!this_cpu_cmpxchg_double(
> -				s->cpu_slab->freelist, s->cpu_slab->tid,
> -				freelist, tid,
> -				head, next_tid(tid)))) {
> -
> +		if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
>  			note_cmpxchg_failure("slab_free", s, tid);
>  			goto redo;
>  		}

This isn't right; the this_cpu_cmpxchg_double() was unconditional and
relied on the local_irq_save() fallback when no native cmpxchg128 is
present.

The below delta makes things boot again when system_has_cmpxchg128 is
not defined.

I'm going to zap these patches from tip/locking/core for a few days and
fold the below back into the series and let it run through the robots
again.

---
 mm/slab.h | 20 +++++++++++---------
 mm/slub.c |  6 +-----
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 5880c70de3d6..b191bf68e6e0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -6,36 +6,36 @@
  */
 void __init kmem_cache_init(void);
 
-#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
 #ifdef CONFIG_64BIT
 # ifdef system_has_cmpxchg128
 # define system_has_freelist_aba()	system_has_cmpxchg128()
 # define try_cmpxchg_freelist		try_cmpxchg128
-# define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg128
-typedef u128 freelist_full_t;
 # endif
+#define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg128
+typedef u128 freelist_full_t;
 #else /* CONFIG_64BIT */
 # ifdef system_has_cmpxchg64
 # define system_has_freelist_aba()	system_has_cmpxchg64()
 # define try_cmpxchg_freelist		try_cmpxchg64
-# define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg64
-typedef u64 freelist_full_t;
 # endif
+#define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg64
+typedef u64 freelist_full_t;
 #endif /* CONFIG_64BIT */
-#endif /* CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
+
+#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
+#undef system_has_freelist_aba
+#endif
 
 /*
  * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
  * problems with cmpxchg of just a pointer.
  */
 typedef union {
-#ifdef system_has_freelist_aba
 	struct {
 		void *freelist;
 		unsigned long counter;
 	};
 	freelist_full_t full;
-#endif
 } freelist_aba_t;
 
 /* Reuses the bits in struct page */
@@ -82,7 +82,9 @@ struct slab {
 						};
 					};
 				};
+#ifdef system_has_freelist_aba
 				freelist_aba_t freelist_counter;
+#endif
 			};
 		};
 		struct rcu_head rcu_head;
@@ -110,7 +112,7 @@ SLAB_MATCH(memcg_data, memcg_data);
 #undef SLAB_MATCH
 static_assert(sizeof(struct slab) <= sizeof(struct page));
 #if defined(system_has_freelist_aba) && defined(CONFIG_SLUB)
-static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *)));
+static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
 #endif
 
 /**
diff --git a/mm/slub.c b/mm/slub.c
index 161b091746b7..af92c770606d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3034,15 +3034,11 @@ __update_cpu_freelist_fast(struct kmem_cache *s,
 			   void *freelist_old, void *freelist_new,
 			   unsigned long tid)
 {
-#ifdef system_has_freelist_aba
 	freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
 	freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
 
 	return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
 					 old.full, new.full) == old.full;
-#else
-	return false;
-#endif
 }
 
 /*

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ