lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 28 Nov 2020 15:27:23 +0100
From:   Oleksandr Natalenko <oleksandr@...alenko.name>
To:     linux-kernel@...r.kernel.org
Cc:     linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
        Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Steven Rostedt <rostedt@...dmis.org>,
        Mike Galbraith <efault@....de>,
        Thomas Gleixner <tglx@...utronix.de>,
        linux-rt-users@...r.kernel.org
Subject: Re: scheduling while atomic in z3fold

On Sat, Nov 28, 2020 at 03:09:24PM +0100, Oleksandr Natalenko wrote:
> > While running v5.10-rc5-rt11 I bumped into the following:
> > 
> > ```
> > BUG: scheduling while atomic: git/18695/0x00000002
> > Preemption disabled at:
> > [<ffffffffbb93fcb3>] z3fold_zpool_malloc+0x463/0x6e0
> > …
> > Call Trace:
> >  dump_stack+0x6d/0x88
> >  __schedule_bug.cold+0x88/0x96
> >  __schedule+0x69e/0x8c0
> >  preempt_schedule_lock+0x51/0x150
> >  rt_spin_lock_slowlock_locked+0x117/0x2c0
> >  rt_spin_lock_slowlock+0x58/0x80
> >  rt_spin_lock+0x2a/0x40
> >  z3fold_zpool_malloc+0x4c1/0x6e0
> >  zswap_frontswap_store+0x39c/0x980
> >  __frontswap_store+0x6e/0xf0
> >  swap_writepage+0x39/0x70
> >  shmem_writepage+0x31b/0x490
> >  pageout+0xf4/0x350
> >  shrink_page_list+0xa28/0xcc0
> >  shrink_inactive_list+0x300/0x690
> >  shrink_lruvec+0x59a/0x770
> >  shrink_node+0x2d6/0x8d0
> >  do_try_to_free_pages+0xda/0x530
> >  try_to_free_pages+0xff/0x260
> >  __alloc_pages_slowpath.constprop.0+0x3d5/0x1230
> >  __alloc_pages_nodemask+0x2f6/0x350
> >  allocate_slab+0x3da/0x660
> >  ___slab_alloc+0x4ff/0x760
> >  __slab_alloc.constprop.0+0x7a/0x100
> >  kmem_cache_alloc+0x27b/0x2c0
> >  __d_alloc+0x22/0x230
> >  d_alloc_parallel+0x67/0x5e0
> >  __lookup_slow+0x5c/0x150
> >  path_lookupat+0x2ea/0x4d0
> >  filename_lookup+0xbf/0x210
> >  vfs_statx.constprop.0+0x4d/0x110
> >  __do_sys_newlstat+0x3d/0x80
> >  do_syscall_64+0x33/0x40
> >  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > ```
> > 
> > The preemption seems to be disabled here:
> > 
> > ```
> > $ scripts/faddr2line mm/z3fold.o z3fold_zpool_malloc+0x463
> > z3fold_zpool_malloc+0x463/0x6e0:
> > add_to_unbuddied at mm/z3fold.c:645
> > (inlined by) z3fold_alloc at mm/z3fold.c:1195
> > (inlined by) z3fold_zpool_malloc at mm/z3fold.c:1737
> > ```
> > 
> > The call to the rt_spin_lock() seems to be here:
> > 
> > ```
> > $ scripts/faddr2line mm/z3fold.o z3fold_zpool_malloc+0x4c1
> > z3fold_zpool_malloc+0x4c1/0x6e0:
> > add_to_unbuddied at mm/z3fold.c:649
> > (inlined by) z3fold_alloc at mm/z3fold.c:1195
> > (inlined by) z3fold_zpool_malloc at mm/z3fold.c:1737
> > ```
> > 
> > Or, in source code:
> > 
> > ```
> >  639 /* Add to the appropriate unbuddied list */
> >  640 static inline void add_to_unbuddied(struct z3fold_pool *pool,
> >  641                 struct z3fold_header *zhdr)
> >  642 {
> >  643     if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
> >  644             zhdr->middle_chunks == 0) {
> >  645         struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);
> >  646
> >  647         int freechunks = num_free_chunks(zhdr);
> >  648         spin_lock(&pool->lock);
> >  649         list_add(&zhdr->buddy, &unbuddied[freechunks]);
> >  650         spin_unlock(&pool->lock);
> >  651         zhdr->cpu = smp_processor_id();
> >  652         put_cpu_ptr(pool->unbuddied);
> >  653     }
> >  654 }
> > ```
> > 
> > Shouldn't the list manipulation be protected with
> > local_lock+this_cpu_ptr instead of get_cpu_ptr+spin_lock?

Totally untested:

```
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 18feaa0bc537..53fcb80c6167 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,6 +41,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/local_lock.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 #include <linux/kmemleak.h>
@@ -156,6 +157,7 @@ struct z3fold_pool {
 	const char *name;
 	spinlock_t lock;
 	spinlock_t stale_lock;
+	local_lock_t llock;
 	struct list_head *unbuddied;
 	struct list_head lru;
 	struct list_head stale;
@@ -642,14 +644,17 @@ static inline void add_to_unbuddied(struct z3fold_pool *pool,
 {
 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
 			zhdr->middle_chunks == 0) {
-		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);
+		struct list_head *unbuddied;
+		int freechunks;
+		local_lock(&pool->llock);
+		unbuddied = *this_cpu_ptr(&pool->unbuddied);
 
-		int freechunks = num_free_chunks(zhdr);
+		freechunks = num_free_chunks(zhdr);
 		spin_lock(&pool->lock);
 		list_add(&zhdr->buddy, &unbuddied[freechunks]);
 		spin_unlock(&pool->lock);
 		zhdr->cpu = smp_processor_id();
-		put_cpu_ptr(pool->unbuddied);
+		local_unlock(&pool->llock);
 	}
 }
 
@@ -887,7 +892,8 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
 
 lookup:
 	/* First, try to find an unbuddied z3fold page. */
-	unbuddied = get_cpu_ptr(pool->unbuddied);
+	local_lock(&pool->llock);
+	unbuddied = *this_cpu_ptr(&pool->unbuddied);
 	for_each_unbuddied_list(i, chunks) {
 		struct list_head *l = &unbuddied[i];
 
@@ -905,7 +911,7 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
 		    !z3fold_page_trylock(zhdr)) {
 			spin_unlock(&pool->lock);
 			zhdr = NULL;
-			put_cpu_ptr(pool->unbuddied);
+			local_unlock(&pool->llock);
 			if (can_sleep)
 				cond_resched();
 			goto lookup;
@@ -919,7 +925,7 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
 		    test_bit(PAGE_CLAIMED, &page->private)) {
 			z3fold_page_unlock(zhdr);
 			zhdr = NULL;
-			put_cpu_ptr(pool->unbuddied);
+			local_unlock(&pool->llock);
 			if (can_sleep)
 				cond_resched();
 			goto lookup;
@@ -934,7 +940,7 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
 		kref_get(&zhdr->refcount);
 		break;
 	}
-	put_cpu_ptr(pool->unbuddied);
+	local_unlock(&pool->llock);
 
 	if (!zhdr) {
 		int cpu;
@@ -1005,6 +1011,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 		goto out_c;
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
+	local_lock_init(&pool->llock);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 	if (!pool->unbuddied)
 		goto out_pool;

```

-- 
  Oleksandr Natalenko (post-factum)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ