lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 09 Dec 2020 07:13:31 +0100
From:   Mike Galbraith <efault@....de>
To:     Vitaly Wool <vitaly.wool@...sulko.com>
Cc:     Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Oleksandr Natalenko <oleksandr@...alenko.name>,
        LKML <linux-kernel@...r.kernel.org>,
        Linux-MM <linux-mm@...ck.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        linux-rt-users@...r.kernel.org
Subject: Re: scheduling while atomic in z3fold

On Wed, 2020-12-09 at 00:26 +0100, Vitaly Wool wrote:
> Hi Mike,
>
> On 2020-12-07 16:41, Mike Galbraith wrote:
> > On Mon, 2020-12-07 at 16:21 +0100, Vitaly Wool wrote:
> >> On Mon, Dec 7, 2020 at 1:34 PM Mike Galbraith <efault@....de> wrote:
> >>>
> >>
> >>> Unfortunately, that made zero difference.
> >>
> >> Okay, I suggest that you submit the patch that changes read_lock() to
> >> write_lock() in __release_z3fold_page() and I'll ack it then.
> >> I would like to rewrite the code so that write_lock is not necessary
> >> there but I don't want to hold you back and it isn't likely that I'll
> >> complete this today.
> >
> > Nah, I'm in no rush... especially not to sign off on "Because the
> > little voices in my head said this bit should look like that bit over
> > yonder, and testing _seems_ to indicate they're right about that" :)
> >
> > 	-Mike
> >
>
> okay, thanks. Would this make things better:

Yup, z3fold became RT tolerant with this (un-munged and) applied.

BTW, I don't think my write_lock() hacklet actually plugged the hole
that leads to the below.  I think it just reduced the odds of the two
meeting enough to make it look ~solid in fairly limited test time.

[ 3369.373023] kworker/-7413      4.....12 3368809247us : do_compact_page: zhdr: ffff95d93abd8000 zhdr->slots: ffff95d951f5df80 zhdr->slots->slot[0]: 0
[ 3369.373025] kworker/-7413      4.....12 3368809248us : do_compact_page: old_handle ffff95d951f5df98 *old_handle was ffff95d93abd804f now is ffff95da3ab8104c
[ 3369.373027] kworker/-7413      4.....11 3368809249us : __release_z3fold_page.constprop.25: freed ffff95d951f5df80
[ 3369.373028] ---------------------------------
[ 3369.373029] CR2: 0000000000000018
crash> p debug_handle | grep '\[2'
  [2]: ffff95dc1ecac0d0
crash> rd ffff95dc1ecac0d0
ffff95dc1ecac0d0:  ffff95d951f5df98                    ...Q....
crash> p debug_zhdr | grep '\[2'
  [2]: ffff95dc1ecac0c8
crash> rd ffff95dc1ecac0c8
ffff95dc1ecac0c8:  ffff95da3ab81000                    ...:....  <== kworker is not working on same zhdr as free_handle()..
crash> p debug_slots | grep '\[2'
  [2]: ffff95dc1ecac0c0
crash> rd ffff95dc1ecac0c0
ffff95dc1ecac0c0:  ffff95d951f5df80                    ...Q....  <== ..but is the same slots, and frees it under free_handle().
crash> bt -sx                                                          leading to use after free+corruption+explosion 1us later.
PID: 9334   TASK: ffff95d95a1eb3c0  CPU: 2   COMMAND: "swapoff"
 #0 [ffffb4248847f8f0] machine_kexec+0x16e at ffffffffa605f8ce
 #1 [ffffb4248847f938] __crash_kexec+0xd2 at ffffffffa614c162
 #2 [ffffb4248847f9f8] crash_kexec+0x30 at ffffffffa614d350
 #3 [ffffb4248847fa08] oops_end+0xca at ffffffffa602680a
 #4 [ffffb4248847fa28] no_context+0x14d at ffffffffa606d7cd
 #5 [ffffb4248847fa88] exc_page_fault+0x2b8 at ffffffffa68bdb88
 #6 [ffffb4248847fae0] asm_exc_page_fault+0x1e at ffffffffa6a00ace
 #7 [ffffb4248847fb68] mark_wakeup_next_waiter+0x51 at ffffffffa60ea121
 #8 [ffffb4248847fbd0] rt_mutex_futex_unlock+0x4f at ffffffffa68c93ef
 #9 [ffffb4248847fc10] z3fold_zpool_free+0x593 at ffffffffc0ecb663 [z3fold]
#10 [ffffb4248847fc78] zswap_free_entry+0x43 at ffffffffa627c823
#11 [ffffb4248847fc88] zswap_frontswap_invalidate_page+0x8a at ffffffffa627c92a
#12 [ffffb4248847fcb0] __frontswap_invalidate_page+0x48 at ffffffffa627c018
#13 [ffffb4248847fcd8] swapcache_free_entries+0x1ee at ffffffffa6276f5e
#14 [ffffb4248847fd20] free_swap_slot+0x9f at ffffffffa627b8ff
#15 [ffffb4248847fd40] delete_from_swap_cache+0x61 at ffffffffa6274621
#16 [ffffb4248847fd60] try_to_free_swap+0x70 at ffffffffa6277520
#17 [ffffb4248847fd70] unuse_vma+0x55c at ffffffffa627869c
#18 [ffffb4248847fe90] try_to_unuse+0x139 at ffffffffa6278e89
#19 [ffffb4248847fee8] __x64_sys_swapoff+0x1eb at ffffffffa62798cb
#20 [ffffb4248847ff40] do_syscall_64+0x33 at ffffffffa68b9ab3
#21 [ffffb4248847ff50] entry_SYSCALL_64_after_hwframe+0x44 at ffffffffa6a0007c
    RIP: 00007fbd835a5d17  RSP: 00007ffd60634458  RFLAGS: 00000202
    RAX: ffffffffffffffda  RBX: 0000559540e34b60  RCX: 00007fbd835a5d17
    RDX: 0000000000000001  RSI: 0000000000000001  RDI: 0000559540e34b60
    RBP: 0000000000000001   R8: 00007ffd606344c0   R9: 0000000000000003
    R10: 0000559540e34721  R11: 0000000000000202  R12: 0000000000000001
    R13: 0000000000000000  R14: 00007ffd606344c0  R15: 0000000000000000
    ORIG_RAX: 00000000000000a8  CS: 0033  SS: 002b
crash>

>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 18feaa0bc537..340c38a5ffac 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -303,10 +303,9 @@ static inline void put_z3fold_header(struct
> z3fold_header *zhdr)
>   		z3fold_page_unlock(zhdr);
>   }
>
> -static inline void free_handle(unsigned long handle)
> +static inline void free_handle(unsigned long handle, struct
> z3fold_header *zhdr)
>   {
>   	struct z3fold_buddy_slots *slots;
> -	struct z3fold_header *zhdr;
>   	int i;
>   	bool is_free;
>
> @@ -316,22 +315,13 @@ static inline void free_handle(unsigned long handle)
>   	if (WARN_ON(*(unsigned long *)handle == 0))
>   		return;
>
> -	zhdr = handle_to_z3fold_header(handle);
>   	slots = handle_to_slots(handle);
>   	write_lock(&slots->lock);
>   	*(unsigned long *)handle = 0;
> -	if (zhdr->slots == slots) {
> -		write_unlock(&slots->lock);
> -		return; /* simple case, nothing else to do */
> -	}
> +	if (zhdr->slots != slots)
> +		zhdr->foreign_handles--;
>
> -	/* we are freeing a foreign handle if we are here */
> -	zhdr->foreign_handles--;
>   	is_free = true;
> -	if (!test_bit(HANDLES_ORPHANED, &slots->pool)) {
> -		write_unlock(&slots->lock);
> -		return;
> -	}
>   	for (i = 0; i <= BUDDY_MASK; i++) {
>   		if (slots->slot[i]) {
>   			is_free = false;
> @@ -343,6 +333,8 @@ static inline void free_handle(unsigned long handle)
>   	if (is_free) {
>   		struct z3fold_pool *pool = slots_to_pool(slots);
>
> +		if (zhdr->slots == slots)
> +			zhdr->slots = NULL;
>   		kmem_cache_free(pool->c_handle, slots);
>   	}
>   }
> @@ -525,8 +517,6 @@ static void __release_z3fold_page(struct
> z3fold_header *zhdr, bool locked)
>   {
>   	struct page *page = virt_to_page(zhdr);
>   	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
> -	bool is_free = true;
> -	int i;
>
>   	WARN_ON(!list_empty(&zhdr->buddy));
>   	set_bit(PAGE_STALE, &page->private);
> @@ -536,21 +526,6 @@ static void __release_z3fold_page(struct
> z3fold_header *zhdr, bool locked)
>   		list_del_init(&page->lru);
>   	spin_unlock(&pool->lock);
>
> -	/* If there are no foreign handles, free the handles array */
> -	read_lock(&zhdr->slots->lock);
> -	for (i = 0; i <= BUDDY_MASK; i++) {
> -		if (zhdr->slots->slot[i]) {
> -			is_free = false;
> -			break;
> -		}
> -	}
> -	if (!is_free)
> -		set_bit(HANDLES_ORPHANED, &zhdr->slots->pool);
> -	read_unlock(&zhdr->slots->lock);
> -
> -	if (is_free)
> -		kmem_cache_free(pool->c_handle, zhdr->slots);
> -
>   	if (locked)
>   		z3fold_page_unlock(zhdr);
>
> @@ -973,6 +948,9 @@ static inline struct z3fold_header
> *__z3fold_alloc(struct z3fold_pool *pool,
>   		}
>   	}
>
> +	if (zhdr && !zhdr->slots)
> +		zhdr->slots = alloc_slots(pool,
> +					can_sleep ? GFP_NOIO : GFP_ATOMIC);
>   	return zhdr;
>   }
>
> @@ -1270,7 +1248,7 @@ static void z3fold_free(struct z3fold_pool *pool,
> unsigned long handle)
>   	}
>
>   	if (!page_claimed)
> -		free_handle(handle);
> +		free_handle(handle, zhdr);
>   	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
>   		atomic64_dec(&pool->pages_nr);
>   		return;
> @@ -1429,19 +1407,19 @@ static int z3fold_reclaim_page(struct
> z3fold_pool *pool, unsigned int retries)
>   			ret = pool->ops->evict(pool, middle_handle);
>   			if (ret)
>   				goto next;
> -			free_handle(middle_handle);
> +			free_handle(middle_handle, zhdr);
>   		}
>   		if (first_handle) {
>   			ret = pool->ops->evict(pool, first_handle);
>   			if (ret)
>   				goto next;
> -			free_handle(first_handle);
> +			free_handle(first_handle, zhdr);
>   		}
>   		if (last_handle) {
>   			ret = pool->ops->evict(pool, last_handle);
>   			if (ret)
>   				goto next;
> -			free_handle(last_handle);
> +			free_handle(last_handle, zhdr);
>   		}
>   next:
>   		if (test_bit(PAGE_HEADLESS, &page->private)) {
>
> --
>
> Best regards,
>     Vitaly

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ