[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20180418160755.GA10227@roeck-us.net>
Date: Wed, 18 Apr 2018 09:07:55 -0700
From: Guenter Roeck <linux@...ck-us.net>
To: Vitaly Wool <vitalywool@...il.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
mawilcox@...rosoft.com, asavery@...omium.org, gwendal@...omium.org
Subject: Re: Crashes/hung tasks with z3pool under memory pressure
On Wed, Apr 18, 2018 at 10:13:17AM +0200, Vitaly Wool wrote:
> Den tis 17 apr. 2018 kl 18:35 skrev Guenter Roeck <linux@...ck-us.net>:
>
> <snip>
>
> > Getting better; the log is much less noisy. Unfortunately, there are still
> > locking problems, resulting in a hung task. I copied the log message to [1].
> > This is with [2] applied on top of v4.17-rc1.
>
> Now this version (this is a full patch to be applied instead of the previous one) should have the above problem resolved too:
>
Excellent - I cannot reproduce the problem with this patch
applied.
Guenter
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index c0bca6153b95..901c0b07cbda 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -144,7 +144,8 @@ enum z3fold_page_flags {
> PAGE_HEADLESS = 0,
> MIDDLE_CHUNK_MAPPED,
> NEEDS_COMPACTING,
> - PAGE_STALE
> + PAGE_STALE,
> + UNDER_RECLAIM
> };
>
> /*****************
> @@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
> clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> clear_bit(NEEDS_COMPACTING, &page->private);
> clear_bit(PAGE_STALE, &page->private);
> + clear_bit(UNDER_RECLAIM, &page->private);
>
> spin_lock_init(&zhdr->page_lock);
> kref_init(&zhdr->refcount);
> @@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> atomic64_dec(&pool->pages_nr);
> return;
> }
> + if (test_bit(UNDER_RECLAIM, &page->private)) {
> + z3fold_page_unlock(zhdr);
> + return;
> + }
> if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
> z3fold_page_unlock(zhdr);
> return;
> @@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> kref_get(&zhdr->refcount);
> list_del_init(&zhdr->buddy);
> zhdr->cpu = -1;
> + set_bit(UNDER_RECLAIM, &page->private);
> + break;
> }
>
> list_del_init(&page->lru);
> @@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> goto next;
> }
> next:
> - spin_lock(&pool->lock);
> if (test_bit(PAGE_HEADLESS, &page->private)) {
> if (ret == 0) {
> - spin_unlock(&pool->lock);
> free_z3fold_page(page);
> return 0;
> }
> - } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
> - atomic64_dec(&pool->pages_nr);
> + spin_lock(&pool->lock);
> + list_add(&page->lru, &pool->lru);
> + spin_unlock(&pool->lock);
> + } else {
> + z3fold_page_lock(zhdr);
> + clear_bit(UNDER_RECLAIM, &page->private);
> + if (kref_put(&zhdr->refcount,
> + release_z3fold_page_locked)) {
> + atomic64_dec(&pool->pages_nr);
> + return 0;
> + }
> + /*
> + * if we are here, the page is still not completely
> + * free. Take the global pool lock then to be able
Is the "then" in the line above extra?
> + * to add it back to the lru list
> + */
> + spin_lock(&pool->lock);
> + list_add(&page->lru, &pool->lru);
> spin_unlock(&pool->lock);
> - return 0;
> + z3fold_page_unlock(zhdr);
> }
>
> - /*
> - * Add to the beginning of LRU.
> - * Pool lock has to be kept here to ensure the page has
> - * not already been released
> - */
> - list_add(&page->lru, &pool->lru);
> + /* We started off locked so we need to lock the pool back */
> + spin_lock(&pool->lock);
> }
> spin_unlock(&pool->lock);
> return -EAGAIN;
Powered by blists - more mailing lists