linux-kernel - Re: [PATCH 3/5] mm: Do not stall in synchronous compaction for THP allocations

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 18 Nov 2011 18:34:02 +0100
From:	Andrea Arcangeli <aarcange@...hat.com>
To:	Mel Gorman <mgorman@...e.de>
Cc:	Linux-MM <linux-mm@...ck.org>, Minchan Kim <minchan.kim@...il.com>,
	Jan Kara <jack@...e.cz>, Andy Isaacson <adi@...apodia.org>,
	Johannes Weiner <jweiner@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 3/5] mm: Do not stall in synchronous compaction for THP
 allocations

On Fri, Nov 18, 2011 at 04:58:42PM +0000, Mel Gorman wrote:
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 3a76faf..ef1b1af 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -83,7 +83,18 @@ struct vm_area_struct;
>  #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
>  #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
>  
> +/*
> + * __GFP_NO_KSWAPD indicates that the VM should favour failing the allocation
> + * over excessive disruption of the system. Currently this means
> + * 1. Do not wake kswapd (hence the flag name)
> + * 2. Do not use stall in synchronous compaction for high-order allocations
> + *    as this may cause the caller to stall writing out pages
> + *
> + * This flag it primarily intended for use with transparent hugepage support.
> + * If the flag is used outside the VM, linux-mm should be cc'd for review.
> + */
>  #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
> +
>  #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
>  
>  /*
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 9dd443d..7a5c5b1 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2168,7 +2168,14 @@ rebalance:
>  					sync_migration);
>  	if (page)
>  		goto got_pg;
> -	sync_migration = true;
> +
> +	/*
> +	 * Do not use sync migration if __GFP_NO_KSWAPD is used to indicate
> +	 * the system should not be heavily disrupted. In practice, this is
> +	 * to avoid THP callers being stalled in writeback during migration
> +	 * as it's preferable for the the allocations to fail than to stall
> +	 */
> +	sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
>  
>  	/* Try direct reclaim and then allocating */
>  	page = __alloc_pages_direct_reclaim(gfp_mask, order,

I don't like this anymore. Also with sync migration compaction will
only scan only the migrate movable pageblocks.

What I'm running right now is this:

From: Andrea Arcangeli <aarcange@...hat.com>
Subject: [PATCH] compaction: avoid overwork in migrate sync mode

Add a lightweight sync migration (sync == 2) mode that avoids overwork
so more suitable to be used by compaction to provide lower latency but
still write throttling.

Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
 include/linux/migrate.h |    8 ++--
 mm/compaction.c         |    2 +-
 mm/memory-failure.c     |    2 +-
 mm/memory_hotplug.c     |    2 +-
 mm/mempolicy.c          |    4 +-
 mm/migrate.c            |   95 +++++++++++++++++++++++++++++++----------------
 6 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeec..f26fc0e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -14,10 +14,10 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			int sync);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			int sync);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -36,10 +36,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		int sync) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		int sync) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d956..48a3579 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -555,7 +555,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync);
+				cc->sync ? 2 : 0);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479..d8a41d3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags)
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-								0, true);
+				    false, 1);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2168489..e1d6176 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		}
 		/* this function returns # of failed pages */
 		ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
-								true, true);
+				    true, 1);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index adc3954..0bf88ed 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -933,7 +933,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-								false, true);
+				    false, 1);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1154,7 +1154,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
-						false, true);
+						false, 1);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e291..612c3ba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -564,7 +564,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache, bool sync)
+			    int remap_swapcache, bool force)
 {
 	struct address_space *mapping;
 	int rc;
@@ -588,11 +588,11 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 		rc = migrate_page(mapping, newpage, page);
 	else {
 		/*
-		 * Do not writeback pages if !sync and migratepage is
+		 * Do not writeback pages if !force and migratepage is
 		 * not pointing to migrate_page() which is nonblocking
 		 * (swapcache/tmpfs uses migratepage = migrate_page).
 		 */
-		if (PageDirty(page) && !sync &&
+		if (PageDirty(page) && !force &&
 		    mapping->a_ops->migratepage != migrate_page)
 			rc = -EBUSY;
 		else if (mapping->a_ops->migratepage)
@@ -622,7 +622,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-				int force, bool offlining, bool sync)
+			    bool force, bool offlining)
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
@@ -631,7 +631,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
-		if (!force || !sync)
+		if (!force)
 			goto out;
 
 		/*
@@ -676,14 +676,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	BUG_ON(charge);
 
 	if (PageWriteback(page)) {
-		/*
-		 * For !sync, there is no point retrying as the retry loop
-		 * is expected to be too short for PageWriteback to be cleared
-		 */
-		if (!sync) {
-			rc = -EBUSY;
-			goto uncharge;
-		}
 		if (!force)
 			goto uncharge;
 		wait_on_page_writeback(page);
@@ -751,7 +743,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+		rc = move_to_new_page(newpage, page, remap_swapcache, force);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
@@ -774,7 +766,7 @@ out:
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining, bool sync)
+			  struct page *page, bool force, bool offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -792,7 +784,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		if (unlikely(split_huge_page(page)))
 			goto out;
 
-	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	rc = __unmap_and_move(page, newpage, force, offlining);
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -840,7 +832,7 @@ out:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, bool offlining, bool sync)
+				bool force, bool offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -853,7 +845,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force || !sync)
+		if (!force)
 			goto out;
 		lock_page(hpage);
 	}
@@ -864,7 +856,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 	if (!page_mapped(hpage))
-		rc = move_to_new_page(new_hpage, hpage, 1, sync);
+		rc = move_to_new_page(new_hpage, hpage, 1, force);
 
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
@@ -907,11 +899,11 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		int sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
-	int pass = 0;
+	int pass, passes;
 	struct page *page;
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
@@ -920,15 +912,34 @@ int migrate_pages(struct list_head *from,
 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
 
-	for(pass = 0; pass < 10 && retry; pass++) {
+	switch (sync) {
+	case 0:
+		/* 3 async  0 sync */
+		passes = 3;
+		break;
+	case 1:
+		/* 3 async, 7 sync */
+		passes = 10;
+		break;
+	case 2:
+		/*
+		 * sync = 2 asks for a lightweight synchronous mode:
+		 * 3 async, 2 sync.
+		 */
+		passes = 5;
+		break;
+	default:
+		BUG();
+	}
+
+	for(pass = 0; pass < passes && retry; pass++) {
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
 			cond_resched();
 
-			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, offlining,
-						sync);
+			rc = unmap_and_move(get_new_page, private, page,
+					    pass > 2, offlining);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -958,24 +969,44 @@ out:
 
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		int sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
-	int pass = 0;
+	int pass, passes;
 	struct page *page;
 	struct page *page2;
 	int rc;
 
-	for (pass = 0; pass < 10 && retry; pass++) {
+	switch (sync) {
+	case 0:
+		/* 3 async  0 sync */
+		passes = 3;
+		break;
+	case 1:
+		/* 3 async, 7 sync */
+		passes = 10;
+		break;
+	case 2:
+		/*
+		 * sync = 2 asks for a lightweight synchronous mode:
+		 * 3 async, 2 sync.
+		 */
+		passes = 5;
+		break;
+	default:
+		BUG();
+	}
+
+	for (pass = 0; pass < passes && retry; pass++) {
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
 			cond_resched();
 
-			rc = unmap_and_move_huge_page(get_new_page,
-					private, page, pass > 2, offlining,
-					sync);
+			rc = unmap_and_move_huge_page(get_new_page, private,
+						      page, pass > 2,
+						      offlining);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1104,7 +1135,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, true);
+				    (unsigned long)pm, false, 1);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/