linux-kernel - Re: [patch 01/10] mm: memcg: fix compaction/migration failing due to memcg limits

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120712085354.GA3181@kernel>
Date:	Thu, 12 Jul 2012 16:54:07 +0800
From:	Wanpeng Li <liwp.linux@...il.com>
To:	Johannes Weiner <hannes@...xchg.org>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
	Michal Hocko <mhocko@...e.cz>, Hugh Dickins <hughd@...gle.com>,
	David Rientjes <rientjes@...gle.com>,
	Wanpeng Li <liwp.linux@...il.com>, linux-mm@...ck.org,
	cgroups@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [patch 01/10] mm: memcg: fix compaction/migration failing due to
 memcg limits

On Wed, Jul 11, 2012 at 07:02:13PM +0200, Johannes Weiner wrote:
>Compaction (and page migration in general) can currently be hindered
>through pages being owned by memory cgroups that are at their limits
>and unreclaimable.
>
>The reason is that the replacement page is being charged against the
>limit while the page being replaced is also still charged.  But this
>seems unnecessary, given that only one of the two pages will still be
>in use after migration finishes.
>
>This patch changes the memcg migration sequence so that the
>replacement page is not charged.  Whatever page is still in use after
>successful or failed migration gets to keep the charge of the page
>that was going to be replaced.
>
>The replacement page will still show up temporarily in the rss/cache
>statistics, this can be fixed in a later patch as it's less urgent.
>

So I want to know after this patch be merged if mem_cgroup_wait_acct_move
still make sense, if the answer is no, I will send a patch to remove it.

>Reported-by: David Rientjes <rientjes@...gle.com>
>Signed-off-by: Johannes Weiner <hannes@...xchg.org>
>Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
>Acked-by: Michal Hocko <mhocko@...e.cz>
>---
> include/linux/memcontrol.h |   11 +++----
> mm/memcontrol.c            |   67 +++++++++++++++++++++++--------------------
> mm/migrate.c               |   27 ++++--------------
> 3 files changed, 47 insertions(+), 58 deletions(-)
>
>diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>index 5a3ee64..8d9489f 100644
>--- a/include/linux/memcontrol.h
>+++ b/include/linux/memcontrol.h
>@@ -98,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
> 
> extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
> 
>-extern int
>-mem_cgroup_prepare_migration(struct page *page,
>-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
>+extern void
>+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>+			     struct mem_cgroup **memcgp);
> extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
> 	struct page *oldpage, struct page *newpage, bool migration_ok);
> 
>@@ -276,11 +276,10 @@ static inline struct cgroup_subsys_state
> 	return NULL;
> }
> 
>-static inline int
>+static inline void
> mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>-	struct mem_cgroup **memcgp, gfp_t gfp_mask)
>+			     struct mem_cgroup **memcgp)
> {
>-	return 0;
> }
> 
> static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>index e8ddc00..12ee2de 100644
>--- a/mm/memcontrol.c
>+++ b/mm/memcontrol.c
>@@ -2977,7 +2977,8 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
>  * uncharge if !page_mapped(page)
>  */
> static struct mem_cgroup *
>-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
>+			     bool end_migration)
> {
> 	struct mem_cgroup *memcg = NULL;
> 	unsigned int nr_pages = 1;
>@@ -3021,7 +3022,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
> 		/* fallthrough */
> 	case MEM_CGROUP_CHARGE_TYPE_DROP:
> 		/* See mem_cgroup_prepare_migration() */
>-		if (page_mapped(page) || PageCgroupMigration(pc))
>+		if (page_mapped(page))
>+			goto unlock_out;
>+		/*
>+		 * Pages under migration may not be uncharged.  But
>+		 * end_migration() /must/ be the one uncharging the
>+		 * unused post-migration page and so it has to call
>+		 * here with the migration bit still set.  See the
>+		 * res_counter handling below.
>+		 */
>+		if (!end_migration && PageCgroupMigration(pc))
> 			goto unlock_out;
> 		break;
> 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
>@@ -3055,7 +3065,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
> 		mem_cgroup_swap_statistics(memcg, true);
> 		mem_cgroup_get(memcg);
> 	}
>-	if (!mem_cgroup_is_root(memcg))
>+	/*
>+	 * Migration does not charge the res_counter for the
>+	 * replacement page, so leave it alone when phasing out the
>+	 * page that is unused after the migration.
>+	 */
>+	if (!end_migration && !mem_cgroup_is_root(memcg))
> 		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
> 
> 	return memcg;
>@@ -3071,14 +3086,14 @@ void mem_cgroup_uncharge_page(struct page *page)
> 	if (page_mapped(page))
> 		return;
> 	VM_BUG_ON(page->mapping && !PageAnon(page));
>-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
>+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
> }
> 
> void mem_cgroup_uncharge_cache_page(struct page *page)
> {
> 	VM_BUG_ON(page_mapped(page));
> 	VM_BUG_ON(page->mapping);
>-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
>+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
> }
> 
> /*
>@@ -3142,7 +3157,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
> 	if (!swapout) /* this was a swap cache but the swap is unused ! */
> 		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
> 
>-	memcg = __mem_cgroup_uncharge_common(page, ctype);
>+	memcg = __mem_cgroup_uncharge_common(page, ctype, false);
> 
> 	/*
> 	 * record memcg information,  if swapout && memcg != NULL,
>@@ -3232,19 +3247,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
>  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
>  * page belongs to.
>  */
>-int mem_cgroup_prepare_migration(struct page *page,
>-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
>+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>+				  struct mem_cgroup **memcgp)
> {
> 	struct mem_cgroup *memcg = NULL;
> 	struct page_cgroup *pc;
> 	enum charge_type ctype;
>-	int ret = 0;
> 
> 	*memcgp = NULL;
> 
> 	VM_BUG_ON(PageTransHuge(page));
> 	if (mem_cgroup_disabled())
>-		return 0;
>+		return;
> 
> 	pc = lookup_page_cgroup(page);
> 	lock_page_cgroup(pc);
>@@ -3289,24 +3303,9 @@ int mem_cgroup_prepare_migration(struct page *page,
> 	 * we return here.
> 	 */
> 	if (!memcg)
>-		return 0;
>+		return;
> 
> 	*memcgp = memcg;
>-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
>-	css_put(&memcg->css);/* drop extra refcnt */
>-	if (ret) {
>-		if (PageAnon(page)) {
>-			lock_page_cgroup(pc);
>-			ClearPageCgroupMigration(pc);
>-			unlock_page_cgroup(pc);
>-			/*
>-			 * The old page may be fully unmapped while we kept it.
>-			 */
>-			mem_cgroup_uncharge_page(page);
>-		}
>-		/* we'll need to revisit this error code (we have -EINTR) */
>-		return -ENOMEM;
>-	}
> 	/*
> 	 * We charge new page before it's used/mapped. So, even if unlock_page()
> 	 * is called before end_migration, we can catch all events on this new
>@@ -3319,8 +3318,12 @@ int mem_cgroup_prepare_migration(struct page *page,
> 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
> 	else
> 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
>+	/*
>+	 * The page is committed to the memcg, but it's not actually
>+	 * charged to the res_counter since we plan on replacing the
>+	 * old one and only one page is going to be left afterwards.
>+	 */
> 	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
>-	return ret;
> }
> 
> /* remove redundant charge if migration failed*/
>@@ -3342,6 +3345,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
> 		used = newpage;
> 		unused = oldpage;
> 	}
>+	anon = PageAnon(used);
>+	__mem_cgroup_uncharge_common(unused,
>+		anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>+		     : MEM_CGROUP_CHARGE_TYPE_CACHE,
>+		true);
>+	css_put(&memcg->css);
> 	/*
> 	 * We disallowed uncharge of pages under migration because mapcount
> 	 * of the page goes down to zero, temporarly.
>@@ -3351,10 +3360,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
> 	lock_page_cgroup(pc);
> 	ClearPageCgroupMigration(pc);
> 	unlock_page_cgroup(pc);
>-	anon = PageAnon(used);
>-	__mem_cgroup_uncharge_common(unused,
>-		anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>-		     : MEM_CGROUP_CHARGE_TYPE_CACHE);
> 
> 	/*
> 	 * If a page is a file cache, radix-tree replacement is very atomic
>diff --git a/mm/migrate.c b/mm/migrate.c
>index 8137aea..aa06bf4 100644
>--- a/mm/migrate.c
>+++ b/mm/migrate.c
>@@ -687,7 +687,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
> {
> 	int rc = -EAGAIN;
> 	int remap_swapcache = 1;
>-	int charge = 0;
> 	struct mem_cgroup *mem;
> 	struct anon_vma *anon_vma = NULL;
> 
>@@ -729,12 +728,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
> 	}
> 
> 	/* charge against new page */
>-	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
>-	if (charge == -ENOMEM) {
>-		rc = -ENOMEM;
>-		goto unlock;
>-	}
>-	BUG_ON(charge);
>+	mem_cgroup_prepare_migration(page, newpage, &mem);
> 
> 	if (PageWriteback(page)) {
> 		/*
>@@ -824,8 +818,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
> 		put_anon_vma(anon_vma);
> 
> uncharge:
>-	if (!charge)
>-		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
>+	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
> unlock:
> 	unlock_page(page);
> out:
>@@ -1519,10 +1512,9 @@ migrate_misplaced_page(struct page *page, struct mm_struct *mm, int node)
> {
> 	struct page *oldpage = page, *newpage;
> 	struct address_space *mapping = page_mapping(page);
>-	struct mem_cgroup *mcg;
>+	struct mem_cgroup *memcg;
> 	unsigned int gfp;
> 	int rc = 0;
>-	int charge = -ENOMEM;
> 
> 	VM_BUG_ON(!PageLocked(page));
> 	VM_BUG_ON(page_mapcount(page));
>@@ -1556,12 +1548,7 @@ migrate_misplaced_page(struct page *page, struct mm_struct *mm, int node)
> 	if (!trylock_page(newpage))
> 		BUG();		/* new page should be unlocked!!! */
> 
>-	// XXX hnaz, is this right?
>-	charge = mem_cgroup_prepare_migration(page, newpage, &mcg, gfp);
>-	if (charge == -ENOMEM) {
>-		rc = charge;
>-		goto out;
>-	}
>+	mem_cgroup_prepare_migration(page, newpage, &memcg);
> 
> 	newpage->index = page->index;
> 	newpage->mapping = page->mapping;
>@@ -1581,11 +1568,9 @@ migrate_misplaced_page(struct page *page, struct mm_struct *mm, int node)
> 		page = newpage;
> 	}
> 
>+	mem_cgroup_end_migration(memcg, oldpage, newpage, !rc);
> out:
>-	if (!charge)
>-		mem_cgroup_end_migration(mcg, oldpage, newpage, !rc);
>-
>-       if (oldpage != page)
>+	if (oldpage != page)
>                put_page(oldpage);
> 
> 	if (rc) {
>-- 
>1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/