linux-kernel - [PATCH 1/5] mm: uncharge kmem pages from generic free

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bd8dc6295b2984a55233904fe6e85ff3b32052d7.1443262808.git.vdavydov@parallels.com>
Date:	Sat, 26 Sep 2015 13:45:53 +0300
From:	Vladimir Davydov <vdavydov@...allels.com>
To:	Andrew Morton <akpm@...ux-foundation.org>
CC:	Johannes Weiner <hannes@...xchg.org>,
	Michal Hocko <mhocko@...nel.org>, Tejun Heo <tj@...nel.org>,
	<linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>
Subject: [PATCH 1/5] mm: uncharge kmem pages from generic free_page path

Currently, to charge a page to kmemcg one should use the alloc_kmem_pages
helper. When the page is no longer needed, it must be freed with the
free_kmem_pages helper, which uncharges the page before freeing it.
Such a design is acceptable for thread info pages and large kmalloc
allocations, which are currently the only users of alloc_kmem_pages, but
it becomes extremely inconvenient if one wants to make use of batched
freeing (e.g. to charge page tables - see release_pages) or the page
reference counter (e.g. pipe buffers - see anon_pipe_buf_release).

To overcome this limitation, this patch moves the kmemcg uncharge code to
the generic free path and zaps the free_kmem_pages helper. To distinguish
kmem pages from other page types, it makes alloc_kmem_pages initialize
page->_mapcount to a special value and introduces a new PageKmem helper,
which returns true if it sees this value.

Signed-off-by: Vladimir Davydov <vdavydov@...allels.com>
---
 include/linux/gfp.h        |  3 ---
 include/linux/page-flags.h | 22 ++++++++++++++++++++++
 kernel/fork.c              |  2 +-
 mm/page_alloc.c            | 26 ++++++++------------------
 mm/slub.c                  |  2 +-
 mm/swap.c                  |  3 ++-
 6 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f92cbd2f4450..b46147c45966 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -384,9 +384,6 @@ extern void *__alloc_page_frag(struct page_frag_cache *nc,
 			       unsigned int fragsz, gfp_t gfp_mask);
 extern void __free_page_frag(void *addr);
 
-extern void __free_kmem_pages(struct page *page, unsigned int order);
-extern void free_kmem_pages(unsigned long addr, unsigned int order);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 416509e26d6d..a190719c2f46 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -594,6 +594,28 @@ static inline void __ClearPageBalloon(struct page *page)
 }
 
 /*
+ * PageKmem() returns true if the page was allocated with alloc_kmem_pages().
+ */
+#define PAGE_KMEM_MAPCOUNT_VALUE (-512)
+
+static inline int PageKmem(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_KMEM_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageKmem(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_KMEM_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageKmem(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageKmem(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
+/*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index 2845623fb582..c23f8a17e99e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -169,7 +169,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48aaf7b9f253..88d85367c81e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -942,6 +942,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 
 	if (PageAnon(page))
 		page->mapping = NULL;
+	if (PageKmem(page)) {
+		memcg_kmem_uncharge_pages(page, order);
+		__ClearPageKmem(page);
+	}
 	bad += free_pages_check(page);
 	for (i = 1; i < (1 << order); i++) {
 		if (compound)
@@ -3434,6 +3438,8 @@ struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
 		return NULL;
 	page = alloc_pages(gfp_mask, order);
 	memcg_kmem_commit_charge(page, memcg, order);
+	if (page)
+		__SetPageKmem(page);
 	return page;
 }
 
@@ -3446,27 +3452,11 @@ struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
 		return NULL;
 	page = alloc_pages_node(nid, gfp_mask, order);
 	memcg_kmem_commit_charge(page, memcg, order);
+	if (page)
+		__SetPageKmem(page);
 	return page;
 }
 
-/*
- * __free_kmem_pages and free_kmem_pages will free pages allocated with
- * alloc_kmem_pages.
- */
-void __free_kmem_pages(struct page *page, unsigned int order)
-{
-	memcg_kmem_uncharge_pages(page, order);
-	__free_pages(page, order);
-}
-
-void free_kmem_pages(unsigned long addr, unsigned int order)
-{
-	if (addr != 0) {
-		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_kmem_pages(virt_to_page((void *)addr), order);
-	}
-}
-
 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
 {
 	if (addr) {
diff --git a/mm/slub.c b/mm/slub.c
index f614b5dc396b..f5248a7d9438 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3516,7 +3516,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(x);
-		__free_kmem_pages(page, compound_order(page));
+		__free_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, _RET_IP_);
diff --git a/mm/swap.c b/mm/swap.c
index 983f692a47fd..8d8d03118a18 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -64,7 +64,8 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
-	mem_cgroup_uncharge(page);
+	if (!PageKmem(page))
+		mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/