lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  8 Nov 2023 14:58:14 +0800
From:   Huan Yang <link@...o.com>
To:     Tejun Heo <tj@...nel.org>, Zefan Li <lizefan.x@...edance.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Jonathan Corbet <corbet@....net>,
        Michal Hocko <mhocko@...nel.org>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Shakeel Butt <shakeelb@...gle.com>,
        Muchun Song <muchun.song@...ux.dev>,
        Andrew Morton <akpm@...ux-foundation.org>,
        David Hildenbrand <david@...hat.com>,
        Matthew Wilcox <willy@...radead.org>,
        Huang Ying <ying.huang@...el.com>,
        Kefeng Wang <wangkefeng.wang@...wei.com>,
        "Vishal Moola (Oracle)" <vishal.moola@...il.com>,
        Yosry Ahmed <yosryahmed@...gle.com>,
        Liu Shixin <liushixin2@...wei.com>,
        Peter Xu <peterx@...hat.com>, Hugh Dickins <hughd@...gle.com>,
        cgroups@...r.kernel.org, linux-doc@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc:     opensource.kernel@...o.com, Huan Yang <link@...o.com>
Subject: [PATCH 3/4] mm: memcg: implement unbalance proactive reclaim

This patch add swappiness arg into proactive reclaim interface.
User can type "bytes swappiness" into memory.reclaim to control
reclaim tendency.
Specially, user can type below to only reclaim anon folios:
`"100M" 200 > memory.reclaim`
Or, type below to only reclaim file folios:
`100M 1 > memory.reclaim`

User can only type key into memory.reclaim to keep original use.

Signed-off-by: Huan Yang <link@...o.com>
---
 Documentation/admin-guide/cgroup-v2.rst | 16 ++++---
 include/linux/swap.h                    |  2 +
 mm/memcontrol.c                         | 55 +++++++++++++++++++++----
 mm/vmscan.c                             |  2 +
 4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index c153d0c75f34..5a471ac7f0c3 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1275,16 +1275,22 @@ PAGE_SIZE multiple when read back.
 	This is a simple interface to trigger memory reclaim in the
 	target cgroup.
 
-	This file accepts a single key, the number of bytes to reclaim.
-	No nested keys are currently supported.
+	This file accepts a few key, the number of bytes to reclaim.
+	Few nested keys are currently supported.
 
 	Example::
 
 	  echo "1G" > memory.reclaim
 
-	The interface can be later extended with nested keys to
-	configure the reclaim behavior. For example, specify the
-	type of memory to reclaim from (anon, file, ..).
+	The interface extended with nested keys to configure the
+	reclaim behavior. For example, specify the swappiness of
+	memory to reclaim from (anon, file, ..).
+
+	Example::
+
+	  echo "1G" 200 > memory.reclaim (only reclaim anon)
+	  echo "1G" 0  > memory.reclaim (only reclaim file)
+	  echo "1G" 1  > memory.reclaim (only reclaim file)
 
 	Please note that the kernel can over or under reclaim from
 	the target cgroup. If less bytes are reclaimed than the
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3ba146ae7cf5..a024194301d4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -407,6 +407,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MEMCG_RECLAIM_ANON (1 << 3)
+#define MEMCG_RECLAIM_FILE (1 << 4)
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index be2ad117515e..a0e460abd41c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6953,19 +6953,47 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
 	unsigned long nr_to_reclaim, nr_reclaimed = 0;
 	unsigned int reclaim_options;
+	int swappiness = -1, org_swappiness, n;
+	char *tmpbuf;
 	int err;
 
-	buf = strstrip(buf);
-	err = page_counter_memparse(buf, "", &nr_to_reclaim);
+	tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
+	if (unlikely(!tmpbuf))
+		return -ENOMEM;
+
+	buf = skip_spaces(buf);
+	n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
+	if (n < 1) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	if (n == 2 && (swappiness > 200 || swappiness < 0)) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
 	if (err)
-		return err;
+		goto out_free;
 
 	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	if (swappiness != -1) {
+		org_swappiness = memcg->swappiness;
+		memcg->swappiness = swappiness;
+		if (swappiness == 200)
+			reclaim_options |= MEMCG_RECLAIM_ANON;
+		else if (swappiness == 0 || swappiness == 1)
+			reclaim_options |= MEMCG_RECLAIM_FILE;
+	}
+
 	while (nr_reclaimed < nr_to_reclaim) {
 		unsigned long reclaimed;
 
-		if (signal_pending(current))
-			return -EINTR;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			goto out;
+		}
 
 		/*
 		 * This is the final attempt, drain percpu lru caches in the
@@ -6979,13 +7007,26 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 					min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
 					GFP_KERNEL, reclaim_options);
 
-		if (!reclaimed && !nr_retries--)
-			return -EAGAIN;
+		if (!reclaimed && !nr_retries--) {
+			err = -EAGAIN;
+			goto out;
+		}
 
 		nr_reclaimed += reclaimed;
 	}
 
+	if (swappiness != -1)
+		memcg->swappiness = org_swappiness;
+
 	return nbytes;
+
+out:
+	if (swappiness != -1)
+		memcg->swappiness = org_swappiness;
+
+out_free:
+	kvfree(tmpbuf);
+	return err;
 }
 
 static struct cftype memory_files[] = {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9243a1f0d606..f4221ec833db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6505,6 +6505,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+		.unbalance_anon = !!(reclaim_options & MEMCG_RECLAIM_ANON),
+		.unbalance_file = !!(reclaim_options & MEMCG_RECLAIM_FILE),
 	};
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ