lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201005081313.732745-3-andrea.righi@canonical.com>
Date:   Mon,  5 Oct 2020 10:13:13 +0200
From:   Andrea Righi <andrea.righi@...onical.com>
To:     Michal Hocko <mhocko@...nel.org>,
        Vladimir Davydov <vdavydov.dev@...il.com>
Cc:     Li Zefan <lizefan@...wei.com>, Tejun Heo <tj@...nel.org>,
        Johannes Weiner <hannes@...xchg.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Luigi Semenzato <semenzato@...gle.com>,
        "Rafael J . Wysocki" <rafael@...nel.org>, cgroups@...r.kernel.org,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        linux-doc@...r.kernel.org
Subject: [PATCH RFC v2 2/2] mm: memcontrol: introduce opportunistic memory reclaim

Opportunistic memory reclaim allows user-space to trigger an artificial
memory pressure condition and force the system to reclaim memory (drop
caches, swap out anonymous memory, etc.).

This feature is provided by adding a new file to each memcg:
memory.swap.reclaim.

Writing a number to this file forces a memcg to reclaim memory up to
that number of bytes ("max" means as much memory as possible). Reading
from the this file returns the amount of bytes reclaimed in the last
opportunistic memory reclaim attempt.

Memory reclaim can be interrupted sending a signal to the process that
is writing to memory.swap.reclaim (i.e., to set a timeout for the whole
memory reclaim run).

Signed-off-by: Andrea Righi <andrea.righi@...onical.com>
---
 Documentation/admin-guide/cgroup-v2.rst | 18 ++++++++
 include/linux/memcontrol.h              |  4 ++
 mm/memcontrol.c                         | 59 +++++++++++++++++++++++++
 3 files changed, 81 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index baa07b30845e..2850a5cb4b1e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1409,6 +1409,24 @@ PAGE_SIZE multiple when read back.
 	Swap usage hard limit.  If a cgroup's swap usage reaches this
 	limit, anonymous memory of the cgroup will not be swapped out.
 
+  memory.swap.reclaim
+        A read-write single value file that can be used to trigger
+        opportunistic memory reclaim.
+
+        The string written to this file represents the amount of memory to be
+        reclaimed (special value "max" means "as much memory as possible").
+
+        When opportunistic memory reclaim is started the system will be put
+        into an artificial memory pressure condition and memory will be
+        reclaimed by dropping clean page cache pages, swapping out anonymous
+        pages, etc.
+
+        NOTE: it is possible to interrupt the memory reclaim sending a signal
+        to the writer of this file.
+
+        Reading from memory.swap.reclaim returns the amount of bytes reclaimed
+        in the last attempt.
+
   memory.swap.events
 	A read-only flat-keyed file which exists on non-root cgroups.
 	The following entries are defined.  Unless specified
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d0b036123c6a..0c90d989bdc1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -306,6 +306,10 @@ struct mem_cgroup {
 	bool			tcpmem_active;
 	int			tcpmem_pressure;
 
+#ifdef CONFIG_MEMCG_SWAP
+	unsigned long		nr_swap_reclaimed;
+#endif
+
 #ifdef CONFIG_MEMCG_KMEM
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6877c765b8d0..b98e9bbd61b0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7346,6 +7346,60 @@ static int swap_events_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+/*
+ * Try to reclaim some memory in the system, stop when one of the following
+ * conditions occurs:
+ *  - at least "nr_pages" have been reclaimed
+ *  - no more pages can be reclaimed
+ *  - current task explicitly interrupted by a signal (e.g., user space
+ *    timeout)
+ *
+ *  @nr_pages - amount of pages to be reclaimed (0 means "as many pages as
+ *  possible").
+ */
+static unsigned long
+do_mm_reclaim(struct mem_cgroup *memcg, unsigned long nr_pages)
+{
+	unsigned long nr_reclaimed = 0;
+
+	while (nr_pages > 0) {
+		unsigned long reclaimed;
+
+		if (signal_pending(current))
+			break;
+		reclaimed = __shrink_all_memory(nr_pages, memcg);
+		if (!reclaimed)
+			break;
+		nr_reclaimed += reclaimed;
+		nr_pages -= min_t(unsigned long, reclaimed, nr_pages);
+	}
+	return nr_reclaimed;
+}
+
+static ssize_t swap_reclaim_write(struct kernfs_open_file *of,
+				  char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long nr_to_reclaim;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &nr_to_reclaim);
+	if (err)
+		return err;
+	memcg->nr_swap_reclaimed = do_mm_reclaim(memcg, nr_to_reclaim);
+
+	return nbytes;
+}
+
+static u64 swap_reclaim_read(struct cgroup_subsys_state *css,
+			     struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return memcg->nr_swap_reclaimed << PAGE_SHIFT;
+}
+
 static struct cftype swap_files[] = {
 	{
 		.name = "swap.current",
@@ -7370,6 +7424,11 @@ static struct cftype swap_files[] = {
 		.file_offset = offsetof(struct mem_cgroup, swap_events_file),
 		.seq_show = swap_events_show,
 	},
+	{
+		.name = "swap.reclaim",
+		.write = swap_reclaim_write,
+		.read_u64 = swap_reclaim_read,
+	},
 	{ }	/* terminate */
 };
 
-- 
2.27.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ