lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220416053902.68517-5-dave@stgolabs.net>
Date:   Fri, 15 Apr 2022 22:39:00 -0700
From:   Davidlohr Bueso <dave@...olabs.net>
To:     linux-mm@...ck.org
Cc:     mhocko@...nel.org, akpm@...ux-foundation.org, rientjes@...gle.com,
        yosryahmed@...gle.com, hannes@...xchg.org, shakeelb@...gle.com,
        dave.hansen@...ux.intel.com, tim.c.chen@...ux.intel.com,
        roman.gushchin@...ux.dev, gthelen@...gle.com,
        a.manzanares@...sung.com, heekwon.p@...sung.com,
        gim.jongmin@...sung.com, dave@...olabs.net,
        linux-kernel@...r.kernel.org
Subject: [PATCH 4/6] mm: introduce per-node proactive reclaim interface

This patch introduces a mechanism to trigger memory reclaim
as a per-node sysfs interface, inspired by compaction's
equivalent, i.e.:

	 echo 1G > /sys/devices/system/node/nodeX/reclaim

It is based on the discussions from David's thread[1] as
well as the current upstreaming of the memcg[2] interface
(which has nice explanations for the benefits of userspace
reclaim overall). In both cases the conclusion was that either
way of inducing proactive reclaim should be KISS, and can be
extended later. So this patch does not allow the user much
fine-tuning beyond the size of the reclaim, such as choosing
between anon and file pages or controlling demotion semantics.

[1] https://lore.kernel.org/all/5df21376-7dd1-bf81-8414-32a73cea45dd@google.com/
[2] https://lore.kernel.org/all/20220408045743.1432968-1-yosryahmed@google.com/

Signed-off-by: Davidlohr Bueso <dave@...olabs.net>
---
 Documentation/ABI/stable/sysfs-devices-node | 10 ++++
 drivers/base/node.c                         |  2 +
 include/linux/swap.h                        | 16 ++++++
 mm/vmscan.c                                 | 59 +++++++++++++++++++++
 4 files changed, 87 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 8db67aa472f1..3c935e1334f7 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -182,3 +182,13 @@ Date:		November 2021
 Contact:	Jarkko Sakkinen <jarkko@...nel.org>
 Description:
 		The total amount of SGX physical memory in bytes.
+
+What:		/sys/devices/system/node/nodeX/reclaim
+Date:		April 2022
+Contact:	Davidlohr Bueso <dave@...olabs.net>
+Description:
+		Write the number of bytes to reclaim to induce memory reclaim in
+		this node. This file accepts a single key, the number of bytes to
+		reclaim. When the write completes successfully, at least the
+		specified amount of memory will have been reclaimed; the write
+		fails with -EAGAIN if fewer bytes than specified were reclaimed.
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6cdf25fd26c3..d80c478e2a6e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -670,6 +670,7 @@ static int register_node(struct node *node, int num)
 
 	hugetlb_register_node(node);
 	compaction_register_node(node);
+	reclaim_register_node(node);
 	return 0;
 }
 
@@ -685,6 +686,7 @@ void unregister_node(struct node *node)
 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
 	node_remove_accesses(node);
 	node_remove_caches(node);
+	reclaim_unregister_node(node);
 	device_unregister(&node->dev);
 }
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 27093b477c5f..cca43ae6d770 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -398,6 +398,22 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 long remove_mapping(struct address_space *mapping, struct folio *folio);
 
+#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
+extern int reclaim_register_node(struct node *node);
+extern void reclaim_unregister_node(struct node *node);
+
+#else
+
+static inline int reclaim_register_node(struct node *node)
+{
+	return 0;
+}
+
+static inline void reclaim_unregister_node(struct node *node)
+{
+}
+#endif /* CONFIG_SYSFS && CONFIG_NUMA */
+
 extern unsigned long reclaim_pages(struct list_head *page_list);
 #ifdef CONFIG_NUMA
 extern int node_reclaim_mode;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1735c302831c..3539f8a0f0ea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4819,3 +4819,62 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 	}
 }
 EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
+
+#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
+static ssize_t reclaim_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int err, nid = dev->id;
+	gfp_t gfp_mask = GFP_KERNEL;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	unsigned long nr_to_reclaim, nr_reclaimed = 0;
+	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
+	struct scan_control sc = {
+		.gfp_mask = current_gfp_context(gfp_mask),
+		.reclaim_idx = gfp_zone(gfp_mask),
+		.priority = NODE_RECLAIM_PRIORITY,
+		.may_writepage = !laptop_mode,
+		.may_unmap = 1,
+		.may_swap = 1,
+	};
+
+	buf = strstrip((char *)buf);
+	err = page_counter_memparse(buf, "", &nr_to_reclaim);
+	if (err)
+		return err;
+
+	sc.nr_to_reclaim = max(nr_to_reclaim, SWAP_CLUSTER_MAX);
+
+	while (nr_reclaimed < nr_to_reclaim) {
+		unsigned long reclaimed;
+
+		if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags))
+			return -EAGAIN;
+
+		/* does cond_resched() */
+		reclaimed = __node_reclaim(pgdat, gfp_mask,
+					   nr_to_reclaim - nr_reclaimed, &sc);
+
+		clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);
+
+		if (!reclaimed && !nr_retries--)
+			break;
+
+		nr_reclaimed += reclaimed;
+	}
+
+	return nr_reclaimed < nr_to_reclaim ? -EAGAIN : count;
+}
+
+static DEVICE_ATTR_WO(reclaim);
+int reclaim_register_node(struct node *node)
+{
+	return device_create_file(&node->dev, &dev_attr_reclaim);
+}
+
+void reclaim_unregister_node(struct node *node)
+{
+	return device_remove_file(&node->dev, &dev_attr_reclaim);
+}
+#endif
-- 
2.26.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ