[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240819023145.2415299-7-usamaarif642@gmail.com>
Date: Mon, 19 Aug 2024 03:30:59 +0100
From: Usama Arif <usamaarif642@...il.com>
To: akpm@...ux-foundation.org,
linux-mm@...ck.org
Cc: hannes@...xchg.org,
riel@...riel.com,
shakeel.butt@...ux.dev,
roman.gushchin@...ux.dev,
yuzhao@...gle.com,
david@...hat.com,
baohua@...nel.org,
ryan.roberts@....com,
rppt@...nel.org,
willy@...radead.org,
cerasuolodomenico@...il.com,
ryncsn@...il.com,
corbet@....net,
linux-kernel@...r.kernel.org,
linux-doc@...r.kernel.org,
kernel-team@...a.com,
Usama Arif <usamaarif642@...il.com>
Subject: [PATCH v4 6/6] mm: add sysfs entry to disable splitting underused THPs
If disabled, THPs faulted in or collapsed will not be added to
_deferred_list, and therefore won't be considered for splitting under
memory pressure if underused.
Signed-off-by: Usama Arif <usamaarif642@...il.com>
---
Documentation/admin-guide/mm/transhuge.rst | 10 +++++++++
mm/huge_memory.c | 26 ++++++++++++++++++++++
2 files changed, 36 insertions(+)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 40741b892aff..02ae7bc9efbd 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+All THPs at fault and collapse time will be added to _deferred_list,
+and will therefore be split under memory presure if they are considered
+"underused". A THP is underused if the number of zero-filled pages in
+the THP is above max_ptes_none (see below). It is possible to disable
+this behaviour by writing 0 to shrink_underused, and enable it by writing
+1 to it::
+
+ echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+ echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+
khugepaged will be automatically started when PMD-sized THP is enabled
(either of the per-size anon control or the top-level control are set
to "always" or "madvise"), and it'll be automatically shutdown when
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f5363cf900f9..5d67d3b3c1b2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -74,6 +74,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc);
+static bool split_underused_thp = true;
static atomic_t huge_zero_refcount;
struct folio *huge_zero_folio __read_mostly;
@@ -439,6 +440,27 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj,
static struct kobj_attribute hpage_pmd_size_attr =
__ATTR_RO(hpage_pmd_size);
+static ssize_t split_underused_thp_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", split_underused_thp);
+}
+
+static ssize_t split_underused_thp_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err = kstrtobool(buf, &split_underused_thp);
+
+ if (err < 0)
+ return err;
+
+ return count;
+}
+
+static struct kobj_attribute split_underused_thp_attr = __ATTR(
+ shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store);
+
static struct attribute *hugepage_attr[] = {
&enabled_attr.attr,
&defrag_attr.attr,
@@ -447,6 +469,7 @@ static struct attribute *hugepage_attr[] = {
#ifdef CONFIG_SHMEM
&shmem_enabled_attr.attr,
#endif
+ &split_underused_thp_attr.attr,
NULL,
};
@@ -3477,6 +3500,9 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
if (folio_order(folio) <= 1)
return;
+ if (!partially_mapped && !split_underused_thp)
+ return;
+
/*
* The try_to_unmap() in page reclaim path might reach here too,
* this may cause a race condition to corrupt deferred split queue.
--
2.43.5
Powered by blists - more mailing lists