[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241030083311.965933-4-gutierrez.asier@huawei-partners.com>
Date: Wed, 30 Oct 2024 16:33:11 +0800
From: <gutierrez.asier@...wei-partners.com>
To: <akpm@...ux-foundation.org>, <david@...hat.com>, <ryan.roberts@....com>,
<baohua@...nel.org>, <willy@...radead.org>, <peterx@...hat.com>,
<hannes@...xchg.org>, <hocko@...nel.org>, <roman.gushchin@...ux.dev>,
<shakeel.butt@...ux.dev>, <muchun.song@...ux.dev>
CC: <cgroups@...r.kernel.org>, <linux-mm@...ck.org>,
<linux-kernel@...r.kernel.org>, <stepanov.anatoly@...wei.com>,
<alexander.kozhevnikov@...wei-partners.com>, <guohanjun@...wei.com>,
<weiyongjun1@...wei.com>, <wangkefeng.wang@...wei.com>,
<judy.chenhui@...wei.com>, <yusongping@...wei.com>, <artem.kuzin@...wei.com>,
<kang.sun@...wei.com>
Subject: [RFC PATCH 3/3] mm: Add thp_defrag control for cgroup
From: Asier Gutierrez <gutierrez.asier@...wei-partners.com>
This patch exposes a new file in memory cgroups: memory.thp_defrag, which
follows the /sys/kernel/mm/transparent_hugepage/defrag style. Support for
different THP defrag policies for memory cgroups was also added.
Signed-off-by: Asier Gutierrez <gutierrez.asier@...wei-partners.com>
Signed-off-by: Anatoly Stepanov <stepanov.anatoly@...wei.com>
Reviewed-by: Alexander Kozhevnikov <alexander.kozhevnikov@...wei-partners.com>
---
include/linux/huge_mm.h | 8 +++
include/linux/memcontrol.h | 4 +-
mm/huge_memory.c | 116 ++++++++++++++++++++++---------------
mm/memcontrol.c | 31 ++++++++++
4 files changed, 112 insertions(+), 47 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f99ac9b7e5bc..177c7d3578ed 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -56,6 +56,12 @@ enum transparent_hugepage_flag {
#define HUGEPAGE_FLAGS_ENABLED_MASK ((1UL << TRANSPARENT_HUGEPAGE_FLAG) |\
(1UL << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+#define HUGEPAGE_FLAGS_DEFRAG_MASK ((1UL << TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG) |\
+ (1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG) |\
+ (1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG) |\
+ (1UL << TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) |\
+ (1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
+
struct kobject;
struct kobj_attribute;
@@ -442,7 +448,9 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmdp, struct folio *folio);
int thp_enabled_parse(const char *buf, unsigned long *flags);
+int thp_defrag_parse(const char *buf, unsigned long *flags);
const char *thp_enabled_string(unsigned long flags);
+const char *thp_defrag_string(unsigned long flags);
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d78318782af8..a0edf15b3a07 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1634,9 +1634,11 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int memory_thp_enabled_show(struct seq_file *m, void *v);
+int memory_thp_defrag_show(struct seq_file *m, void *v);
ssize_t memory_thp_enabled_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off);
-
+ssize_t memory_thp_defrag_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off);
int mem_cgroup_thp_flags_update_all(unsigned long flags, unsigned long mask);
unsigned long memcg_get_thp_flags_all(unsigned long mask);
unsigned long memcg_get_thp_flags(struct vm_area_struct *vma);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fdffdfc8605c..6e1886b220d9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -311,6 +311,28 @@ const char *thp_enabled_string(unsigned long flags)
return output;
}
+const char *thp_defrag_string(unsigned long flags)
+{
+ const char *output;
+
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+ &flags))
+ output = "[always] defer defer+madvise madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+ &flags))
+ output = "always [defer] defer+madvise madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
+ &flags))
+ output = "always defer [defer+madvise] madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+ &flags))
+ output = "always defer defer+madvise [madvise] never";
+ else
+ output = "always defer defer+madvise madvise [never]";
+
+ return output;
+}
+
int thp_enabled_parse(const char *buf, unsigned long *flags)
{
if (sysfs_streq(buf, "always")) {
@@ -328,6 +350,39 @@ int thp_enabled_parse(const char *buf, unsigned long *flags)
return 0;
}
+int thp_defrag_parse(const char *buf, unsigned long *flags)
+{
+ if (sysfs_streq(buf, "always")) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+ } else if (sysfs_streq(buf, "defer+madvise")) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+ } else if (sysfs_streq(buf, "defer")) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+ } else if (sysfs_streq(buf, "madvise")) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+ } else if (sysfs_streq(buf, "never")) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
#ifdef CONFIG_SYSFS
static ssize_t enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -394,60 +449,29 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj,
static ssize_t defrag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- const char *output;
-
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
- &transparent_hugepage_flags))
- output = "[always] defer defer+madvise madvise never";
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
- &transparent_hugepage_flags))
- output = "always [defer] defer+madvise madvise never";
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
- &transparent_hugepage_flags))
- output = "always defer [defer+madvise] madvise never";
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
- &transparent_hugepage_flags))
- output = "always defer defer+madvise [madvise] never";
- else
- output = "always defer defer+madvise madvise [never]";
-
- return sysfs_emit(buf, "%s\n", output);
+ unsigned long flags = transparent_hugepage_flags;
+ return sysfs_emit(buf, "%s\n", thp_defrag_string(flags));
}
static ssize_t defrag_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- if (sysfs_streq(buf, "always")) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- } else if (sysfs_streq(buf, "defer+madvise")) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- } else if (sysfs_streq(buf, "defer")) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
- } else if (sysfs_streq(buf, "madvise")) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- } else if (sysfs_streq(buf, "never")) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- } else
- return -EINVAL;
+ ssize_t ret = count;
+ int err;
- return count;
+ ret = thp_defrag_parse(buf, &transparent_hugepage_flags) ? : count;
+ if (ret > 0 && IS_ENABLED(CONFIG_MEMCG) &&
+ !mem_cgroup_disabled()) {
+ err = mem_cgroup_thp_flags_update_all(transparent_hugepage_flags,
+ HUGEPAGE_FLAGS_DEFRAG_MASK);
+ if (err)
+ ret = err;
+ }
+
+ return ret;
}
+
static struct kobj_attribute defrag_attr = __ATTR_RW(defrag);
static ssize_t use_zero_page_show(struct kobject *kobj,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 938e6894c0b3..53384f0a69af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3706,6 +3706,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG));
WRITE_ONCE(memcg->thp_anon_orders_inherit, BIT(PMD_ORDER));
#endif
@@ -4490,6 +4492,30 @@ ssize_t memory_thp_enabled_write(struct kernfs_open_file *of, char *buf,
mutex_unlock(&memcg_thp_flags_mutex);
return ret;
}
+
+int memory_thp_defrag_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+ unsigned long flags = READ_ONCE(memcg->thp_flags);
+
+ seq_printf(m, "%s\n", thp_defrag_string(flags));
+ return 0;
+}
+
+ssize_t memory_thp_defrag_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ int ret = nbytes;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+
+ buf = strstrip(buf);
+
+ mutex_lock(&memcg_thp_flags_mutex);
+ ret = thp_defrag_parse(buf, &memcg->thp_flags) ? : nbytes;
+ mutex_unlock(&memcg_thp_flags_mutex);
+
+ return ret;
+}
#endif
static struct cftype memory_files[] = {
@@ -4566,6 +4592,11 @@ static struct cftype memory_files[] = {
.seq_show = memory_thp_enabled_show,
.write = memory_thp_enabled_write,
},
+ {
+ .name = "thp_defrag",
+ .seq_show = memory_thp_defrag_show,
+ .write = memory_thp_defrag_write,
+ },
#endif
{ } /* terminate */
};
--
2.34.1
Powered by blists - more mailing lists