[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230911075437.74027-3-zeil@nebius.com>
Date: Mon, 11 Sep 2023 07:55:20 +0000
From: "Yakunin, Dmitry (Nebius)" <zeil@...ius.com>
To: "cgroups@...r.kernel.org" <cgroups@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-mm@...ck.org" <linux-mm@...ck.org>
CC: NB-Core Team <NB-CoreTeam@...ius.com>,
"tj@...nel.org" <tj@...nel.org>,
"hannes@...xchg.org" <hannes@...xchg.org>,
"mhocko@...nel.org" <mhocko@...nel.org>,
"Yakunin, Dmitry (Nebius)" <zeil@...ius.com>,
Konstantin Khlebnikov <khlebnikov@...dex-team.ru>,
Andrey Ryabinin <arbn@...dex-team.com>
Subject: [RFC PATCH 2/3] proc/kpagecgroup: report also inode numbers of
offline cgroups
By default this interface reports the inode number of the closest online
ancestor if the cgroup is offline (removed). Information about the real owner
is required to detect which pages keep a removed cgroup around.
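This patch adds a per-file mode, which is changed by writing a 64-bit flags
word into an opened /proc/kpagecgroup. For now only the first bit is used: when it is set, the inode number of the cgroup the page is charged to is reported even if that cgroup is offline.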
Link: https://lore.kernel.org/lkml/153414348994.737150.10057219558779418929.stgit@buzz
Suggested-by: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
Reviewed-by: Andrey Ryabinin <arbn@...dex-team.com>
Signed-off-by: Dmitry Yakunin <zeil@...ius.com>
---
fs/proc/page.c | 24 ++++++++++++++++++++++--
include/linux/memcontrol.h | 2 +-
mm/memcontrol.c | 5 +++--
mm/memory-failure.c | 2 +-
4 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 195b077c0fac..ae6feca2bbc7 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -278,6 +278,7 @@ static const struct proc_ops kpageflags_proc_ops = {
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ unsigned long flags = (unsigned long)file->private_data;
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
struct page *ppage;
@@ -301,7 +302,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
ppage = pfn_to_online_page(pfn);
if (ppage)
- ino = page_cgroup_ino(ppage);
+ ino = page_cgroup_ino(ppage, !(flags & 1));
else
ino = 0;
@@ -323,10 +324,29 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t kpagecgroup_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 flags;
+
+ /* The per-file mode is set by writing exactly one 64-bit flags word. */
+ if (count != sizeof(flags))
+ return -EINVAL;
+ /* get_user(flags, buf) would size the access by *buf, i.e. one byte. */
+ if (copy_from_user(&flags, buf, sizeof(flags)))
+ return -EFAULT;
+ /* Only bit 0 is defined for now; reject any other bit. */
+ if (flags > 1)
+ return -EINVAL;
+ file->private_data = (void *)(unsigned long)flags;
+ return count;
+}
+
static const struct proc_ops kpagecgroup_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpagecgroup_read,
+ .proc_write = kpagecgroup_write,
};
#endif /* CONFIG_MEMCG */
@@ -335,7 +355,7 @@ static int __init proc_page_init(void)
proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
#ifdef CONFIG_MEMCG
- proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
+ proc_create("kpagecgroup", 0600, NULL, &kpagecgroup_proc_ops);
#endif
return 0;
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 222d7370134c..bbbddaa260d3 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -892,7 +892,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
}
struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
-ino_t page_cgroup_ino(struct page *page);
+ino_t page_cgroup_ino(struct page *page, bool online);
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7b3d4a10ac63..48cfe3695e06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -380,6 +380,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
/**
* page_cgroup_ino - return inode number of the memcg a page is charged to
* @page: the page
+ * @online: return closest online ancestor
*
* Look up the closest online ancestor of the memory cgroup @page is charged to
* and return its inode number or 0 if @page is not charged to any cgroup. It
@@ -390,7 +391,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
* after page_cgroup_ino() returns, so it only should be used by callers that
* do not care (such as procfs interfaces).
*/
-ino_t page_cgroup_ino(struct page *page)
+ino_t page_cgroup_ino(struct page *page, bool online)
{
struct mem_cgroup *memcg;
unsigned long ino = 0;
@@ -399,7 +400,7 @@ ino_t page_cgroup_ino(struct page *page)
/* page_folio() is racy here, but the entire function is racy anyway */
memcg = folio_memcg_check(page_folio(page));
- while (memcg && !(memcg->css.flags & CSS_ONLINE))
+ while (memcg && online && !(memcg->css.flags & CSS_ONLINE))
memcg = parent_mem_cgroup(memcg);
if (memcg)
ino = cgroup_ino(memcg->css.cgroup);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5b663eca1f29..6734489b2435 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -267,7 +267,7 @@ static int hwpoison_filter_task(struct page *p)
if (!hwpoison_filter_memcg)
return 0;
- if (page_cgroup_ino(p) != hwpoison_filter_memcg)
+ if (page_cgroup_ino(p, true) != hwpoison_filter_memcg)
return -EINVAL;
return 0;
--
2.25.1
Powered by blists - more mailing lists