Message-ID: <69612b83.050a0220.1c677c.03cf.GAE@google.com>
Date: Fri, 09 Jan 2026 08:23:31 -0800
From: syzbot <syzbot+d97580a8cceb9b03c13e@...kaller.appspotmail.com>
To: linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com
Subject: Forwarded: [PATCH] mm/swap_cgroup: fix kernel BUG in swap_cgroup_record
For archival purposes, forwarding an incoming command email to
linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com.
***
Subject: [PATCH] mm/swap_cgroup: fix kernel BUG in swap_cgroup_record
Author: kartikey406@...il.com
#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
When using MADV_PAGEOUT, pages can remain in the swap cache with their
swap entries still assigned. If MADV_PAGEOUT is called again on these
pages, the same swap entries are reused, so memcg1_swapout() ends up
calling swap_cgroup_record() for an entry that has already been recorded.
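
For illustration, a userspace sequence of roughly this shape can hit the
path twice (a hypothetical sketch, not the actual syzbot reproducer;
whether the folios stay in the swap cache also depends on the swap
backend and on remaining references):

    /* Hypothetical sketch: page out the same anonymous range twice
     * without touching it in between, so the swap entries assigned by
     * the first pass are still attached when the second pass runs. */
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 16 * 4096;
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
            return 1;
        memset(p, 1, len);              /* make the pages resident */
        madvise(p, len, MADV_PAGEOUT);  /* swap entries get recorded */
        madvise(p, len, MADV_PAGEOUT);  /* same entries seen again */
        return 0;
    }
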
The existing code assumes that swap entries are always being recorded
for the first time (oldid == 0) and triggers a VM_BUG_ON when it
encounters an already-recorded entry:

------------[ cut here ]------------
kernel BUG at mm/swap_cgroup.c:78!
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 0 UID: 0 PID: 6176 Comm: syz.0.30 Not tainted
RIP: 0010:swap_cgroup_record+0x19c/0x1c0 mm/swap_cgroup.c:78
Call Trace:
memcg1_swapout+0x2fa/0x830 mm/memcontrol-v1.c:623
__remove_mapping+0xac5/0xe30 mm/vmscan.c:773
shrink_folio_list+0x2786/0x4f40 mm/vmscan.c:1528
reclaim_folio_list+0xeb/0x4e0 mm/vmscan.c:2208
reclaim_pages+0x454/0x520 mm/vmscan.c:2245
madvise_cold_or_pageout_pte_range+0x19a0/0x1ce0 mm/madvise.c:563
...
do_madvise+0x1bc/0x270 mm/madvise.c:2030
__do_sys_madvise mm/madvise.c:2039

This bug occurs because pages sitting in the swap cache can be targeted
by MADV_PAGEOUT multiple times without being swapped back in between
passes. Each pass reuses the same swap entry, but swap_cgroup_record()
expects to record only new, unused entries.
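
The assumption lives in the record path, which exchanges the cgroup id
for every swap entry of the folio and asserts that the old value was
zero. Paraphrased (the exact code is in mm/swap_cgroup.c):

    /* Paraphrased shape of the swap_cgroup_record() loop; the
     * VM_BUG_ON below is the one that fires in the oops above. */
    do {
        old = __swap_cgroup_id_xchg(map, offset, id);
        VM_BUG_ON(old);
    } while (++offset != end);
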
Fix this by checking if the swap entry already has the correct cgroup ID
recorded before attempting to record it. Add a new helper function
swap_cgroup_lookup() to read the current cgroup ID without modifying it.
In memcg1_swapout(), check if the entry is already correctly recorded and
return early if so, avoiding unnecessary work and the crash. Only call
swap_cgroup_record() when the entry needs to be set or updated.

This approach is more efficient than making swap_cgroup_record()
idempotent, as it avoids unnecessary atomic operations, reference-count
manipulations, and statistics updates when the entry is already correct.
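
For comparison, the idempotent alternative would look roughly like the
following (hypothetical, not part of this patch). It still pays for the
atomic exchange on every entry and merely tolerates a matching old id:

    /* Hypothetical idempotent variant of the record loop, shown only
     * to illustrate the cost argument above. */
    do {
        old = __swap_cgroup_id_xchg(map, offset, id);
        VM_BUG_ON(old && old != id);
    } while (++offset != end);
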
Link: https://syzkaller.appspot.com/bug?extid=d97580a8cceb9b03c13e
Reported-by: syzbot+d97580a8cceb9b03c13e@...kaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <kartikey406@...il.com>
---
 include/linux/swap_cgroup.h |  6 ++++++
 mm/memcontrol-v1.c          |  7 +++++++
 mm/swap_cgroup.c            | 18 ++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index 91cdf12190a0..fd79e7bf8917 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -7,6 +7,7 @@
 #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
 
 extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
+extern unsigned short swap_cgroup_lookup(swp_entry_t ent);
 extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
@@ -19,6 +20,11 @@ void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
 {
 }
 
+static inline unsigned short swap_cgroup_lookup(swp_entry_t ent)
+{
+	return 0;
+}
+
 static inline
 unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents)
 {
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 56d27baf93ab..37899d156b2a 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -614,6 +614,7 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 {
 	struct mem_cgroup *memcg, *swap_memcg;
 	unsigned int nr_entries;
+	unsigned short oldid;
 
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
@@ -630,6 +631,12 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
 	if (!memcg)
 		return;
 
+	/* Already recorded by an earlier MADV_PAGEOUT pass; nothing to do. */
+	oldid = swap_cgroup_lookup(entry);
+	if (oldid == mem_cgroup_id(memcg))
+		return;
+	VM_WARN_ON_ONCE(oldid != 0);
+
 	/*
 	 * In case the memcg owning these pages has been offlined and doesn't
 	 * have an ID allocated to it anymore, charge the closest online
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index de779fed8c21..083eda4b67d6 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -51,6 +51,24 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
 	return old_id;
 }
 
+unsigned short swap_cgroup_lookup(swp_entry_t ent)
+{
+	struct swap_cgroup *sc;
+	unsigned short id;
+	pgoff_t offset = swp_offset(ent);
+	unsigned short type = swp_type(ent);
+
+	if (type >= MAX_SWAPFILES)
+		return 0;
+
+	sc = swap_cgroup_ctrl[type].map;
+	if (!sc)
+		return 0;
+	/* IDs are packed inside each atomic_t; unpack via the helper. */
+	id = __swap_cgroup_id_lookup(sc, offset);
+	return id;
+}
+
 /**
  * swap_cgroup_record - record mem_cgroup for a set of swap entries.
  * These entries must belong to one single folio, and that folio
--
2.43.0