[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231113233502.587879658@redhat.com>
Date: Mon, 13 Nov 2023 20:34:22 -0300
From: Marcelo Tosatti <mtosatti@...hat.com>
To: linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc: Michal Hocko <mhocko@...e.com>, Vlastimil Babka <vbabka@...e.cz>,
Andrew Morton <akpm@...ux-foundation.org>,
David Hildenbrand <david@...hat.com>,
Peter Xu <peterx@...hat.com>,
Marcelo Tosatti <mtosatti@...hat.com>
Subject: [patch 2/2] mm: vmstat: use node_page_state_snapshot in too_many_isolated
A customer reported seeing processes hung at too_many_isolated,
while analysis indicated that the problem occurred due to out
of sync per-CPU stats (see below).
The fix is to use node_page_state_snapshot to avoid reading stale values.
2136 static unsigned long
2137 shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
2138 struct scan_control *sc, enum lru_list lru)
2139 {
:
2145 bool file = is_file_lru(lru);
:
2147 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
:
2150 while (unlikely(too_many_isolated(pgdat, file, sc))) {
2151 if (stalled)
2152 return 0;
2153
2154 /* wait a bit for the reclaimer. */
2155 msleep(100); <--- some processes were sleeping here, with pending SIGKILL.
2156 stalled = true;
2157
2158 /* We are about to die and free our memory. Return now. */
2159 if (fatal_signal_pending(current))
2160 return SWAP_CLUSTER_MAX;
2161 }
msleep() must be called only when there are too many isolated pages:
2019 static int too_many_isolated(struct pglist_data *pgdat, int file,
2020 struct scan_control *sc)
2021 {
:
2030 if (file) {
2031 inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
2032 isolated = node_page_state(pgdat, NR_ISOLATED_FILE);
2033 } else {
:
2046 return isolated > inactive;
The return value was true since:
crash> p ((struct pglist_data *) 0xffff00817fffe580)->vm_stat[NR_INACTIVE_FILE]
$8 = {
counter = 1
}
crash> p ((struct pglist_data *) 0xffff00817fffe580)->vm_stat[NR_ISOLATED_FILE]
$9 = {
counter = 2
}
while per_cpu stats had:
crash> p ((struct pglist_data *) 0xffff00817fffe580)->per_cpu_nodestats
$85 = (struct per_cpu_nodestat *) 0xffff8000118832e0
crash> p/x 0xffff8000118832e0 + __per_cpu_offset[42]
$86 = 0xffff00917fcc32e0
crash> p ((struct per_cpu_nodestat *) 0xffff00917fcc32e0)->vm_node_stat_diff[NR_ISOLATED_FILE]
$87 = -1 '\377'
crash> p/x 0xffff8000118832e0 + __per_cpu_offset[44]
$89 = 0xffff00917fe032e0
crash> p ((struct per_cpu_nodestat *) 0xffff00917fe032e0)->vm_node_stat_diff[NR_ISOLATED_FILE]
$91 = -1 '\377'
It seems that processes were trapped in a direct reclaim/compaction loop
because these nodes had fewer free pages than the min watermark.
crash> kmem -z | grep -A 3 Normal
:
NODE: 4 ZONE: 1 ADDR: ffff00817fffec40 NAME: "Normal"
SIZE: 8454144 PRESENT: 98304 MIN/LOW/HIGH: 68/166/264
VM_STAT:
NR_FREE_PAGES: 68
--
NODE: 5 ZONE: 1 ADDR: ffff00897fffec40 NAME: "Normal"
SIZE: 118784 MIN/LOW/HIGH: 82/200/318
VM_STAT:
NR_FREE_PAGES: 45
--
NODE: 6 ZONE: 1 ADDR: ffff00917fffec40 NAME: "Normal"
SIZE: 118784 MIN/LOW/HIGH: 82/200/318
VM_STAT:
NR_FREE_PAGES: 53
--
NODE: 7 ZONE: 1 ADDR: ffff00997fbbec40 NAME: "Normal"
SIZE: 118784 MIN/LOW/HIGH: 82/200/318
VM_STAT:
NR_FREE_PAGES: 52
Signed-off-by: Marcelo Tosatti <mtosatti@...hat.com>
---
mm/compaction.c | 6 +++---
mm/vmscan.c | 8 ++++----
2 files changed, 7 insertions(+), 7 deletions(-)
Index: linux/mm/compaction.c
===================================================================
--- linux.orig/mm/compaction.c
+++ linux/mm/compaction.c
@@ -791,11 +791,11 @@ static bool too_many_isolated(struct com
unsigned long active, inactive, isolated;
- inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
+ inactive = node_page_state_snapshot(pgdat, NR_INACTIVE_FILE) +
node_page_state(pgdat, NR_INACTIVE_ANON);
- active = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ active = node_page_state_snapshot(pgdat, NR_ACTIVE_FILE) +
node_page_state(pgdat, NR_ACTIVE_ANON);
- isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
+ isolated = node_page_state_snapshot(pgdat, NR_ISOLATED_FILE) +
node_page_state(pgdat, NR_ISOLATED_ANON);
/*
Index: linux/mm/vmscan.c
===================================================================
--- linux.orig/mm/vmscan.c
+++ linux/mm/vmscan.c
@@ -1756,11 +1756,11 @@ static int too_many_isolated(struct pgli
return 0;
if (file) {
- inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
- isolated = node_page_state(pgdat, NR_ISOLATED_FILE);
+ inactive = node_page_state_snapshot(pgdat, NR_INACTIVE_FILE);
+ isolated = node_page_state_snapshot(pgdat, NR_ISOLATED_FILE);
} else {
- inactive = node_page_state(pgdat, NR_INACTIVE_ANON);
- isolated = node_page_state(pgdat, NR_ISOLATED_ANON);
+ inactive = node_page_state_snapshot(pgdat, NR_INACTIVE_ANON);
+ isolated = node_page_state_snapshot(pgdat, NR_ISOLATED_ANON);
}
/*
Powered by blists - more mailing lists