Message-Id: <20220630083044.997474-1-yosryahmed@google.com>
Date: Thu, 30 Jun 2022 08:30:44 +0000
From: Yosry Ahmed <yosryahmed@...gle.com>
To: Johannes Weiner <hannes@...xchg.org>,
Michal Hocko <mhocko@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeelb@...gle.com>,
Muchun Song <songmuchun@...edance.com>,
Andrew Morton <akpm@...ux-foundation.org>
Cc: Matthew Wilcox <willy@...radead.org>,
Vlastimil Babka <vbabka@...e.cz>,
David Hildenbrand <david@...hat.com>,
Miaohe Lin <linmiaohe@...wei.com>, NeilBrown <neilb@...e.de>,
Alistair Popple <apopple@...dia.com>,
Suren Baghdasaryan <surenb@...gle.com>,
Peter Xu <peterx@...hat.com>, linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org, linux-mm@...ck.org,
Yosry Ahmed <yosryahmed@...gle.com>
Subject: [PATCH v3] mm: vmpressure: don't count proactive reclaim in vmpressure
vmpressure is used in cgroup v1 to notify userspace of reclaim
efficiency events, and is also used in both cgroup v1 and v2 as a
memory pressure signal for the networking layer; see
mem_cgroup_under_socket_pressure().
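For context, the networking-side consumer of this signal looks roughly
like the following (a simplified sketch of sk_under_memory_pressure()
in include/net/sock.h; details may differ by kernel version):

	static inline bool sk_under_memory_pressure(const struct sock *sk)
	{
		if (!sk->sk_prot->memory_pressure)
			return false;

		/* socket pressure is set by vmpressure() on the socket's memcg */
		if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
		    mem_cgroup_under_socket_pressure(sk->sk_memcg))
			return true;

		return !!*sk->sk_prot->memory_pressure;
	}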
Proactive reclaim is intended to probe memcgs for cold memory without
affecting their performance. Hence, reclaim caused by writing to
memory.reclaim should not trigger vmpressure.
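For illustration, proactive reclaim is driven from userspace by writing
a size to memory.reclaim; a minimal sketch (the cgroup path is
hypothetical):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* hypothetical cgroup; memory.reclaim is the interface changed here */
		const char *path = "/sys/fs/cgroup/example/memory.reclaim";
		const char *req = "10M";	/* try to reclaim 10M of cold memory */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* write fails with EAGAIN if the kernel could not reclaim enough */
		if (write(fd, req, strlen(req)) < 0)
			perror("write");
		close(fd);
		return 0;
	}

With this patch, such a write still reclaims as before but no longer
contributes to vmpressure notifications or the socket pressure signal.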
Signed-off-by: Yosry Ahmed <yosryahmed@...gle.com>
---
Changes in v3:
- Limited the vmpressure change to memory.reclaim, dropped psi changes,
updated changelog to reflect new behavior (Michal, Shakeel)
Changes in v2:
- Removed unnecessary initializations to zero (Andrew).
- Separate declarations and initializations when it causes line wrapping
(Andrew).
---
include/linux/swap.h | 5 ++++-
mm/memcontrol.c | 27 ++++++++++++++++++---------
mm/vmscan.c | 27 +++++++++++++++++----------
3 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0c0fed1b348f2..f6e9eaa2339fe 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -411,10 +411,13 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
+
+#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
+#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap);
+ unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
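
To illustrate the new interface: MEMCG_RECLAIM_MAY_SWAP carries what the
old may_swap bool expressed, and MEMCG_RECLAIM_PROACTIVE marks reclaim
initiated via memory.reclaim. A hypothetical caller wanting both would
pass:

	unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
				       MEMCG_RECLAIM_PROACTIVE;

	nr_reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages,
						    GFP_KERNEL,
						    reclaim_options);

(this mirrors the memory_reclaim() hunk below).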
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f2731d62eef2..8dc29cbb5fbf7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2319,6 +2319,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
unsigned long nr_reclaimed = 0;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
do {
unsigned long pflags;
@@ -2331,7 +2332,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
- gfp_mask, true);
+ gfp_mask,
+ reclaim_options);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2576,7 +2578,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
struct page_counter *counter;
unsigned long nr_reclaimed;
bool passed_oom = false;
- bool may_swap = true;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
bool drained = false;
unsigned long pflags;
@@ -2593,7 +2595,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
mem_over_limit = mem_cgroup_from_counter(counter, memory);
} else {
mem_over_limit = mem_cgroup_from_counter(counter, memsw);
- may_swap = false;
+ reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
}
if (batch > nr_pages) {
@@ -2620,7 +2622,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, may_swap);
+ gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3369,6 +3371,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
int ret;
bool limits_invariant;
struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory;
+ unsigned int reclaim_options = memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP;
do {
if (signal_pending(current)) {
@@ -3403,7 +3406,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, !memsw)) {
+ GFP_KERNEL, reclaim_options)) {
ret = -EBUSY;
break;
}
@@ -3502,6 +3505,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
{
int nr_retries = MAX_RECLAIM_RETRIES;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
/* we call try-to-free pages for make this cgroup empty */
lru_add_drain_all();
@@ -3513,7 +3517,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
if (signal_pending(current))
return -EINTR;
- if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true))
+ if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
+ reclaim_options))
nr_retries--;
}
@@ -6222,6 +6227,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
bool drained = false;
unsigned long high;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
int err;
buf = strstrip(buf);
@@ -6248,7 +6254,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, true);
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
break;
@@ -6271,6 +6277,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
unsigned int nr_reclaims = MAX_RECLAIM_RETRIES;
bool drained = false;
unsigned long max;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
int err;
buf = strstrip(buf);
@@ -6297,7 +6304,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, true))
+ GFP_KERNEL, reclaim_options))
nr_reclaims--;
continue;
}
@@ -6426,6 +6433,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
+ unsigned int reclaim_options;
int err;
buf = strstrip(buf);
@@ -6433,6 +6441,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
if (err)
return err;
+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;
@@ -6449,7 +6458,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
- GFP_KERNEL, true);
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f7d9a683e3a7d..0969e6408a539 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -102,6 +102,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
+ /* Proactive reclaim invoked by userspace through memory.reclaim */
+ unsigned int proactive:1;
+
/*
* Cgroup memory below memory.low is protected as long as we
* don't threaten to OOM. If any cgroup is reclaimed at
@@ -3125,9 +3128,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->priority);
/* Record the group's reclaim efficiency */
- vmpressure(sc->gfp_mask, memcg, false,
- sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, memcg, false,
+ sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}
@@ -3250,9 +3254,10 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
}
/* Record the subtree's reclaim efficiency */
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
- sc->nr_scanned - nr_scanned,
- sc->nr_reclaimed - nr_reclaimed);
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+ sc->nr_scanned - nr_scanned,
+ sc->nr_reclaimed - nr_reclaimed);
if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
@@ -3534,8 +3539,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
- vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
- sc->priority);
+ if (!sc->proactive)
+ vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
+ sc->priority);
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);
@@ -3825,7 +3831,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap)
+ unsigned int reclaim_options)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
@@ -3838,7 +3844,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
.may_unmap = 1,
- .may_swap = may_swap,
+ .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
+ .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
--
2.37.0.rc0.161.g10f37bed90-goog