Message-ID: <20141021182239.GA24899@phnom.home.cmpxchg.org>
Date: Tue, 21 Oct 2014 14:22:39 -0400
From: Johannes Weiner <hannes@...xchg.org>
To: Vladimir Davydov <vdavydov@...allels.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Michal Hocko <mhocko@...e.cz>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] memcg: remove mem_cgroup_reclaimable check from soft
reclaim
On Tue, Oct 21, 2014 at 05:15:50PM +0400, Vladimir Davydov wrote:
> mem_cgroup_reclaimable() checks whether a cgroup has reclaimable pages
> on *any* NUMA node. However, the only place where it's called is
> mem_cgroup_soft_reclaim(), which tries to reclaim memory from a
> *specific* zone. So the way it's used is incorrect: it will return
> true even if the cgroup has no pages in the zone we're scanning.
>
> I think we can get rid of this check completely, because
> mem_cgroup_shrink_node_zone(), which is called by
> mem_cgroup_soft_reclaim() if mem_cgroup_reclaimable() returns true, is
> equivalent to shrink_lruvec(), which exits almost immediately if the
> lruvec passed to it is empty. So there's no need to optimize anything
> here. Besides, we don't have such a check in the general scan path
> (shrink_zone) either.
>
> Signed-off-by: Vladimir Davydov <vdavydov@...allels.com>
Acked-by: Johannes Weiner <hannes@...xchg.org>
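
For reference, the reason the check is safe to drop in the first place:
shrink_lruvec() derives its scan targets from the LRU list sizes, so an
empty lruvec produces zero targets and the scan loop never runs. Here is
a rough user-space sketch of that control flow (simplified stand-ins,
not the real kernel types or code):

#include <stdio.h>

#define NR_LRU_LISTS 5

/* simplified stand-in for the kernel's struct lruvec */
struct lruvec {
	unsigned long lru_size[NR_LRU_LISTS];
};

/*
 * Analogue of shrink_lruvec(): an empty lruvec yields zero scan
 * targets, so the loop below never executes and we return at once.
 */
static unsigned long shrink_lruvec(struct lruvec *lruvec)
{
	unsigned long nr_to_scan = 0, nr_reclaimed = 0;
	int lru;

	for (lru = 0; lru < NR_LRU_LISTS; lru++)
		nr_to_scan += lruvec->lru_size[lru];

	while (nr_to_scan) {
		/* the real code isolates and reclaims pages here */
		nr_to_scan--;
		nr_reclaimed++;
	}
	return nr_reclaimed;
}

int main(void)
{
	struct lruvec empty = { { 0 } };

	printf("reclaimed: %lu\n", shrink_lruvec(&empty)); /* prints 0 */
	return 0;
}
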
How about this on top?
---
From 27bd24b00433d9f6c8d60ba2b13dbff158b06c13 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@...xchg.org>
Date: Tue, 21 Oct 2014 09:53:54 -0400
Subject: [patch] mm: memcontrol: do not filter reclaimable nodes in NUMA
round-robin
The round-robin node reclaim currently tries to include only nodes
that have memory of the memcg in question, which is quite elaborate.
Just use plain round-robin over the nodes that are allowed by the
task's cpuset, which are the most likely to contain that memcg's
memory. But even if zones without memcg memory are encountered,
direct reclaim will skip over them without too much hassle.
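
To illustrate the wrap-around idiom that the reworked
mem_cgroup_select_victim_node() relies on, here is a minimal user-space
analogue; the bitmask and the next_node()/first_node() helpers below
are stand-ins for the kernel's nodemask API, not the real
implementation:

#include <stdio.h>

#define MAX_NUMNODES 8

/*
 * Stand-in for the kernel's next_node(): return the next node set in
 * the mask after n, or MAX_NUMNODES once we run past the last one.
 */
static int next_node(int n, unsigned int mask)
{
	for (n++; n < MAX_NUMNODES; n++)
		if (mask & (1u << n))
			return n;
	return MAX_NUMNODES;
}

/* stand-in for first_node() */
static int first_node(unsigned int mask)
{
	return next_node(-1, mask);
}

int main(void)
{
	unsigned int allowed = 0x15;	/* nodes 0, 2 and 4 allowed */
	int node = 4;			/* last_scanned_node */
	int i;

	for (i = 0; i < 4; i++) {
		node = next_node(node, allowed);
		if (node == MAX_NUMNODES)	/* past the end: wrap */
			node = first_node(allowed);
		printf("victim node: %d\n", node);
	}
	return 0;			/* prints 0, 2, 4, 0 */
}
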
Signed-off-by: Johannes Weiner <hannes@...xchg.org>
---
mm/memcontrol.c | 97 +++------------------------------------------------------
1 file changed, 5 insertions(+), 92 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d353d9e1fdca..293db8234179 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -54,6 +54,7 @@
#include <linux/page_cgroup.h>
#include <linux/cpu.h>
#include <linux/oom.h>
+#include <linux/cpuset.h>
#include <linux/lockdep.h>
#include <linux/file.h>
#include "internal.h"
@@ -129,12 +130,10 @@ static const char * const mem_cgroup_lru_names[] = {
enum mem_cgroup_events_target {
MEM_CGROUP_TARGET_THRESH,
MEM_CGROUP_TARGET_SOFTLIMIT,
- MEM_CGROUP_TARGET_NUMAINFO,
MEM_CGROUP_NTARGETS,
};
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
-#define NUMAINFO_EVENTS_TARGET 1024
struct mem_cgroup_stat_cpu {
long count[MEM_CGROUP_STAT_NSTATS];
@@ -352,11 +351,6 @@ struct mem_cgroup {
#endif
int last_scanned_node;
-#if MAX_NUMNODES > 1
- nodemask_t scan_nodes;
- atomic_t numainfo_events;
- atomic_t numainfo_updating;
-#endif
/* List of events which userspace want to receive */
struct list_head event_list;
@@ -965,9 +959,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
case MEM_CGROUP_TARGET_SOFTLIMIT:
next = val + SOFTLIMIT_EVENTS_TARGET;
break;
- case MEM_CGROUP_TARGET_NUMAINFO:
- next = val + NUMAINFO_EVENTS_TARGET;
- break;
default:
break;
}
@@ -986,22 +977,10 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
- bool do_softlimit;
- bool do_numainfo __maybe_unused;
-
- do_softlimit = mem_cgroup_event_ratelimit(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT);
-#if MAX_NUMNODES > 1
- do_numainfo = mem_cgroup_event_ratelimit(memcg,
- MEM_CGROUP_TARGET_NUMAINFO);
-#endif
mem_cgroup_threshold(memcg);
- if (unlikely(do_softlimit))
+ if (mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_SOFTLIMIT))
mem_cgroup_update_tree(memcg, page);
-#if MAX_NUMNODES > 1
- if (unlikely(do_numainfo))
- atomic_inc(&memcg->numainfo_events);
-#endif
}
}
@@ -1708,61 +1687,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
NULL, "Memory cgroup out of memory");
}
-/**
- * test_mem_cgroup_node_reclaimable
- * @memcg: the target memcg
- * @nid: the node ID to be checked.
- * @noswap : specify true here if the user wants flle only information.
- *
- * This function returns whether the specified memcg contains any
- * reclaimable pages on a node. Returns true if there are any reclaimable
- * pages in the node.
- */
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
- int nid, bool noswap)
-{
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
- return true;
- if (noswap || !total_swap_pages)
- return false;
- if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
- return true;
- return false;
-
-}
#if MAX_NUMNODES > 1
-
-/*
- * Always updating the nodemask is not very good - even if we have an empty
- * list or the wrong list here, we can start from some node and traverse all
- * nodes based on the zonelist. So update the list loosely once per 10 secs.
- *
- */
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
-{
- int nid;
- /*
- * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
- * pagein/pageout changes since the last update.
- */
- if (!atomic_read(&memcg->numainfo_events))
- return;
- if (atomic_inc_return(&memcg->numainfo_updating) > 1)
- return;
-
- /* make a nodemask where this memcg uses memory from */
- memcg->scan_nodes = node_states[N_MEMORY];
-
- for_each_node_mask(nid, node_states[N_MEMORY]) {
-
- if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
- node_clear(nid, memcg->scan_nodes);
- }
-
- atomic_set(&memcg->numainfo_events, 0);
- atomic_set(&memcg->numainfo_updating, 0);
-}
-
/*
* Selecting a node where we start reclaim from. Because what we need is just
* reducing usage counter, start from anywhere is O,K. Considering
@@ -1779,21 +1704,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
int node;
- mem_cgroup_may_update_nodemask(memcg);
- node = memcg->last_scanned_node;
-
- node = next_node(node, memcg->scan_nodes);
+ node = next_node(memcg->last_scanned_node, cpuset_current_mems_allowed);
if (node == MAX_NUMNODES)
- node = first_node(memcg->scan_nodes);
- /*
- * We call this when we hit limit, not when pages are added to LRU.
- * No LRU may hold pages because all pages are UNEVICTABLE or
- * memcg is too small and all pages are not on LRU. In that case,
- * we use curret node.
- */
- if (unlikely(node == MAX_NUMNODES))
- node = numa_node_id();
-
+ node = first_node(cpuset_current_mems_allowed);
memcg->last_scanned_node = node;
return node;
}
--
2.1.2