Message-ID: <20191021121453.GK9379@dhcp22.suse.cz>
Date: Mon, 21 Oct 2019 14:14:53 +0200
From: Michal Hocko <mhocko@...nel.org>
To: Hillf Danton <hdanton@...a.com>
Cc: linux-mm <linux-mm@...ck.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	Chris Down <chris@...isdown.name>, Tejun Heo <tj@...nel.org>,
	Roman Gushchin <guro@...com>,
	Johannes Weiner <hannes@...xchg.org>,
	Shakeel Butt <shakeelb@...gle.com>,
	Matthew Wilcox <willy@...radead.org>,
	Minchan Kim <minchan@...nel.org>, Mel Gorman <mgorman@...e.de>
Subject: Re: [RFC v1] memcg: add memcg lru for page reclaiming

On Mon 21-10-19 19:56:54, Hillf Danton wrote:
>
> Soft limit reclaim is currently frozen; see
> Documentation/admin-guide/cgroup-v2.rst for the reasons.
>
> Borrowing the page lru idea, a memcg lru is added for selecting a
> victim memcg to reclaim pages from under memory pressure. It works in
> parallel to soft limit reclaim (slr), both because slr needs some time
> to reap and because coexisting with it allows the lru to be added in a
> straightforward manner.

This doesn't explain what problem or feature you would like to fix or
achieve. It also doesn't explain the overall design.

> An lru list paired with a spin lock is added, with the existing memcg
> high_work providing the rest of what it needs, plus a couple of
> helpers to add a memcg to the lru and to pick a victim from it.
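
If I decode the diff below correctly, the lifecycle is: the charge
path queues any over-high memcg other than the charge origin on a
single global list instead of reclaiming it directly, and the soft
limit hook later pops the first entry that is still above its high
limit and schedules its high_work. To make that concrete, here is a
stripped down userspace model of the list discipline (my own
reconstruction for discussion, not the patch itself; names mirror the
patch and the global spinlock is elided because the model is single
threaded):

#include <stdio.h>

struct memcg {
	const char *name;
	unsigned long usage;		/* page_counter_read(&memcg->memory) */
	unsigned long high;		/* memcg->high */
	struct memcg *prev, *next;	/* lru_node */
};

/* the single global list, like memcg_lru in the patch */
static struct memcg lru = { .prev = &lru, .next = &lru };

/* queue at the tail unless already queued (list_empty() + list_add_tail()) */
static void memcg_add_lru(struct memcg *m)
{
	if (m->next != m)		/* already on the list */
		return;
	m->prev = lru.prev;
	m->next = &lru;
	lru.prev->next = m;
	lru.prev = m;
}

/* pop entries until one is still above its high limit */
static struct memcg *memcg_pick_lru(void)
{
	while (lru.next != &lru) {
		struct memcg *m = lru.next;

		/* list_del_init() */
		m->prev->next = m->next;
		m->next->prev = m->prev;
		m->prev = m->next = m;

		if (m->usage > m->high)
			return m;
		/* else: fell below high meanwhile, silently dropped */
	}
	return NULL;
}

int main(void)
{
	struct memcg a = { "A", 100, 50, &a, &a };
	struct memcg b = { "B", 10, 50, &b, &b };

	memcg_add_lru(&b);	/* below its high: will be skipped */
	memcg_add_lru(&a);
	memcg_add_lru(&a);	/* second add is a nop */

	struct memcg *victim = memcg_pick_lru();
	printf("victim: %s\n", victim ? victim->name : "none");	/* victim: A */
	return 0;
}

That is the whole selection policy as far as I can tell: strict FIFO
with stale entries dropped on the floor. Is that the intention?
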
>
> V1 is based on 5.4-rc3.
>
> Changes since v0
> - add MEMCG_LRU in init/Kconfig
> - drop changes in mm/vmscan.c
> - make memcg lru work in parallel to slr
>
> Cc: Chris Down <chris@...isdown.name>
> Cc: Tejun Heo <tj@...nel.org>
> Cc: Roman Gushchin <guro@...com>
> Cc: Michal Hocko <mhocko@...nel.org>
> Cc: Johannes Weiner <hannes@...xchg.org>
> Cc: Shakeel Butt <shakeelb@...gle.com>
> Cc: Matthew Wilcox <willy@...radead.org>
> Cc: Minchan Kim <minchan@...nel.org>
> Cc: Mel Gorman <mgorman@...e.de>
> Signed-off-by: Hillf Danton <hdanton@...a.com>
> ---
>
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -843,6 +843,14 @@ config MEMCG
>  	help
>  	  Provides control over the memory footprint of tasks in a cgroup.
>  
> +config MEMCG_LRU
> +	bool
> +	depends on MEMCG
> +	help
> +	  Select victim memcg on lru for page reclaiming.
> +
> +	  Say N if unsure.
> +
> config MEMCG_SWAP
> bool "Swap controller"
> depends on MEMCG && SWAP
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -223,6 +223,10 @@ struct mem_cgroup {
>  	/* Upper bound of normal memory consumption range */
>  	unsigned long high;
>  
> +#ifdef CONFIG_MEMCG_LRU
> +	struct list_head lru_node;
> +#endif
> +
>  	/* Range enforcement for interrupt charges */
>  	struct work_struct high_work;
>  
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign
>  	return 0;
>  }
>  
> +#ifdef CONFIG_MEMCG_LRU
> +static DEFINE_SPINLOCK(memcg_lru_lock);
> +static LIST_HEAD(memcg_lru);	/* a copy of page lru */
> +
> +static void memcg_add_lru(struct mem_cgroup *memcg)
> +{
> +	spin_lock_irq(&memcg_lru_lock);
> +	if (list_empty(&memcg->lru_node))
> +		list_add_tail(&memcg->lru_node, &memcg_lru);
> +	spin_unlock_irq(&memcg_lru_lock);
> +}
> +
> +static struct mem_cgroup *memcg_pick_lru(void)
> +{
> +	struct mem_cgroup *memcg, *next;
> +
> +	spin_lock_irq(&memcg_lru_lock);
> +
> +	list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) {
> +		list_del_init(&memcg->lru_node);
> +
> +		if (page_counter_read(&memcg->memory) > memcg->high) {
> +			spin_unlock_irq(&memcg_lru_lock);
> +			return memcg;
> +		}
> +	}
> +	spin_unlock_irq(&memcg_lru_lock);
> +
> +	return NULL;
> +}
> +#endif
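
So the lru and its lock are global and shared by every memcg in the
system, and an entry is taken off the list the moment it is visited,
whether or not it gets picked.
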
> +
>  static void reclaim_high(struct mem_cgroup *memcg,
>  			 unsigned int nr_pages,
>  			 gfp_t gfp_mask)
>  {
> +#ifdef CONFIG_MEMCG_LRU
> +	struct mem_cgroup *start = memcg;
> +#endif
>  	do {
>  		if (page_counter_read(&memcg->memory) <= memcg->high)
>  			continue;
>  		memcg_memory_event(memcg, MEMCG_HIGH);
> +		if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +			if (start != memcg) {
> +				memcg_add_lru(memcg);
> +				return;
> +			}
>  		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
>  	} while ((memcg = parent_mem_cgroup(memcg)));
>  }
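
IIUC, with CONFIG_MEMCG_LRU enabled only the memcg the charge
originated from is reclaimed directly here; the first over-high
ancestor ends the hierarchy walk and is merely queued for the
deferred soft limit path.
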
> @@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl
>  	unsigned long excess;
>  	unsigned long nr_scanned;
>  
> +	if (IS_ENABLED(CONFIG_MEMCG_LRU)) {
> +		struct mem_cgroup *memcg = memcg_pick_lru();
> +		if (memcg)
> +			schedule_work(&memcg->high_work);
> +		return 0;
> +	}
> +
>  	if (order > 0)
>  		return 0;
>  
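
And with the same config mem_cgroup_soft_limit_reclaim() becomes a
pure dispatcher: it schedules at most one queued memcg's high_work
per invocation and always reports zero reclaimed pages back to its
caller.
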
> @@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all
>  	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
>  		goto fail;
>  
> +	if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +		INIT_LIST_HEAD(&memcg->lru_node);
>  	INIT_WORK(&memcg->high_work, high_work_func);
>  	memcg->last_scanned_node = MAX_NUMNODES;
>  	INIT_LIST_HEAD(&memcg->oom_notify);
> --
>
--
Michal Hocko
SUSE Labs