[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d71e6021-777b-3ca9-b08f-64fe7ff51e08@redhat.com>
Date: Fri, 8 Oct 2021 10:17:50 +0200
From: David Hildenbrand <david@...hat.com>
To: ultrachin@....com, akpm@...ux-foundation.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Cc: brookxu.cn@...il.com, chen xiaoguang <xiaoggchen@...cent.com>,
zeng jingxiang <linuszeng@...cent.com>,
lu yihui <yihuilu@...cent.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>
Subject: Re: [PATCH] mm: Free per cpu pages async to shorten program exit time
On 08.10.21 08:39, ultrachin@....com wrote:
> From: chen xiaoguang <xiaoggchen@...cent.com>
>
> The exit time is long when program allocated big memory and
> the most time consuming part is free memory which takes 99.9%
> of the total exit time. By using async free we can save 25% of
> exit time.
>
> Signed-off-by: chen xiaoguang <xiaoggchen@...cent.com>
> Signed-off-by: zeng jingxiang <linuszeng@...cent.com>
> Signed-off-by: lu yihui <yihuilu@...cent.com>
I recently discussed with Claudio if it would be possible to tear down
the process MM deferred, because for some use cases (secure/encrypted
virtualization, very large mmaps) tearing down the page tables is
already the much more expensive operation.
There is mmdrop_async(), and I wondered if one could reuse that concept
when tearing down a process -- I didn't look into feasibility, however,
so it's just some very rough idea.
> ---
> include/linux/mm.h | 1 +
> kernel/exit.c | 2 ++
> mm/page_alloc.c | 89 +++++++++++++++++++++++++++++++++++++++++++---
> 3 files changed, 87 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 73a52aba448f..2add3b635eee 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -908,6 +908,7 @@ void put_pages_list(struct list_head *pages);
>
> void split_page(struct page *page, unsigned int order);
> void copy_huge_page(struct page *dst, struct page *src);
> +void kfreepcp_set_run(unsigned int cpu);
>
> /*
> * Compound pages have a destructor function. Provide a
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 91a43e57a32e..269eb81acbe9 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -167,10 +167,12 @@ static void __exit_signal(struct task_struct *tsk)
> static void delayed_put_task_struct(struct rcu_head *rhp)
> {
> struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
> + unsigned int cpu = tsk->cpu;
>
> perf_event_delayed_put(tsk);
> trace_sched_process_free(tsk);
> put_task_struct(tsk);
> + kfreepcp_set_run(cpu);
> }
>
> void put_task_struct_rcu_user(struct task_struct *task)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index b37435c274cf..8a748ea9156b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -72,6 +72,7 @@
> #include <linux/padata.h>
> #include <linux/khugepaged.h>
> #include <linux/buffer_head.h>
> +#include <linux/smpboot.h>
> #include <asm/sections.h>
> #include <asm/tlbflush.h>
> #include <asm/div64.h>
> @@ -147,6 +148,12 @@ DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */
> EXPORT_PER_CPU_SYMBOL(_numa_mem_);
> #endif
>
> +struct freepcp_stat {
> + struct task_struct *thread;
> + bool should_run;
> +};
> +DEFINE_PER_CPU(struct freepcp_stat, kfreepcp);
> +
> /* work_structs for global per-cpu drains */
> struct pcpu_drain {
> struct zone *zone;
> @@ -3361,6 +3368,81 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
> return min(READ_ONCE(pcp->batch) << 2, high);
> }
>
> +void kfreepcp_set_run(unsigned int cpu)
> +{
> + struct task_struct *tsk;
> + struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
> +
> + tsk = stat->thread;
> + per_cpu(kfreepcp.should_run, cpu) = true;
> +
> + if (tsk && !task_is_running(tsk))
> + wake_up_process(tsk);
> +}
> +EXPORT_SYMBOL_GPL(kfreepcp_set_run);
> +
> +static int kfreepcp_should_run(unsigned int cpu)
> +{
> + struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
> +
> + return stat->should_run;
> +}
> +
> +static void run_kfreepcp(unsigned int cpu)
> +{
> + struct zone *zone;
> + struct per_cpu_pages *pcp;
> + unsigned long flags;
> + struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
> + bool need_free_more = false;
> +
> +
> +
> +again:
> + need_free_more = false;
> + for_each_populated_zone(zone) {
> + pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
> + if (pcp->count && pcp->high && pcp->count > pcp->high) {
> + unsigned long batch = READ_ONCE(pcp->batch);
> + int high;
> +
> + high = nr_pcp_high(pcp, zone);
> + local_irq_save(flags);
> + free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch),
> + pcp);
> + local_irq_restore(flags);
> + if (pcp->count > pcp->high)
> + need_free_more = true;
> + }
> +
> + cond_resched();
> + }
> + if (need_free_more)
> + goto again;
> +
> + stat->should_run = false;
> +}
> +
> +static struct smp_hotplug_thread freepcp_threads = {
> + .store = &kfreepcp.thread,
> + .thread_should_run = kfreepcp_should_run,
> + .thread_fn = run_kfreepcp,
> + .thread_comm = "kfreepcp/%u",
> +};
> +
> +static int __init freepcp_init(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + per_cpu(kfreepcp.should_run, cpu) = false;
> +
> + BUG_ON(smpboot_register_percpu_thread(&freepcp_threads));
> +
> + return 0;
> +}
> +late_initcall(freepcp_init);
> +
> static void free_unref_page_commit(struct page *page, unsigned long pfn,
> int migratetype, unsigned int order)
> {
> @@ -3375,11 +3457,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
> list_add(&page->lru, &pcp->lists[pindex]);
> pcp->count += 1 << order;
> high = nr_pcp_high(pcp, zone);
> - if (pcp->count >= high) {
> - int batch = READ_ONCE(pcp->batch);
> -
> - free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
> - }
> + if (pcp->count >= high)
> + this_cpu_ptr(&kfreepcp)->should_run = false;
> }
>
> /*
>
--
Thanks,
David / dhildenb
Powered by blists - more mailing lists